feat(nodejs): surface skip_auto_cleanup on add and merge insert

feat(nodejs): expose connectNamespace for namespace-backed connections (#3383)
### Summary Adds a `connectNamespace(implName, properties, options?)` function to the NodeJS SDK. Closes #3380. ### Testing - pnpm test - Ran smoke test ``` import { connectNamespace } from "lancedb" import { tmpdir } from "os"; import { mkdtempSync } from "fs"; import { join } from "path"; const dir = mkdtempSync(join(tmpdir(), "lancedb-connect-namespace-smoke-")); console.log(`Using temp dir: ${dir}\n`); // 1. Happy path: connect via the "dir" namespace impl, create + list a table. console.log('Connecting via connectNamespace("dir", { root })...'); const db = await connectNamespace("dir", { root: dir }); console.log(" ✓ connected:", db.display()); console.log("Creating a table and listing it..."); await db.createTable("users", [ { id: 1, name: "alice" }, { id: 2, name: "bob" }, ]); console.log(" ✓ tableNames ->", await db.tableNames()); const table = await db.openTable("users"); console.log(" ✓ users.countRows ->", await table.countRows()); // 2. Storage options pass-through. console.log("\nReconnecting with storageOptions (plumbing check)..."); const dbWithOpts = await connectNamespace( "dir", { root: dir }, { storageOptions: { newTableDataStorageVersion: "stable" } }, ); console.log(" ✓ connected with storageOptions:", dbWithOpts.display()); await dbWithOpts.close(); // 3. Empty implName -> clear error. console.log("\nCalling connectNamespace('', {}) (expect error)..."); try { await connectNamespace("", {}); console.error(" UNEXPECTED: empty implName did not throw"); } catch (err) { console.log(` ✓ Got expected error: ${err.message.split("\n")[0]}`); } // 4. Unknown impl -> error. console.log("\nCalling connectNamespace('not-a-real-impl', {}) (expect error)..."); try { await connectNamespace("not-a-real-impl", {}); console.error(" UNEXPECTED: unknown impl did not throw"); } catch (err) { console.log(` ✓ Got expected error: ${err.message.split("\n")[0]}`); } // 5. 
Create a table inside a child namespace, then reconnect with a fresh // connectNamespace call and confirm the table is reachable via that // namespace path. (The dir+manifest impl keeps the namespace hierarchy in // a root manifest, so "scoping" happens via namespacePath args, not by // pointing root at a subdir.) console.log("\nCreating a table inside a child namespace..."); const dir2 = mkdtempSync(join(tmpdir(), "lancedb-connect-namespace-smoke-")); const writer = await connectNamespace("dir", { root: dir2, manifest_enabled: "true", }); await writer.createNamespace(["analytics"]); await writer.createTable( "orders", [ { id: 1, total: 10 }, { id: 2, total: 20 }, ], ["analytics"], ); console.log( " ✓ writer sees tables under [analytics] ->", await writer.tableNames(["analytics"]), ); await writer.close(); console.log("Reconnecting and reading the table via its namespace path..."); const reader = await connectNamespace("dir", { root: dir2, manifest_enabled: "true", }); console.log( " ✓ reader tableNames(['analytics']) ->", await reader.tableNames(["analytics"]), ); const orders = await reader.openTable("orders", ["analytics"]); console.log(" ✓ orders.countRows via reader ->", await orders.countRows()); await reader.close(); await db.close(); console.log("\nAll checks passed."); ``` ``` Using temp dir: /var/folders/bj/hn6jv9c50y301d1nx0y8xmn00000gn/T/lancedb-connect-namespace-smoke-WByF1P Connecting via connectNamespace("dir", { root })... ✓ connected: LanceNamespaceDatabase Creating a table and listing it... ✓ tableNames -> [ 'users' ] ✓ users.countRows -> 2 Reconnecting with storageOptions (plumbing check)... ✓ connected with storageOptions: LanceNamespaceDatabase Calling connectNamespace('', {}) (expect error)... ✓ Got expected error: implName must be a non-empty string Calling connectNamespace('not-a-real-impl', {}) (expect error)... 
✓ Got expected error: Invalid input, Failed to connect to namespace: Namespace { source: Unsupported { message: "Implementation 'not-a-real-impl' is not available. Supported: dir, rest" }, location: Location { file: "/Users/brendan/.cargo/git/checkouts/lance-8ddea23c38163eda/f693245/rust/lance-namespace-impls/src/connect.rs", line: 216, column: 14 } } Creating a table inside a child namespace... ✓ writer sees tables under [analytics] -> [ 'orders' ] Reconnecting and reading the table via its namespace path... ✓ reader tableNames(['analytics']) -> [ 'orders' ] ✓ orders.countRows via reader -> 2 All checks passed. ``` ### Docs - regenerated docs
2026-06-02 11:50:41 +00:00 · 2026-05-14 12:59:21 -07:00 · 2026-05-13 16:16:56 -07:00 · 2026-05-13 11:49:27 -07:00 · 2026-05-13 11:27:38 -07:00 · 2026-05-12 15:29:48 -07:00
84 changed files with 15719 additions and 15851 deletions
--- a/.github/workflows/codex-fix-ci.yml
+++ b/.github/workflows/codex-fix-ci.yml
@@ -45,7 +45,9 @@ jobs:
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
-          node-version: 20
+          # pnpm 11 (used by the nodejs install step below) requires
+          # Node >= 22.13; use 24 since 22 hits EOL in October.
+          node-version: 24

      - name: Install Codex CLI
        run: npm install -g @openai/codex
@@ -79,10 +81,14 @@ jobs:
          java-version: '11'
          cache: maven

+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 11.1.1
      - name: Install Node.js dependencies for TypeScript bindings
        run: |
          cd nodejs
-          npm ci
+          pnpm install --frozen-lockfile

      - name: Configure git user
        run: |
@@ -137,7 +143,7 @@ jobs:
               - For Rust test failures: Run the specific test with "cargo test -p <crate> <test_name>"
               - For Python test failures: Build with "cd python && maturin develop" then run "pytest <specific_test_file>::<test_name>"
               - For Java test failures: Run "cd java && mvn test -Dtest=<TestClass>#<testMethod>"
-               - For TypeScript test failures: Run "cd nodejs && npm run build && npm test -- --testNamePattern='<test_name>'"
+               - For TypeScript test failures: Run "cd nodejs && pnpm build && pnpm test -- --testNamePattern='<test_name>'"
               - Do NOT run the full test suite - only run the tests that were failing

          7. If the additional guidelines are provided, follow them as well.
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -43,7 +43,7 @@ jobs:
          server-username: SONATYPE_USER
          server-password: SONATYPE_TOKEN
          gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
-          gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
+          gpg-passphrase: MAVEN_GPG_PASSPHRASE
      - name: Set git config
        run: |
          git config --global user.email "dev+gha@lancedb.com"
@@ -58,10 +58,11 @@ jobs:
          echo "use-agent" >> ~/.gnupg/gpg.conf
          echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
          export GPG_TTY=$(tty)
-          ./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh
+          ./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh
        env:
          SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
          SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
+          MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}

  report-failure:
    name: Report Workflow Failure
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -42,11 +42,17 @@ jobs:
      with:
        fetch-depth: 0
        lfs: true
+    - uses: pnpm/action-setup@v4
+      with:
+        version: 11.1.1
    - uses: actions/setup-node@v4
      with:
-        node-version: 20
-        cache: 'npm'
-        cache-dependency-path: nodejs/package-lock.json
+        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
+        # in October. The library itself still supports Node >= 18
+        # (see test matrix below).
+        node-version: 24
+        cache: 'pnpm'
+        cache-dependency-path: nodejs/pnpm-lock.yaml
    - uses: actions-rust-lang/setup-rust-toolchain@v1
      with:
        components: rustfmt, clippy
@@ -61,11 +67,13 @@ jobs:
      run: cargo clippy --profile ci --all --all-features -- -D warnings
    - name: Lint Typescript
      run: |
-        npm ci
-        npm run lint-ci
+        pnpm install --frozen-lockfile
+        pnpm lint-ci
    - name: Lint examples
      working-directory: nodejs/examples
-      run: npm ci && npm run lint-ci
+      # The `@lancedb/lancedb` dep points at file:../dist; pnpm errors if
+      # that dir is missing, so create an empty one for lint-only runs.
+      run: mkdir -p ../dist && pnpm install --frozen-lockfile && pnpm lint-ci
  linux:
    name: Linux (NodeJS ${{ matrix.node-version }})
    timeout-minutes: 30
@@ -82,14 +90,18 @@ jobs:
      with:
        fetch-depth: 0
        lfs: true
-    - uses: actions/setup-node@v4
-      name: Setup Node.js 20 for build
+    - uses: pnpm/action-setup@v4
      with:
-        # @napi-rs/cli v3 requires Node >= 20.12 (via @inquirer/prompts@8).
-        # Build always on Node 20; tests run on the matrix version below.
-        node-version: 20
-        cache: 'npm'
-        cache-dependency-path: nodejs/package-lock.json
+        version: 11.1.1
+    - uses: actions/setup-node@v4
+      name: Setup Node.js 24 for build
+      with:
+        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
+        # in October. Build/install runs on Node 24; tests run on the
+        # matrix version below using direct jest invocation.
+        node-version: 24
+        cache: 'pnpm'
+        cache-dependency-path: nodejs/pnpm-lock.yaml
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
@@ -97,45 +109,52 @@ jobs:
        sudo apt install -y protobuf-compiler libssl-dev
    - name: Build
      run: |
-        npm ci --include=optional
-        npm run build:debug -- --profile ci
+        pnpm install --frozen-lockfile
+        # No `--` separator: pnpm forwards it literally, which would
+        # make napi-rs treat `--profile ci` as a cargo passthrough arg.
+        pnpm build:debug --profile ci
+        pnpm tsc
+    - name: Setup examples
+      working-directory: nodejs/examples
+      run: pnpm install --frozen-lockfile
+    - name: Check docs
+      run: |
+        # We run this as part of the job because the binary needs to be built
+        # first to export the types of the native code.
+        set -e
+        # `pnpm docs` would invoke pnpm's built-in `docs` command, not
+        # the script — use `pnpm run docs`.
+        pnpm run docs
+        if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
+          echo "Docs need to be updated"
+          echo "Run 'pnpm run docs', fix any warnings, and commit the changes."
+          exit 1
+        fi
    - uses: actions/setup-node@v4
      name: Setup Node.js ${{ matrix.node-version }} for test
      with:
        node-version: ${{ matrix.node-version }}
-    - name: Compile TypeScript
-      run: npm run tsc
    - name: Setup localstack
      working-directory: .
      run: docker compose up --detach --wait
    - name: Test
      env:
        S3_TEST: "1"
-      run: npm run test
-    - name: Setup examples
-      working-directory: nodejs/examples
-      run: npm ci
+        # Newer @smithy/core uses dynamic ESM imports.
+        NODE_OPTIONS: "--experimental-vm-modules"
+      # Invoke jest directly because pnpm 11 itself requires Node 22+
+      # while the matrix tests on older Node versions.
+      run: npx jest --verbose
    - name: Test examples
      working-directory: ./
      env:
        OPENAI_API_KEY: test
        OPENAI_BASE_URL: http://0.0.0.0:8000
+        NODE_OPTIONS: "--experimental-vm-modules"
      run: |
        python ci/mock_openai.py &
        cd nodejs/examples
-        npm test
-    - name: Check docs
-      run: |
-        # We run this as part of the job because the binary needs to be built
-        # first to export the types of the native code.
-        set -e
-        npm ci
-        npm run docs
-        if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
-          echo "Docs need to be updated"
-          echo "Run 'npm run docs', fix any warnings, and commit the changes."
-          exit 1
-        fi
+        npx jest --testEnvironment jest-environment-node-single-context --verbose
  macos:
    timeout-minutes: 30
    runs-on: "macos-14"
@@ -148,20 +167,28 @@ jobs:
      with:
        fetch-depth: 0
        lfs: true
+    - uses: pnpm/action-setup@v4
+      with:
+        version: 11.1.1
    - uses: actions/setup-node@v4
      with:
-        node-version: 20
-        cache: 'npm'
-        cache-dependency-path: nodejs/package-lock.json
+        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
+        # in October.
+        node-version: 24
+        cache: 'pnpm'
+        cache-dependency-path: nodejs/pnpm-lock.yaml
+    - uses: dtolnay/rust-toolchain@stable
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
        brew install protobuf
    - name: Build
      run: |
-        npm ci --include=optional
-        npm run build:debug -- --profile ci
-        npm run tsc
+        pnpm install --frozen-lockfile
+        # No `--` separator: pnpm forwards it literally, which would
+        # make napi-rs treat `--profile ci` as a cargo passthrough arg.
+        pnpm build:debug --profile ci
+        pnpm tsc
    - name: Test
      run: |
-        npm run test
+        pnpm test
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -171,13 +171,18 @@ jobs:
        working-directory: nodejs
    steps:
      - uses: actions/checkout@v4
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 11.1.1
      - name: Setup node
        uses: actions/setup-node@v4
-        if: ${{ !matrix.settings.docker }}
        with:
-          node-version: 20
-          cache: npm
-          cache-dependency-path: nodejs/package-lock.json
+          # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
+          # in October.
+          node-version: 24
+          cache: pnpm
+          cache-dependency-path: nodejs/pnpm-lock.yaml
      - name: Install
        uses: dtolnay/rust-toolchain@stable
        if: ${{ !matrix.settings.docker }}
@@ -195,7 +200,7 @@ jobs:
            target/
          key: nodejs-${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }}
      - name: Install dependencies
-        run: npm ci
+        run: pnpm install --frozen-lockfile
      - name: Install Zig
        uses: mlugg/setup-zig@v2
        if: ${{ contains(matrix.settings.target, 'musl') }}
@@ -248,7 +253,7 @@ jobs:
      # one to do the upload.
      - name: Make generic artifacts
        if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
-        run: npm run tsc
+        run: pnpm tsc
      - name: Upload Generic Artifacts
        if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
        uses: actions/upload-artifact@v4
@@ -283,14 +288,24 @@ jobs:
        working-directory: nodejs
    steps:
      - uses: actions/checkout@v4
-      - name: Setup node
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 11.1.1
+      - name: Setup Node.js 24 for install
+        uses: actions/setup-node@v4
+        with:
+          # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
+          # in October.
+          node-version: 24
+          cache: pnpm
+          cache-dependency-path: nodejs/pnpm-lock.yaml
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+      - name: Setup Node.js ${{ matrix.node }} for test
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node }}
-          cache: npm
-          cache-dependency-path: nodejs/package-lock.json
-      - name: Install dependencies
-        run: npm ci
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
@@ -311,7 +326,9 @@ jobs:
      - name: Move built files
        run: cp dist/native.d.ts dist/native.js dist/*.node lancedb/
      - name: Test bindings
-        run: npm test
+        # Invoke jest directly because pnpm 11 itself requires Node 22+
+        # while the matrix tests on older Node versions.
+        run: npx jest --verbose
  publish:
    name: Publish
    runs-on: ubuntu-latest
@@ -323,15 +340,19 @@ jobs:
      - test-lancedb
    steps:
      - uses: actions/checkout@v4
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 11.1.1
      - name: Setup node
        uses: actions/setup-node@v4
        with:
          node-version: 24
-          cache: npm
-          cache-dependency-path: nodejs/package-lock.json
+          cache: pnpm
+          cache-dependency-path: nodejs/pnpm-lock.yaml
          registry-url: "https://registry.npmjs.org"
      - name: Install dependencies
-        run: npm ci
+        run: pnpm install --frozen-lockfile
      - uses: actions/download-artifact@v4
        with:
          name: nodejs-dist
@@ -351,7 +372,7 @@ jobs:
      - name: Display structure of downloaded files
        run: find dist && find nodejs-artifacts
      - name: Move artifacts
-        run: npx napi artifacts -d nodejs-artifacts
+        run: pnpm exec napi artifacts -d nodejs-artifacts
      - name: List packages
        run: find npm
      - name: Publish
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { "version" = "=7.0.0-beta.2", default-features = false, "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=7.0.0-beta.2", default-features = false, "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=7.0.0-beta.2", default-features = false, "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=7.0.0-beta.2", "tag" = "v7.0.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "58.0.0", optional = false }
@@ -54,7 +54,7 @@ half = { "version" = "2.7.1", default-features = false, features = [
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.12.0"
+object_store = "0.13.2"
 pin-project = "1.0.7"
 rand = "0.9"
 snafu = "0.8"
--- a/deny.toml
+++ b/deny.toml
@@ -51,6 +51,18 @@ ignore = [
    # https://rustsec.org/advisories/RUSTSEC-2024-0436
    { id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },

+    # encoding: unmaintained. Reached through lindera-dictionary, which is
+    # required by the native Lindera tokenizer path. Lindera has not migrated
+    # off this crate yet.
+    # https://rustsec.org/advisories/RUSTSEC-2021-0153
+    { id = "RUSTSEC-2021-0153", reason = "transitive via lindera-dictionary for native Lindera tokenizer" },
+
+    # fast-float: unsound and unmaintained. Reached only through polars-arrow
+    # from the optional Polars integration; replacement requires a Polars
+    # dependency upgrade.
+    # https://rustsec.org/advisories/RUSTSEC-2024-0379
+    { id = "RUSTSEC-2024-0379", reason = "transitive via polars-arrow; waiting on Polars migration" },
+
    # tantivy: segfault on malformed input due to missing bounds check.
    # Pulled in via lance for full-text search. We only feed tantivy
    # documents we construct ourselves, not attacker-controlled bytes.
@@ -68,11 +80,17 @@ ignore = [
    # https://rustsec.org/advisories/RUSTSEC-2025-0119
    { id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },

-    # rustls-pemfile: unmaintained. Reached from two separate chains:
-    # rustls-native-certs 0.6 (via hyper-rustls 0.24) and object_store 0.12.
-    # Both upstream dependencies need to move before we can drop it.
-    # https://rustsec.org/advisories/RUSTSEC-2025-0134
-    { id = "RUSTSEC-2025-0134", reason = "transitive via rustls-native-certs/object_store; waiting on upstream migration" },
+    # bincode: unmaintained. Reached through lindera and lindera-dictionary,
+    # which are required by the native Lindera tokenizer path. Lindera has not
+    # migrated to another serialization format yet.
+    # https://rustsec.org/advisories/RUSTSEC-2025-0141
+    { id = "RUSTSEC-2025-0141", reason = "transitive via lindera/lindera-dictionary for native Lindera tokenizer" },
+
+    # lru: soundness issue in IterMut. Reached only through aws-sdk-s3 in
+    # LanceDB's dev-dependency graph; LanceDB does not use that iterator
+    # directly. Clearing this requires the AWS SDK chain to update lru.
+    # https://rustsec.org/advisories/RUSTSEC-2026-0002
+    { id = "RUSTSEC-2026-0002", reason = "transitive via aws-sdk-s3 dev-dependency; waiting on AWS SDK lru upgrade" },

    # rustls-webpki 0.101.7 (old major line): name-constraint checks for
    # URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
@@ -89,6 +107,12 @@ ignore = [
    # we actively use is upgraded to 0.103.13 which contains the fix.
    # https://rustsec.org/advisories/RUSTSEC-2026-0104
    { id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
+
+    # rand 0.8.5: soundness issue only when ThreadRng reseeds inside a custom
+    # logger. Reached through several transitive chains. LanceDB does not use
+    # rand from a custom logger; upgrade once all pinned chains accept 0.8.6+.
+    # https://rustsec.org/advisories/RUSTSEC-2026-0097
+    { id = "RUSTSEC-2026-0097", reason = "transitive rand 0.8.5; LanceDB does not call ThreadRng from custom logging" },
 ]

 # ---------------------------------------------------------------------------
--- a/docs/src/js/_media/CONTRIBUTING.md
+++ b/docs/src/js/_media/CONTRIBUTING.md
@@ -12,20 +12,22 @@ Typescript.
 * `src/`: Rust bindings source code
 * `lancedb/`: Typescript package source code
 * `__test__/`: Unit tests
-* `examples/`: An npm package with the examples shown in the documentation
+* `examples/`: A pnpm package with the examples shown in the documentation

 ## Development environment

 To set up your development environment, you will need to install the following:

-1. Node.js 14 or later
-2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
-3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
+1. Node.js 22.13 or later (required by pnpm 11)
+2. [pnpm](https://pnpm.io/installation) 11 or later (or run via `corepack enable`,
+   which uses the `packageManager` field in `package.json`)
+3. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
+4. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)

 Initial setup:

 ```shell
-npm install
+pnpm install
 ```

 ### Commit Hooks
@@ -39,38 +41,38 @@ pre-commit install

 ## Development

-Most common development commands can be run using the npm scripts.
+Most common development commands can be run using the pnpm scripts.

 Build the package

 ```shell
-npm install
-npm run build
+pnpm install
+pnpm build
 ```

 Lint:

 ```shell
-npm run lint
+pnpm lint
 ```

 Format and fix lints:

 ```shell
-npm run lint-fix
+pnpm lint-fix
 ```

 Run tests:

 ```shell
-npm test
+pnpm test
 ```

 To run a single test:

 ```shell
 # Single file: table.test.ts
-npm test -- table.test.ts
+pnpm test -- table.test.ts
 # Single test: 'merge insert' in table.test.ts
-npm test -- table.test.ts --testNamePattern=merge\ insert
+pnpm test -- table.test.ts --testNamePattern=merge\ insert
 ```
--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -148,6 +148,33 @@ Creates a new empty Table

 ***

+### createNamespace()
+
+```ts
+abstract createNamespace(namespacePath, options?): Promise<CreateNamespaceResponse>
+```
+
+Create a new namespace at the given path.
+
+#### Parameters
+
+* **namespacePath**: `string`[]
+    The namespace path to create.
+
+* **options?**: `Partial`&lt;[`CreateNamespaceOptions`](../interfaces/CreateNamespaceOptions.md)&gt;
+    Creation `mode`
+    ("create" | "exist_ok" | "overwrite") and optional `properties`
+    to attach to the namespace.
+
+#### Returns
+
+`Promise`&lt;[`CreateNamespaceResponse`](../interfaces/CreateNamespaceResponse.md)&gt;
+
+The properties of the
+  created namespace and an optional transaction id.
+
+***
+
 ### createTable()

 #### createTable(options, namespacePath)
@@ -230,6 +257,29 @@ Creates a new Table and initialize it with new data.

 ***

+### describeNamespace()
+
+```ts
+abstract describeNamespace(namespacePath): Promise<DescribeNamespaceResponse>
+```
+
+Describe a namespace, returning its properties.
+
+#### Parameters
+
+* **namespacePath**: `string`[]
+    The namespace path to describe, in
+    parent → child order, e.g. `["analytics", "sales"]`.
+
+#### Returns
+
+`Promise`&lt;[`DescribeNamespaceResponse`](../interfaces/DescribeNamespaceResponse.md)&gt;
+
+The namespace's properties
+  (may be undefined if the namespace has none).
+
+***
+
 ### display()

 ```ts
@@ -263,6 +313,36 @@ Drop all tables in the database.

 ***

+### dropNamespace()
+
+```ts
+abstract dropNamespace(namespacePath, options?): Promise<DropNamespaceResponse>
+```
+
+Drop a namespace.
+
+Use `behavior: "cascade"` to also drop everything contained in the
+namespace (sub-namespaces and tables). The default `"restrict"`
+behavior refuses to drop a non-empty namespace.
+
+#### Parameters
+
+* **namespacePath**: `string`[]
+    The namespace path to drop.
+
+* **options?**: `Partial`&lt;[`DropNamespaceOptions`](../interfaces/DropNamespaceOptions.md)&gt;
+    `mode` ("skip" | "fail"
+    for missing-namespace handling) and `behavior` ("restrict" | "cascade").
+
+#### Returns
+
+`Promise`&lt;[`DropNamespaceResponse`](../interfaces/DropNamespaceResponse.md)&gt;
+
+Any properties returned by
+  the server and an optional transaction id.
+
+***
+
 ### dropTable()

 ```ts
@@ -299,6 +379,36 @@ Return true if the connection has not been closed

 ***

+### listNamespaces()
+
+```ts
+abstract listNamespaces(namespacePath?, options?): Promise<ListNamespacesResponse>
+```
+
+List the immediate child namespaces under the given parent.
+
+Results may be paginated. To retrieve subsequent pages, pass the
+`pageToken` returned by a previous call.
+
+#### Parameters
+
+* **namespacePath?**: `string`[]
+    The parent namespace path. Defaults
+    to the root namespace if omitted.
+
+* **options?**: `Partial`&lt;[`ListNamespacesOptions`](../interfaces/ListNamespacesOptions.md)&gt;
+    Pagination options
+    (`pageToken`, `limit`).
+
+#### Returns
+
+`Promise`&lt;[`ListNamespacesResponse`](../interfaces/ListNamespacesResponse.md)&gt;
+
+Child namespace names and
+  an optional token for fetching the next page.
+
+***
+
 ### openTable()

 ```ts
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -501,6 +501,34 @@ Modeled after ``VACUUM`` in PostgreSQL.

 ***

+### prewarmData()
+
+```ts
+abstract prewarmData(columns?): Promise<void>
+```
+
+Prewarm one or more columns of data in the table.
+
+#### Parameters
+
+* **columns?**: `string`[]
+    The columns to prewarm. If undefined, all columns are prewarmed.
+    This will load the column data into the page cache so that future queries that
+    read those columns avoid the initial cold-start latency.  This call initiates
+    prewarming and returns once the request is accepted; the warming itself may
+    continue in the background.  Calling it on already-prewarmed columns is a
+    no-op on the server.
+    Prewarming is generally useful for columns used in filters or projections.
+    Large columns (e.g. high-dimensional vectors or binary data) may not be
+    practical to prewarm.
+    This feature is currently only supported on remote tables.
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
 ### prewarmIndex()

 ```ts
--- a/docs/src/js/functions/connectNamespace.md
+++ b/docs/src/js/functions/connectNamespace.md
@@ -0,0 +1,131 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / connectNamespace
+
+# Function: connectNamespace()
+
+## connectNamespace(implName, config, options)
+
+```ts
+function connectNamespace(
+   implName,
+   config,
+   options?): Promise<Connection>
+```
+
+Connect to a LanceDB database through a namespace.
+
+Unlike [connect](connect.md), which routes by URI scheme (local path vs.
+`db://` cloud), `connectNamespace` always returns a namespace-backed
+connection. The `implName` selects the namespace implementation:
+
+- `"dir"` — directory namespace, configured with [DirNamespaceConfig](../interfaces/DirNamespaceConfig.md).
+- `"rest"` — remote REST catalog, configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md).
+- Any other string — full module path for a custom implementation,
+  configured with a free-form string-keyed `properties` map.
+
+### Parameters
+
+* **implName**: `"dir"`
+
+* **config**: [`DirNamespaceConfig`](../interfaces/DirNamespaceConfig.md)
+
+* **options?**: `Partial`&lt;[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)&gt;
+
+### Returns
+
+`Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
+
+### Examples
+
+```ts
+const db = await connectNamespace("dir", { root: "/path/to/db" });
+await db.createTable("users", [{ id: 1 }]);
+```
+
+```ts
+const db = await connectNamespace("rest", {
+  uri: "https://catalog.example.com",
+  headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
+});
+```
+
+```ts
+const db = await connectNamespace("my.custom.Namespace", {
+  endpoint: "...",
+});
+```
+
+## connectNamespace(implName, config, options)
+
+```ts
+function connectNamespace(
+   implName,
+   config,
+   options?): Promise<Connection>
+```
+
+Connect through the built-in REST namespace.
+
+Configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md). See the function-level
+documentation above for the full surface, examples, and how this
+relates to [connect](connect.md).
+
+### Parameters
+
+* **implName**: `"rest"`
+
+* **config**: [`RestNamespaceConfig`](../interfaces/RestNamespaceConfig.md)
+
+* **options?**: `Partial`&lt;[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)&gt;
+
+### Returns
+
+`Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
+
+### Example
+
+```ts
+const db = await connectNamespace("rest", {
+  uri: "https://catalog.example.com",
+  headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
+});
+```
+
+## connectNamespace(implName, properties, options)
+
+```ts
+function connectNamespace(
+   implName,
+   properties,
+   options?): Promise<Connection>
+```
+
+Connect through a custom namespace implementation by full module path,
+configured with a free-form string-keyed `properties` map. Use the
+typed overloads above for the built-in `"dir"` and `"rest"` impls.
+
+See the function-level documentation above for examples and how this
+relates to [connect](connect.md).
+
+### Parameters
+
+* **implName**: `string`
+
+* **properties**: `Record`&lt;`string`, `string`&gt;
+
+* **options?**: `Partial`&lt;[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)&gt;
+
+### Returns
+
+`Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
+
+### Example
+
+```ts
+const db = await connectNamespace("my.custom.Namespace", {
+  endpoint: "...",
+});
+```
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -51,10 +51,17 @@
 - [ClientConfig](interfaces/ClientConfig.md)
 - [ColumnAlteration](interfaces/ColumnAlteration.md)
 - [CompactionStats](interfaces/CompactionStats.md)
+- [ConnectNamespaceOptions](interfaces/ConnectNamespaceOptions.md)
 - [ConnectionOptions](interfaces/ConnectionOptions.md)
+- [CreateNamespaceOptions](interfaces/CreateNamespaceOptions.md)
+- [CreateNamespaceResponse](interfaces/CreateNamespaceResponse.md)
 - [CreateTableOptions](interfaces/CreateTableOptions.md)
 - [DeleteResult](interfaces/DeleteResult.md)
+- [DescribeNamespaceResponse](interfaces/DescribeNamespaceResponse.md)
+- [DirNamespaceConfig](interfaces/DirNamespaceConfig.md)
 - [DropColumnsResult](interfaces/DropColumnsResult.md)
+- [DropNamespaceOptions](interfaces/DropNamespaceOptions.md)
+- [DropNamespaceResponse](interfaces/DropNamespaceResponse.md)
 - [ExecutableQuery](interfaces/ExecutableQuery.md)
 - [FragmentStatistics](interfaces/FragmentStatistics.md)
 - [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
@@ -69,12 +76,15 @@
 - [IvfFlatOptions](interfaces/IvfFlatOptions.md)
 - [IvfPqOptions](interfaces/IvfPqOptions.md)
 - [IvfRqOptions](interfaces/IvfRqOptions.md)
+- [ListNamespacesOptions](interfaces/ListNamespacesOptions.md)
+- [ListNamespacesResponse](interfaces/ListNamespacesResponse.md)
 - [MergeResult](interfaces/MergeResult.md)
 - [OpenTableOptions](interfaces/OpenTableOptions.md)
 - [OptimizeOptions](interfaces/OptimizeOptions.md)
 - [OptimizeStats](interfaces/OptimizeStats.md)
 - [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
 - [RemovalStats](interfaces/RemovalStats.md)
+- [RestNamespaceConfig](interfaces/RestNamespaceConfig.md)
 - [RetryConfig](interfaces/RetryConfig.md)
 - [ShuffleOptions](interfaces/ShuffleOptions.md)
 - [SplitCalculatedOptions](interfaces/SplitCalculatedOptions.md)
@@ -107,6 +117,7 @@

 - [RecordBatchIterator](functions/RecordBatchIterator.md)
 - [connect](functions/connect.md)
+- [connectNamespace](functions/connectNamespace.md)
 - [makeArrowTable](functions/makeArrowTable.md)
 - [packBits](functions/packBits.md)
 - [permutationBuilder](functions/permutationBuilder.md)
--- a/docs/src/js/interfaces/ConnectNamespaceOptions.md
+++ b/docs/src/js/interfaces/ConnectNamespaceOptions.md
@@ -0,0 +1,54 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / ConnectNamespaceOptions
+
+# Interface: ConnectNamespaceOptions
+
+## Properties
+
+### namespaceClientProperties?
+
+```ts
+optional namespaceClientProperties: Record<string, string>;
+```
+
+Extra properties for the backing namespace client.
+
+***
+
+### readConsistencyInterval?
+
+```ts
+optional readConsistencyInterval: number;
+```
+
+The interval, in seconds, at which to check for updates to the table
+from other processes. If not set, then consistency is not checked. For
+performance reasons, this is the default. For strong consistency, set
+this to zero seconds. Then every read will check for updates from other
+processes. As a compromise, you can set this to a non-zero value for
+eventual consistency.
+
+***
+
+### session?
+
+```ts
+optional session: Session;
+```
+
+The session to use for this connection. Holds shared caches and other
+session-specific state.
+
+***
+
+### storageOptions?
+
+```ts
+optional storageOptions: Record<string, string>;
+```
+
+Configuration for object storage. The available options are described
+at https://docs.lancedb.com/storage/
--- a/docs/src/js/interfaces/CreateNamespaceOptions.md
+++ b/docs/src/js/interfaces/CreateNamespaceOptions.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / CreateNamespaceOptions
+
+# Interface: CreateNamespaceOptions
+
+## Properties
+
+### mode?
+
+```ts
+optional mode: "overwrite" | "create" | "exist_ok";
+```
+
+Creation mode.
+
+***
+
+### properties?
+
+```ts
+optional properties: Record<string, string>;
+```
+
+Properties to set on the new namespace.
--- a/docs/src/js/interfaces/CreateNamespaceResponse.md
+++ b/docs/src/js/interfaces/CreateNamespaceResponse.md
@@ -0,0 +1,23 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / CreateNamespaceResponse
+
+# Interface: CreateNamespaceResponse
+
+## Properties
+
+### properties?
+
+```ts
+optional properties: Record<string, string>;
+```
+
+***
+
+### transactionId?
+
+```ts
+optional transactionId: string;
+```
--- a/docs/src/js/interfaces/DescribeNamespaceResponse.md
+++ b/docs/src/js/interfaces/DescribeNamespaceResponse.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DescribeNamespaceResponse
+
+# Interface: DescribeNamespaceResponse
+
+## Properties
+
+### properties?
+
+```ts
+optional properties: Record<string, string>;
+```
--- a/docs/src/js/interfaces/DirNamespaceConfig.md
+++ b/docs/src/js/interfaces/DirNamespaceConfig.md
@@ -0,0 +1,47 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DirNamespaceConfig
+
+# Interface: DirNamespaceConfig
+
+Configuration for the built-in directory namespace (`"dir"`).
+
+The directory namespace stores tables under a single root path (local
+filesystem or object storage URI). See
+[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface;
+less-common knobs live under [DirNamespaceConfig.extraProperties](DirNamespaceConfig.md#extraproperties).
+
+## Properties
+
+### extraProperties?
+
+```ts
+optional extraProperties: Record<string, string>;
+```
+
+Additional raw properties passed verbatim to the namespace
+implementation (e.g. `storage.*`, `credential_vendor.*`). The typed
+fields (`root`, `manifestEnabled`) take precedence on key collision.
+
+***
+
+### manifestEnabled?
+
+```ts
+optional manifestEnabled: boolean;
+```
+
+Whether to maintain a namespace manifest at the root. Required for
+child namespaces. Defaults to true in the namespace implementation.
+
+***
+
+### root
+
+```ts
+root: string;
+```
+
+Root path or URI containing the LanceDB tables.
--- a/docs/src/js/interfaces/DropNamespaceOptions.md
+++ b/docs/src/js/interfaces/DropNamespaceOptions.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DropNamespaceOptions
+
+# Interface: DropNamespaceOptions
+
+## Properties
+
+### behavior?
+
+```ts
+optional behavior: "restrict" | "cascade";
+```
+
+Refuse to drop if non-empty (restrict) or drop recursively (cascade).
+
+***
+
+### mode?
+
+```ts
+optional mode: "fail" | "skip";
+```
+
+Whether to skip if the namespace doesn't exist, or fail.
--- a/docs/src/js/interfaces/DropNamespaceResponse.md
+++ b/docs/src/js/interfaces/DropNamespaceResponse.md
@@ -0,0 +1,23 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DropNamespaceResponse
+
+# Interface: DropNamespaceResponse
+
+## Properties
+
+### properties?
+
+```ts
+optional properties: Record<string, string>;
+```
+
+***
+
+### transactionId?
+
+```ts
+optional transactionId: string[];
+```
--- a/docs/src/js/interfaces/ListNamespacesOptions.md
+++ b/docs/src/js/interfaces/ListNamespacesOptions.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / ListNamespacesOptions
+
+# Interface: ListNamespacesOptions
+
+## Properties
+
+### limit?
+
+```ts
+optional limit: number;
+```
+
+An optional limit to the number of results to return.
+
+***
+
+### pageToken?
+
+```ts
+optional pageToken: string;
+```
+
+Token from a previous response for pagination.
--- a/docs/src/js/interfaces/ListNamespacesResponse.md
+++ b/docs/src/js/interfaces/ListNamespacesResponse.md
@@ -0,0 +1,23 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / ListNamespacesResponse
+
+# Interface: ListNamespacesResponse
+
+## Properties
+
+### namespaces
+
+```ts
+namespaces: string[];
+```
+
+***
+
+### pageToken?
+
+```ts
+optional pageToken: string;
+```
--- a/docs/src/js/interfaces/RestNamespaceConfig.md
+++ b/docs/src/js/interfaces/RestNamespaceConfig.md
@@ -0,0 +1,47 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RestNamespaceConfig
+
+# Interface: RestNamespaceConfig
+
+Configuration for the built-in REST namespace (`"rest"`).
+
+The REST namespace talks to a remote catalog server over HTTP. See
+[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface;
+less-common knobs (TLS, metrics) live under
+[RestNamespaceConfig.extraProperties](RestNamespaceConfig.md#extraproperties).
+
+## Properties
+
+### extraProperties?
+
+```ts
+optional extraProperties: Record<string, string>;
+```
+
+Additional raw properties passed verbatim to the namespace
+implementation (e.g. `tls.*`, `ops_metrics_enabled`, `delimiter`).
+The typed fields (`uri`, `headers`) take precedence on key collision.
+
+***
+
+### headers?
+
+```ts
+optional headers: Record<string, string>;
+```
+
+HTTP headers forwarded with each request. Keys are passed through
+as-is (e.g. `"x-api-key"`, `"Authorization"`).
+
+***
+
+### uri
+
+```ts
+uri: string;
+```
+
+Catalog endpoint URL.
--- a/docs/src/python/python.md
+++ b/docs/src/python/python.md
@@ -94,11 +94,11 @@ of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and

 ## Full text search

-::: lancedb.fts.create_index
+Use [lancedb.table.Table.create_fts_index][] for the synchronous API or
+[lancedb.table.AsyncTable.create_index][] with [lancedb.index.FTS][] for the
+asynchronous API.

-::: lancedb.fts.populate_index
-
-::: lancedb.fts.search_index
+::: lancedb.index.FTS

 ## Utilities

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>7.0.0-beta.2</lance-core.version>
+        <lance-core.version>7.0.0-beta.7</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
--- a/nodejs/AGENTS.md
+++ b/nodejs/AGENTS.md
@@ -3,11 +3,11 @@ The core Rust library is in the `../rust/lancedb` directory, the rust binding
 code is in the `src/` directory and the typescript bindings are in
 the `lancedb/` directory.

-Whenever you change the Rust code, you will need to recompile: `npm run build`.
+Whenever you change the Rust code, you will need to recompile: `pnpm build`.

 Common commands:
-* Build: `npm run build`
-* Lint: `npm run lint`
-* Fix lints: `npm run lint-fix`
-* Test: `npm test`
-* Run single test file: `npm test __test__/arrow.test.ts`
+* Build: `pnpm build`
+* Lint: `pnpm lint`
+* Fix lints: `pnpm lint-fix`
+* Test: `pnpm test`
+* Run single test file: `pnpm test __test__/arrow.test.ts`
--- a/nodejs/CONTRIBUTING.md
+++ b/nodejs/CONTRIBUTING.md
@@ -12,20 +12,22 @@ Typescript.
 * `src/`: Rust bindings source code
 * `lancedb/`: Typescript package source code
 * `__test__/`: Unit tests
-* `examples/`: An npm package with the examples shown in the documentation
+* `examples/`: A pnpm package with the examples shown in the documentation

 ## Development environment

 To set up your development environment, you will need to install the following:

-1. Node.js 14 or later
-2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
-3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
+1. Node.js 22 or later (required by pnpm 11)
+2. [pnpm](https://pnpm.io/installation) 11 or later (or run via `corepack enable`,
+   which uses the `packageManager` field in `package.json`)
+3. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
+4. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)

 Initial setup:

 ```shell
-npm install
+pnpm install
 ```

 ### Commit Hooks
@@ -39,38 +41,38 @@ pre-commit install

 ## Development

-Most common development commands can be run using the npm scripts.
+Most common development commands can be run using the pnpm scripts.

 Build the package

 ```shell
-npm install
-npm run build
+pnpm install
+pnpm build
 ```

 Lint:

 ```shell
-npm run lint
+pnpm lint
 ```

 Format and fix lints:

 ```shell
-npm run lint-fix
+pnpm lint-fix
 ```

 Run tests:

 ```shell
-npm test
+pnpm test
 ```

 To run a single test:

 ```shell
 # Single file: table.test.ts
-npm test -- table.test.ts
+pnpm test -- table.test.ts
 # Single test: 'merge insert' in table.test.ts
-npm test -- table.test.ts --testNamePattern=merge\ insert
+pnpm test -- table.test.ts --testNamePattern=merge\ insert
 ```
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -22,6 +22,7 @@ arrow-schema.workspace = true
 env_logger.workspace = true
 futures.workspace = true
 lancedb = { path = "../rust/lancedb", default-features = false }
+lance-namespace.workspace = true
 napi = { version = "3.8.3", default-features = false, features = [
    "napi9",
    "async"
--- a/nodejs/test/connection.test.ts
+++ b/nodejs/test/connection.test.ts
@@ -4,7 +4,7 @@
 import { readdirSync } from "fs";
 import { Field, Float64, Schema } from "apache-arrow";
 import * as tmp from "tmp";
-import { Connection, Table, connect } from "../lancedb";
+import { Connection, Table, connect, connectNamespace } from "../lancedb";
 import { LocalTable } from "../lancedb/table";

 describe("when connecting", () => {
@@ -306,3 +306,186 @@ describe("clone table functionality", () => {
    ).rejects.toThrow("Deep clone is not yet implemented");
  });
 });
+
+describe("namespaces", () => {
+  let tmpDir: tmp.DirResult;
+  let db: Connection;
+
+  beforeEach(async () => {
+    tmpDir = tmp.dirSync({ unsafeCleanup: true });
+    // The local DirectoryNamespace backend only supports child namespaces
+    // when manifest mode is enabled (see lance-namespace-impls/src/dir.rs).
+    db = await connect(tmpDir.name, {
+      // biome-ignore lint/style/useNamingConvention: opaque backend property key, must match Rust
+      namespaceClientProperties: { manifest_enabled: "true" },
+    });
+  });
+  afterEach(() => tmpDir.removeCallback());
+
+  it("should create and describe a namespace", async () => {
+    await db.createNamespace(["myns"]);
+    const desc = await db.describeNamespace(["myns"]);
+    expect(desc).toBeDefined();
+  });
+
+  it("should list namespaces created at the root", async () => {
+    await db.createNamespace(["alpha"]);
+    await db.createNamespace(["beta"]);
+    const list = await db.listNamespaces();
+    expect(list.namespaces).toEqual(expect.arrayContaining(["alpha", "beta"]));
+  });
+
+  it("should list child namespaces under a parent", async () => {
+    await db.createNamespace(["parent"]);
+    await db.createNamespace(["parent", "child"]);
+    const list = await db.listNamespaces(["parent"]);
+    expect(list.namespaces).toContain("child");
+  });
+
+  it("should drop a namespace", async () => {
+    await db.createNamespace(["ephemeral"]);
+    await db.dropNamespace(["ephemeral"]);
+    const list = await db.listNamespaces();
+    expect(list.namespaces).not.toContain("ephemeral");
+  });
+
+  it("should raise an error on any namespace op after close", async () => {
+    await db.close();
+    await expect(db.describeNamespace(["foo"])).rejects.toThrow(
+      "Connection is closed",
+    );
+    await expect(db.listNamespaces()).rejects.toThrow("Connection is closed");
+    await expect(db.createNamespace(["foo"])).rejects.toThrow(
+      "Connection is closed",
+    );
+    await expect(db.dropNamespace(["foo"])).rejects.toThrow(
+      "Connection is closed",
+    );
+  });
+
+  it("should raise an understandable error when describing a non-existent namespace", async () => {
+    await expect(db.describeNamespace(["does-not-exist"])).rejects.toThrow(
+      /not found/i,
+    );
+  });
+
+  it("should raise an error when creating a namespace that already exists", async () => {
+    await db.createNamespace(["dup"]);
+    await expect(db.createNamespace(["dup"])).rejects.toThrow();
+  });
+
+  it("should reject an unrecognized createNamespace mode with a clear error", async () => {
+    await expect(
+      // biome-ignore lint/suspicious/noExplicitAny: deliberately bypass TS to test runtime validation
+      db.createNamespace(["x"], { mode: "frobnicate" as any }),
+    ).rejects.toThrow(/Invalid mode 'frobnicate'/);
+  });
+
+  it("should reject an unrecognized dropNamespace mode with a clear error", async () => {
+    await db.createNamespace(["x"]);
+    await expect(
+      // biome-ignore lint/suspicious/noExplicitAny: deliberately bypass TS to test runtime validation
+      db.dropNamespace(["x"], { mode: "frobnicate" as any }),
+    ).rejects.toThrow(/Invalid mode 'frobnicate'/);
+  });
+
+  it("should reject an unrecognized dropNamespace behavior with a clear error", async () => {
+    await db.createNamespace(["x"]);
+    await expect(
+      // biome-ignore lint/suspicious/noExplicitAny: deliberately bypass TS to test runtime validation
+      db.dropNamespace(["x"], { behavior: "frobnicate" as any }),
+    ).rejects.toThrow(/Invalid behavior 'frobnicate'/);
+  });
+});
+
+describe("connectNamespace", () => {
+  let tmpDir: tmp.DirResult;
+  beforeEach(() => {
+    tmpDir = tmp.dirSync({ unsafeCleanup: true });
+  });
+  afterEach(() => tmpDir.removeCallback());
+
+  it("connects via the dir implementation and supports table ops", async () => {
+    const db = await connectNamespace("dir", { root: tmpDir.name });
+    await db.createTable("users", [{ id: 1 }, { id: 2 }]);
+    await expect(db.tableNames()).resolves.toContain("users");
+  });
+
+  it("throws a clear error when implName is empty", async () => {
+    await expect(connectNamespace("", {})).rejects.toThrow(
+      "implName must be a non-empty string",
+    );
+  });
+
+  it("throws when the namespace implementation is unknown", async () => {
+    await expect(connectNamespace("not-a-real-impl", {})).rejects.toThrow();
+  });
+
+  it("passes storage options through to the namespace", async () => {
+    const db = await connectNamespace(
+      "dir",
+      { root: tmpDir.name },
+      { storageOptions: { newTableDataStorageVersion: "stable" } },
+    );
+    await db.createTable("plumbing", [{ id: 1 }]);
+    await expect(db.tableNames()).resolves.toContain("plumbing");
+  });
+
+  it("supports child namespaces when manifestEnabled is true on the dir config", async () => {
+    const writer = await connectNamespace("dir", {
+      root: tmpDir.name,
+      manifestEnabled: true,
+    });
+    await writer.createNamespace(["analytics"]);
+    await writer.createTable("orders", [{ id: 1 }, { id: 2 }], ["analytics"]);
+    await writer.close();
+
+    const reader = await connectNamespace("dir", {
+      root: tmpDir.name,
+      manifestEnabled: true,
+    });
+    await expect(reader.tableNames(["analytics"])).resolves.toContain("orders");
+    const orders = await reader.openTable("orders", ["analytics"]);
+    await expect(orders.countRows()).resolves.toBe(2);
+  });
+
+  it("merges extraProperties into the dir config and is overridden by typed fields", async () => {
+    // Two observable assertions:
+    // - Typed `root` overrides extraProperties.root: createTable would fail
+    //   under the bogus path if the override didn't happen.
+    // - extraProperties.manifest_enabled="false" is honored end-to-end. Child
+    //   namespaces require manifest mode (default true), so explicitly
+    //   disabling it via extraProperties must make createNamespace reject. If
+    //   extraProperties pass-through were silently broken, the default would
+    //   let createNamespace succeed.
+    const db = await connectNamespace("dir", {
+      root: tmpDir.name,
+      extraProperties: {
+        root: "/should/be/overridden",
+        // biome-ignore lint/style/useNamingConvention: backend property key
+        manifest_enabled: "false",
+      },
+    });
+    await db.createTable("base", [{ id: 1 }]);
+    await expect(db.tableNames()).resolves.toContain("base");
+    await expect(db.createNamespace(["analytics"])).rejects.toThrow();
+  });
+
+  it("flows unknown top-level keys through when implName is dynamic (no silent drop)", async () => {
+    // Routes via the third overload because `impl` is `string`, not the
+    // literal `"dir"`. The dispatcher still notices the runtime value is
+    // "dir", but unknown keys like `manifest_enabled` must not be silently
+    // dropped during the conversion.
+    //
+    // Asserting a *negative* outcome (manifest disabled -> createNamespace
+    // rejects) is required for observability, since the backend default for
+    // `manifest_enabled` is true.
+    const impl: string = "dir";
+    const db = await connectNamespace(impl, {
+      root: tmpDir.name,
+      // biome-ignore lint/style/useNamingConvention: backend property key
+      manifest_enabled: "false",
+    });
+    await expect(db.createNamespace(["mixed"])).rejects.toThrow();
+  });
+});
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -115,6 +115,12 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      await expect(table.countRows()).resolves.toBe(1);
    });

+    it("should accept skipAutoCleanup on add()", async () => {
+      await table.add([{ id: 1 }], { skipAutoCleanup: true });
+      await table.add([{ id: 2 }], { skipAutoCleanup: true });
+      await expect(table.countRows()).resolves.toBe(2);
+    });
+
    it("should let me close the table", async () => {
      expect(table.isOpen()).toBe(true);
      table.close();
@@ -1870,6 +1876,25 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(results.length).toBe(3);
    });

+    test("prewarmData errors on local tables", async () => {
+      const db = await connect(tmpDir.name);
+      const data = [
+        { text: "alpha", vector: [0.1, 0.2, 0.3] },
+        { text: "beta", vector: [0.4, 0.5, 0.6] },
+      ];
+      const table = await db.createTable("prewarm_data_test", data);
+
+      // prewarmData is only supported on remote tables. We verify the call
+      // is wired through napi and surfaces the expected error for both
+      // arg shapes (undefined and string[]).
+      await expect(table.prewarmData()).rejects.toThrow(
+        "prewarm_data is currently only supported on remote tables",
+      );
+      await expect(table.prewarmData(["text"])).rejects.toThrow(
+        "prewarm_data is currently only supported on remote tables",
+      );
+    });
+
    test("full text index on list", async () => {
      const db = await connect(tmpDir.name);
      const data = [
--- a/nodejs/examples/package-lock.json
+++ b/nodejs/examples/package-lock.json
--- a/nodejs/examples/package.json
+++ b/nodejs/examples/package.json
@@ -11,16 +11,17 @@
    "test": "node --experimental-vm-modules node_modules/.bin/jest --testEnvironment jest-environment-node-single-context --verbose",
    "lint": "biome check *.ts && biome format *.ts",
    "lint-ci": "biome ci .",
-    "lint-fix": "biome check --write *.ts && npm run format",
+    "lint-fix": "biome check --write *.ts && pnpm format",
    "format": "biome format --write *.ts"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
+  "packageManager": "pnpm@11.1.1",
  "dependencies": {
-    "@huggingface/transformers": "^3.0.2",
+    "@huggingface/transformers": "3.0.2",
    "@lancedb/lancedb": "file:../dist",
-    "openai": "^4.29.2",
-    "sharp": "^0.33.5"
+    "openai": "4.29.2",
+    "sharp": "0.33.5"
  },
  "devDependencies": {
    "@biomejs/biome": "^1.7.3",
--- a/nodejs/examples/pnpm-lock.yaml
+++ b/nodejs/examples/pnpm-lock.yaml
--- a/nodejs/examples/pnpm-workspace.yaml
+++ b/nodejs/examples/pnpm-workspace.yaml
@@ -0,0 +1,13 @@
+# Block resolution of versions less than 24h old (the value is in minutes;
+# Shai-Hulud window). This is the pnpm 11 default, but it is pinned here so
+# it's visible to reviewers and survives a future pnpm major flipping it.
+minimumReleaseAge: 1440
+
+# Fail install if a transitive dep tries to run an unapproved script.
+strictDepBuilds: true
+
+allowBuilds:
+  '@biomejs/biome': true
+  onnxruntime-node: true
+  protobufjs: true
+  sharp: true
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -16,6 +16,18 @@ import {
 } from "./arrow";
 import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
 import { Connection as LanceDbConnection } from "./native";
+import type {
+  CreateNamespaceResponse,
+  DescribeNamespaceResponse,
+  DropNamespaceResponse,
+  ListNamespacesResponse,
+} from "./native";
+export type {
+  CreateNamespaceResponse,
+  DescribeNamespaceResponse,
+  DropNamespaceResponse,
+  ListNamespacesResponse,
+};
 import { sanitizeTable } from "./sanitize";
 import { LocalTable, Table } from "./table";

@@ -110,6 +122,28 @@ export interface TableNamesOptions {
  /** An optional limit to the number of results to return. */
  limit?: number;
 }
+
+export interface ListNamespacesOptions {
+  /** Token from a previous response for pagination. */
+  pageToken?: string;
+  /** An optional limit to the number of results to return. */
+  limit?: number;
+}
+
+export interface CreateNamespaceOptions {
+  /** Creation mode. */
+  mode?: "create" | "exist_ok" | "overwrite";
+  /** Properties to set on the new namespace. */
+  properties?: Record<string, string>;
+}
+
+export interface DropNamespaceOptions {
+  /** Whether to skip if the namespace doesn't exist, or fail. */
+  mode?: "skip" | "fail";
+  /** Refuse to drop if non-empty (restrict) or drop recursively (cascade). */
+  behavior?: "restrict" | "cascade";
+}
+
 /**
 * A LanceDB Connection that allows you to open tables and create new ones.
 *
@@ -268,6 +302,69 @@ export abstract class Connection {
   */
  abstract dropAllTables(namespacePath?: string[]): Promise<void>;

+  /**
+   * Describe a namespace, returning its properties.
+   *
+   * @param {string[]} namespacePath - The namespace path to describe, in
+   *   parent → child order, e.g. `["analytics", "sales"]`.
+   * @returns {Promise<DescribeNamespaceResponse>} The namespace's properties
+   *   (may be undefined if the namespace has none).
+   */
+  abstract describeNamespace(
+    namespacePath: string[],
+  ): Promise<DescribeNamespaceResponse>;
+
+  /**
+   * List the immediate child namespaces under the given parent.
+   *
+   * Results may be paginated. To retrieve subsequent pages, pass the
+   * `pageToken` returned by a previous call.
+   *
+   * @param {string[]} namespacePath - The parent namespace path. Defaults
+   *   to the root namespace if omitted.
+   * @param {Partial<ListNamespacesOptions>} options - Pagination options
+   *   (`pageToken`, `limit`).
+   * @returns {Promise<ListNamespacesResponse>} Child namespace names and
+   *   an optional token for fetching the next page.
+   */
+  abstract listNamespaces(
+    namespacePath?: string[],
+    options?: Partial<ListNamespacesOptions>,
+  ): Promise<ListNamespacesResponse>;
+
+  /**
+   * Create a new namespace at the given path.
+   *
+   * @param {string[]} namespacePath - The namespace path to create.
+   * @param {Partial<CreateNamespaceOptions>} options - Creation `mode`
+   *   ("create" | "exist_ok" | "overwrite") and optional `properties`
+   *   to attach to the namespace.
+   * @returns {Promise<CreateNamespaceResponse>} The properties of the
+   *   created namespace and an optional transaction id.
+   */
+  abstract createNamespace(
+    namespacePath: string[],
+    options?: Partial<CreateNamespaceOptions>,
+  ): Promise<CreateNamespaceResponse>;
+
+  /**
+   * Drop a namespace.
+   *
+   * Use `behavior: "cascade"` to also drop everything contained in the
+   * namespace (sub-namespaces and tables). The default `"restrict"`
+   * behavior refuses to drop a non-empty namespace.
+   *
+   * @param {string[]} namespacePath - The namespace path to drop.
+   * @param {Partial<DropNamespaceOptions>} options - `mode` ("skip" | "fail"
+   *   for missing-namespace handling) and `behavior` ("restrict" | "cascade").
+   * @returns {Promise<DropNamespaceResponse>} Any properties returned by
+   *   the server and an optional transaction id.
+   */
+  abstract dropNamespace(
+    namespacePath: string[],
+    options?: Partial<DropNamespaceOptions>,
+  ): Promise<DropNamespaceResponse>;
+
  /**
   * Clone a table from a source table.
   *
@@ -515,6 +612,45 @@ export class LocalConnection extends Connection {
  async dropAllTables(namespacePath?: string[]): Promise<void> {
    return this.inner.dropAllTables(namespacePath ?? []);
  }
+
+  describeNamespace(
+    namespacePath: string[],
+  ): Promise<DescribeNamespaceResponse> {
+    return this.inner.describeNamespace(namespacePath);
+  }
+
+  listNamespaces(
+    namespacePath?: string[],
+    options?: Partial<ListNamespacesOptions>,
+  ): Promise<ListNamespacesResponse> {
+    return this.inner.listNamespaces(
+      namespacePath ?? [],
+      options?.pageToken,
+      options?.limit,
+    );
+  }
+
+  createNamespace(
+    namespacePath: string[],
+    options?: Partial<CreateNamespaceOptions>,
+  ): Promise<CreateNamespaceResponse> {
+    return this.inner.createNamespace(
+      namespacePath,
+      options?.mode,
+      options?.properties,
+    );
+  }
+
+  dropNamespace(
+    namespacePath: string[],
+    options?: Partial<DropNamespaceOptions>,
+  ): Promise<DropNamespaceResponse> {
+    return this.inner.dropNamespace(
+      namespacePath,
+      options?.mode,
+      options?.behavior,
+    );
+  }
 }

 /**
--- a/nodejs/lancedb/index.ts
+++ b/nodejs/lancedb/index.ts
@@ -8,6 +8,7 @@ import {
 } from "./connection";

 import {
+  ConnectNamespaceOptions,
  ConnectionOptions,
  Connection as LanceDbConnection,
  JsHeaderProvider as NativeJsHeaderProvider,
@@ -22,6 +23,7 @@ export { JsHeaderProvider as NativeJsHeaderProvider } from "./native.js";
 export {
  AddColumnsSql,
  ConnectionOptions,
+  ConnectNamespaceOptions,
  IndexStatistics,
  IndexConfig,
  ClientConfig,
@@ -62,6 +64,13 @@ export {
  CreateTableOptions,
  TableNamesOptions,
  OpenTableOptions,
+  ListNamespacesOptions,
+  CreateNamespaceOptions,
+  DropNamespaceOptions,
+  ListNamespacesResponse,
+  CreateNamespaceResponse,
+  DropNamespaceResponse,
+  DescribeNamespaceResponse,
 } from "./connection";

 export { Session } from "./native.js";
@@ -293,3 +302,197 @@ export async function connect(
  );
  return new LocalConnection(nativeConn);
 }
+
+/**
+ * Configuration for the built-in directory namespace (`"dir"`).
+ *
+ * The directory namespace stores tables under a single root path (local
+ * filesystem or object storage URI). See
+ * {@link https://docs.lancedb.com/namespaces} for the documented surface;
+ * less-common knobs live under {@link DirNamespaceConfig.extraProperties}.
+ */
+export interface DirNamespaceConfig {
+  /** Root path or URI containing the LanceDB tables. */
+  root: string;
+  /**
+   * Whether to maintain a namespace manifest at the root. Required for
+   * child namespaces. Defaults to true on the impl side.
+   */
+  manifestEnabled?: boolean;
+  /**
+   * Additional raw properties passed verbatim to the namespace
+   * implementation (e.g. `storage.*`, `credential_vendor.*`). Typed
+   * fields above take precedence on key collision.
+   */
+  extraProperties?: Record<string, string>;
+}
+
+/**
+ * Configuration for the built-in REST namespace (`"rest"`).
+ *
+ * The REST namespace talks to a remote catalog server over HTTP. See
+ * {@link https://docs.lancedb.com/namespaces} for the documented surface;
+ * less-common knobs (TLS, metrics) live under
+ * {@link RestNamespaceConfig.extraProperties}.
+ */
+export interface RestNamespaceConfig {
+  /** Catalog endpoint URL. */
+  uri: string;
+  /**
+   * HTTP headers forwarded with each request. Keys are passed through
+   * as-is (e.g. `"x-api-key"`, `"Authorization"`).
+   */
+  headers?: Record<string, string>;
+  /**
+   * Additional raw properties passed verbatim to the namespace
+   * implementation (e.g. `tls.*`, `ops_metrics_enabled`, `delimiter`).
+   * Typed fields above take precedence on key collision.
+   */
+  extraProperties?: Record<string, string>;
+}
+
+function dirConfigToProperties(
+  config: DirNamespaceConfig,
+): Record<string, string> {
+  // Spread the whole input so that unknown keys (e.g. a raw `manifest_enabled`
+  // passed via the dynamic-impl path) flow through instead of being dropped.
+  // Typed transformations layer on top.
+  const { manifestEnabled, extraProperties, ...rest } = config;
+  const properties: Record<string, string> = {
+    ...(extraProperties ?? {}),
+    ...(rest as Record<string, string>),
+  };
+  if (manifestEnabled !== undefined) {
+    properties.manifest_enabled = String(manifestEnabled);
+  }
+  return properties;
+}
+
+function restConfigToProperties(
+  config: RestNamespaceConfig,
+): Record<string, string> {
+  const { headers, extraProperties, ...rest } = config;
+  const properties: Record<string, string> = {
+    ...(extraProperties ?? {}),
+    ...(rest as Record<string, string>),
+  };
+  if (headers) {
+    for (const [name, value] of Object.entries(headers)) {
+      properties[`headers.${name}`] = value;
+    }
+  }
+  return properties;
+}
+
+/**
+ * Connect to a LanceDB database through a namespace.
+ *
+ * Unlike {@link connect}, which routes by URI scheme (local path vs.
+ * `db://` cloud), `connectNamespace` always returns a namespace-backed
+ * connection. The `implName` selects the namespace implementation:
+ *
+ * - `"dir"` — directory namespace, configured with {@link DirNamespaceConfig}.
+ * - `"rest"` — remote REST catalog, configured with {@link RestNamespaceConfig}.
+ * - Any other string — full module path for a custom implementation,
+ *   configured with a free-form string-keyed `properties` map.
+ *
+ * @example Typed dir namespace
+ * ```ts
+ * const db = await connectNamespace("dir", { root: "/path/to/db" });
+ * await db.createTable("users", [{ id: 1 }]);
+ * ```
+ *
+ * @example Typed REST namespace with auth headers
+ * ```ts
+ * const db = await connectNamespace("rest", {
+ *   uri: "https://catalog.example.com",
+ *   headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
+ * });
+ * ```
+ *
+ * @example Custom implementation with raw properties
+ * ```ts
+ * const db = await connectNamespace("my.custom.Namespace", {
+ *   endpoint: "...",
+ * });
+ * ```
+ */
+export function connectNamespace(
+  implName: "dir",
+  config: DirNamespaceConfig,
+  options?: Partial<ConnectNamespaceOptions>,
+): Promise<Connection>;
+/**
+ * Connect through the built-in REST namespace.
+ *
+ * Configured with {@link RestNamespaceConfig}. See the function-level
+ * documentation above for the full surface, examples, and how this
+ * relates to {@link connect}.
+ *
+ * @example
+ * ```ts
+ * const db = await connectNamespace("rest", {
+ *   uri: "https://catalog.example.com",
+ *   headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
+ * });
+ * ```
+ */
+export function connectNamespace(
+  implName: "rest",
+  config: RestNamespaceConfig,
+  options?: Partial<ConnectNamespaceOptions>,
+): Promise<Connection>;
+/**
+ * Connect through a custom namespace implementation by full module path,
+ * configured with a free-form string-keyed `properties` map. Use the
+ * typed overloads above for the built-in `"dir"` and `"rest"` impls.
+ *
+ * See the function-level documentation above for examples and how this
+ * relates to {@link connect}.
+ *
+ * @example
+ * ```ts
+ * const db = await connectNamespace("my.custom.Namespace", {
+ *   endpoint: "...",
+ * });
+ * ```
+ */
+export function connectNamespace(
+  implName: string,
+  properties: Record<string, string>,
+  options?: Partial<ConnectNamespaceOptions>,
+): Promise<Connection>;
+export async function connectNamespace(
+  implName: string,
+  configOrProperties:
+    | DirNamespaceConfig
+    | RestNamespaceConfig
+    | Record<string, string>,
+  options?: Partial<ConnectNamespaceOptions>,
+): Promise<Connection> {
+  let properties: Record<string, string>;
+  if (implName === "dir") {
+    properties = dirConfigToProperties(
+      configOrProperties as DirNamespaceConfig,
+    );
+  } else if (implName === "rest") {
+    properties = restConfigToProperties(
+      configOrProperties as RestNamespaceConfig,
+    );
+  } else {
+    properties = configOrProperties as Record<string, string>;
+  }
+
+  // Shallow-copy so the caller's `options` object is never mutated below.
+  const finalOptions = { ...options } as ConnectNamespaceOptions;
+  finalOptions.storageOptions = cleanseStorageOptions(
+    finalOptions.storageOptions,
+  );
+
+  const nativeConn = await LanceDbConnection.newWithNamespace(
+    implName,
+    properties,
+    finalOptions,
+  );
+  return new LocalConnection(nativeConn);
+}
--- a/nodejs/lancedb/merge.ts
+++ b/nodejs/lancedb/merge.ts
@@ -87,6 +87,23 @@ export class MergeInsertBuilder {
      this.#schema,
    );
  }
+
+  /**
+   * Skip the automatic cleanup of old dataset versions that would otherwise
+   * run as part of this merge insert's commit. Forwards to
+   * `MergeInsertBuilder::skip_auto_cleanup` in lance-core.
+   *
+   * Useful for high-frequency writers that prefer to manage version cleanup
+   * themselves, or writers without delete permissions on the underlying storage.
+   *
+   * @param skip - If true, the auto-cleanup step is skipped at commit time.
+   */
+  skipAutoCleanup(skip: boolean): MergeInsertBuilder {
+    return new MergeInsertBuilder(
+      this.#native.skipAutoCleanup(skip),
+      this.#schema,
+    );
+  }
  /**
   * Executes the merge insert operation
   *
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -56,6 +56,18 @@ export interface AddDataOptions {
   * If "overwrite" then the new data will replace the existing data in the table.
   */
  mode: "append" | "overwrite";
+  /**
+   * If true, skip the automatic cleanup of old dataset versions that would
+   * otherwise run as part of this write's commit. Forwards to
+   * `WriteParams.skip_auto_cleanup` in lance-core.
+   *
+   * Useful for high-frequency writers that prefer to manage version cleanup
+   * themselves (for example, via a separate periodic optimize job), or for
+   * writers that don't have delete permissions on the underlying storage.
+   *
+   * Defaults to false.
+   */
+  skipAutoCleanup?: boolean;
 }

 export interface UpdateOptions {
@@ -285,6 +297,25 @@ export abstract class Table {
   */
  abstract prewarmIndex(name: string): Promise<void>;

+  /**
+   * Prewarm one or more columns of data in the table.
+   *
+   * This will load the column data into the page cache so that future queries that
+   * read those columns avoid the initial cold-start latency.  This call initiates
+   * prewarming and returns once the request is accepted; the warming itself may
+   * continue in the background.  Calling it on already-prewarmed columns is a
+   * no-op on the server.
+   *
+   * Prewarming is generally useful for columns used in filters or projections.
+   * Large columns (e.g. high-dimensional vectors or binary data) may not be
+   * practical to prewarm.
+   *
+   * This feature is currently only supported on remote tables.
+   *
+   * @param columns The columns to prewarm. If undefined, all columns are prewarmed.
+   */
+  abstract prewarmData(columns?: string[]): Promise<void>;
+
  /**
   * Waits for asynchronous indexing to complete on the table.
   *
@@ -617,7 +648,7 @@ export class LocalTable extends Table {
    const schema = await this.schema();

    const buffer = await fromDataToBuffer(data, undefined, schema);
-    return await this.inner.add(buffer, mode);
+    return await this.inner.add(buffer, mode, options?.skipAutoCleanup);
  }

  async update(
@@ -710,6 +741,10 @@ export class LocalTable extends Table {
    await this.inner.prewarmIndex(name);
  }

+  async prewarmData(columns?: string[]): Promise<void> {
+    await this.inner.prewarmData(columns);
+  }
+
  async waitForIndex(
    indexNames: string[],
    timeoutSeconds: number,
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -38,15 +38,15 @@
    "url": "https://github.com/lancedb/lancedb"
  },
  "devDependencies": {
-    "@aws-sdk/client-dynamodb": "^3.33.0",
-    "@aws-sdk/client-kms": "^3.33.0",
-    "@aws-sdk/client-s3": "^3.33.0",
+    "@aws-sdk/client-dynamodb": "3.1003.0",
+    "@aws-sdk/client-kms": "3.1003.0",
+    "@aws-sdk/client-s3": "3.1003.0",
    "@biomejs/biome": "^1.7.3",
    "@jest/globals": "^29.7.0",
-    "@napi-rs/cli": "^3.5.1",
+    "@napi-rs/cli": "3.5.1",
    "@types/axios": "^0.14.0",
    "@types/jest": "^29.1.2",
-    "@types/node": "^22.7.4",
+    "@types/node": "22.7.4",
    "@types/tmp": "^0.2.6",
    "apache-arrow-15": "npm:apache-arrow@15.0.0",
    "apache-arrow-16": "npm:apache-arrow@16.0.0",
@@ -57,9 +57,9 @@
    "shx": "^0.3.4",
    "tmp": "^0.2.3",
    "ts-jest": "^29.1.2",
-    "typedoc": "^0.26.4",
-    "typedoc-plugin-markdown": "^4.2.1",
-    "typescript": "^5.5.4",
+    "typedoc": "0.26.4",
+    "typedoc-plugin-markdown": "4.2.1",
+    "typescript": "5.5.4",
    "typescript-eslint": "^7.1.0"
  },
  "ava": {
@@ -68,16 +68,16 @@
  "engines": {
    "node": ">= 18"
  },
+  "packageManager": "pnpm@11.1.1",
  "cpu": ["x64", "arm64"],
  "os": ["darwin", "linux", "win32"],
  "scripts": {
    "artifacts": "napi artifacts",
    "build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb",
-    "postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
+    "postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/ && node -e \"require('fs').writeFileSync('dist/package.json', JSON.stringify({name:'@lancedb/lancedb',type:'commonjs'}))\"",
    "build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist",
-    "postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
-    "build": "npm run build:debug && npm run tsc",
-    "build-release": "npm run build:release && npm run tsc",
+    "build": "pnpm build:debug && pnpm tsc",
+    "build-release": "pnpm build:release && pnpm tsc",
    "tsc": "tsc -b",
    "posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",
    "lint-ci": "biome ci .",
@@ -87,7 +87,7 @@
    "lint-fix": "biome check --write . && biome format --write .",
    "prepublishOnly": "napi prepublish -t npm",
    "test": "jest --verbose",
-    "integration": "S3_TEST=1 npm run test",
+    "integration": "S3_TEST=1 pnpm test",
    "universal": "napi universalize",
    "version": "napi version"
  },
@@ -95,8 +95,8 @@
    "reflect-metadata": "^0.2.2"
  },
  "optionalDependencies": {
-    "@huggingface/transformers": "^3.0.2",
-    "openai": "^4.29.2"
+    "@huggingface/transformers": "3.0.2",
+    "openai": "4.29.2"
  },
  "peerDependencies": {
    "apache-arrow": ">=15.0.0 <=18.1.0"
--- a/nodejs/pnpm-lock.yaml
+++ b/nodejs/pnpm-lock.yaml
--- a/nodejs/pnpm-workspace.yaml
+++ b/nodejs/pnpm-workspace.yaml
@@ -0,0 +1,18 @@
+# Flat node_modules layout. The @napi-rs/cli build step fails to locate
+# the cdylib artifact under pnpm's isolated layout; the hoisted linker
+# mirrors npm's structure and unblocks the native build.
+nodeLinker: hoisted
+
+# Block resolution of versions less than 24h old (Shai-Hulud window).
+# This is the pnpm 11 default but pinned here so it's visible to
+# reviewers and survives a future pnpm major flipping the default.
+minimumReleaseAge: 1440
+
+# Fail install if a transitive dep tries to run an unapproved script.
+strictDepBuilds: true
+
+allowBuilds:
+  '@biomejs/biome': true
+  onnxruntime-node: true
+  protobufjs: true
+  sharp: true
--- a/nodejs/src/connection.rs
+++ b/nodejs/src/connection.rs
@@ -8,12 +8,16 @@ use lancedb::database::{CreateTableMode, Database};
 use napi::bindgen_prelude::*;
 use napi_derive::*;

+use crate::ConnectNamespaceOptions;
 use crate::ConnectionOptions;
 use crate::error::NapiErrorExt;
 use crate::header::JsHeaderProvider;
 use crate::table::Table;
-use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
+use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection, connect_namespace};

+use lance_namespace::models::{
+    CreateNamespaceRequest, DescribeNamespaceRequest, DropNamespaceRequest, ListNamespacesRequest,
+};
 use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};

 #[napi]
@@ -21,6 +25,29 @@ pub struct Connection {
    inner: Option<LanceDBConnection>,
 }

+#[napi(object)]
+pub struct DescribeNamespaceResponse {
+    pub properties: Option<HashMap<String, String>>,
+}
+
+#[napi(object)]
+pub struct ListNamespacesResponse {
+    pub namespaces: Vec<String>,
+    pub page_token: Option<String>,
+}
+
+#[napi(object)]
+pub struct CreateNamespaceResponse {
+    pub properties: Option<HashMap<String, String>>,
+    pub transaction_id: Option<String>,
+}
+
+#[napi(object)]
+pub struct DropNamespaceResponse {
+    pub properties: Option<HashMap<String, String>>,
+    pub transaction_id: Option<Vec<String>>,
+}
+
 impl Connection {
    pub(crate) fn inner_new(inner: LanceDBConnection) -> Self {
        Self { inner: Some(inner) }
@@ -106,6 +133,39 @@ impl Connection {
        Ok(Self::inner_new(builder.execute().await.default_error()?))
    }

+    /// Create a new Connection instance backed by a namespace implementation.
+    #[napi(factory)]
+    pub async fn new_with_namespace(
+        impl_name: String,
+        properties: HashMap<String, String>,
+        options: ConnectNamespaceOptions,
+    ) -> napi::Result<Self> {
+        if impl_name.is_empty() {
+            return Err(napi::Error::from_reason(
+                "implName must be a non-empty string",
+            ));
+        }
+
+        let mut builder = connect_namespace(&impl_name, properties);
+        if let Some(interval) = options.read_consistency_interval {
+            // Reject negative/NaN/infinite values; `from_secs_f64` would panic on them.
+            let interval = std::time::Duration::try_from_secs_f64(interval)
+                .map_err(|e| napi::Error::from_reason(format!("readConsistencyInterval: {}", e)))?;
+            builder = builder.read_consistency_interval(interval);
+        }
+        for (key, value) in options.storage_options.into_iter().flatten() {
+            builder = builder.storage_option(key, value);
+        }
+        if let Some(namespace_client_properties) = options.namespace_client_properties {
+            builder = builder.namespace_client_properties(namespace_client_properties);
+        }
+        if let Some(session) = options.session {
+            builder = builder.session(session.inner.clone());
+        }
+
+        Ok(Self::inner_new(builder.execute().await.default_error()?))
+    }
+
    #[napi]
    pub fn display(&self) -> napi::Result<String> {
        Ok(self.get_inner()?.to_string())
@@ -273,4 +333,130 @@ impl Connection {
        let ns = namespace_path.unwrap_or_default();
        self.get_inner()?.drop_all_tables(&ns).await.default_error()
    }
+
+    #[napi(catch_unwind)]
+    /// Describe a namespace and return its properties.
+    pub async fn describe_namespace(
+        &self,
+        namespace_path: Vec<String>,
+    ) -> napi::Result<DescribeNamespaceResponse> {
+        let req = DescribeNamespaceRequest {
+            id: Some(namespace_path),
+            ..Default::default()
+        };
+        let resp = self
+            .get_inner()?
+            .describe_namespace(req)
+            .await
+            .default_error()?;
+        Ok(DescribeNamespaceResponse {
+            properties: resp.properties,
+        })
+    }
+
+    #[napi(catch_unwind)]
+    /// List child namespaces under the given path; `limit` saturates at `i32::MAX`.
+    pub async fn list_namespaces(
+        &self,
+        namespace_path: Option<Vec<String>>,
+        page_token: Option<String>,
+        limit: Option<u32>,
+    ) -> napi::Result<ListNamespacesResponse> {
+        let req = ListNamespacesRequest {
+            id: namespace_path,
+            page_token,
+            limit: limit.map(|l| i32::try_from(l).unwrap_or(i32::MAX)),
+            ..Default::default()
+        };
+        let resp = self
+            .get_inner()?
+            .list_namespaces(req)
+            .await
+            .default_error()?;
+        Ok(ListNamespacesResponse {
+            namespaces: resp.namespaces,
+            page_token: resp.page_token,
+        })
+    }
+
+    #[napi(catch_unwind)]
+    /// Create a new namespace with optional properties.
+    pub async fn create_namespace(
+        &self,
+        namespace_path: Vec<String>,
+        mode: Option<String>,
+        properties: Option<HashMap<String, String>>,
+    ) -> napi::Result<CreateNamespaceResponse> {
+        let mode_str = mode
+            .map(|m| match m.to_lowercase().as_str() {
+                "create" => Ok("Create".to_string()),
+                "exist_ok" => Ok("ExistOk".to_string()),
+                "overwrite" => Ok("Overwrite".to_string()),
+                _ => Err(napi::Error::from_reason(format!(
+                    "Invalid mode '{}': expected one of 'create', 'exist_ok', 'overwrite'",
+                    m
+                ))),
+            })
+            .transpose()?;
+        let req = CreateNamespaceRequest {
+            id: Some(namespace_path),
+            mode: mode_str,
+            properties,
+            ..Default::default()
+        };
+        let resp = self
+            .get_inner()?
+            .create_namespace(req)
+            .await
+            .default_error()?;
+        Ok(CreateNamespaceResponse {
+            properties: resp.properties,
+            transaction_id: resp.transaction_id,
+        })
+    }
+
+    #[napi(catch_unwind)]
+    /// Drop a namespace.
+    pub async fn drop_namespace(
+        &self,
+        namespace_path: Vec<String>,
+        mode: Option<String>,
+        behavior: Option<String>,
+    ) -> napi::Result<DropNamespaceResponse> {
+        let mode_str = mode
+            .map(|m| match m.to_lowercase().as_str() {
+                "skip" => Ok("Skip".to_string()),
+                "fail" => Ok("Fail".to_string()),
+                _ => Err(napi::Error::from_reason(format!(
+                    "Invalid mode '{}': expected one of 'skip', 'fail'",
+                    m
+                ))),
+            })
+            .transpose()?;
+        let behavior_str = behavior
+            .map(|b| match b.to_lowercase().as_str() {
+                "restrict" => Ok("Restrict".to_string()),
+                "cascade" => Ok("Cascade".to_string()),
+                _ => Err(napi::Error::from_reason(format!(
+                    "Invalid behavior '{}': expected one of 'restrict', 'cascade'",
+                    b
+                ))),
+            })
+            .transpose()?;
+        let req = DropNamespaceRequest {
+            id: Some(namespace_path),
+            mode: mode_str,
+            behavior: behavior_str,
+            ..Default::default()
+        };
+        let resp = self
+            .get_inner()?
+            .drop_namespace(req)
+            .await
+            .default_error()?;
+        Ok(DropNamespaceResponse {
+            properties: resp.properties,
+            transaction_id: resp.transaction_id,
+        })
+    }
 }
--- a/nodejs/src/lib.rs
+++ b/nodejs/src/lib.rs
@@ -67,6 +67,26 @@ pub struct OpenTableOptions {
    pub storage_options: Option<HashMap<String, String>>,
 }

+#[napi(object)]
+#[derive(Debug)]
+pub struct ConnectNamespaceOptions {
+    /// The interval, in seconds, at which to check for updates to the table
+    /// from other processes. If None, then consistency is not checked. For
+    /// performance reasons, this is the default. For strong consistency, set
+    /// this to zero seconds. Then every read will check for updates from other
+    /// processes. As a compromise, you can set this to a non-zero value for
+    /// eventual consistency.
+    pub read_consistency_interval: Option<f64>,
+    /// Configuration for object storage. The available options are described
+    /// at https://docs.lancedb.com/storage/
+    pub storage_options: Option<HashMap<String, String>>,
+    /// Extra properties for the backing namespace client.
+    pub namespace_client_properties: Option<HashMap<String, String>>,
+    /// The session to use for this connection. Holds shared caches and other
+    /// session-specific state.
+    pub session: Option<session::Session>,
+}
+
 #[napi_derive::module_init]
 fn init() {
    let env = Env::new()
--- a/nodejs/src/merge.rs
+++ b/nodejs/src/merge.rs
@@ -50,6 +50,13 @@ impl NativeMergeInsertBuilder {
        this
    }

+    #[napi]
+    pub fn skip_auto_cleanup(&self, skip: bool) -> Self {
+        let mut this = self.clone();
+        this.inner.skip_auto_cleanup(skip);
+        this
+    }
+
    #[napi(catch_unwind)]
    pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
        let data = ipc_file_to_batches(buf.to_vec())
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -6,7 +6,7 @@ use std::collections::HashMap;
 use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
 use lancedb::table::{
    AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
-    OptimizeAction, OptimizeOptions, Table as LanceDbTable,
+    OptimizeAction, OptimizeOptions, Table as LanceDbTable, WriteOptions,
 };
 use napi::bindgen_prelude::*;
 use napi_derive::napi;
@@ -68,7 +68,12 @@ impl Table {
    }

    #[napi(catch_unwind)]
-    pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
+    pub async fn add(
+        &self,
+        buf: Buffer,
+        mode: String,
+        skip_auto_cleanup: Option<bool>,
+    ) -> napi::Result<AddResult> {
        let batches = ipc_file_to_batches(buf.to_vec())
            .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
        let batches = batches
@@ -92,6 +97,13 @@ impl Table {
            return Err(napi::Error::from_reason(format!("Invalid mode: {}", mode)));
        };

+        if skip_auto_cleanup.unwrap_or(false) {
+            op = op.write_options(WriteOptions {
+                skip_auto_cleanup: true,
+                ..Default::default()
+            });
+        }
+
        let res = op.execute().await.default_error()?;
        Ok(res.into())
    }
@@ -159,6 +171,14 @@ impl Table {
            .default_error()
    }

+    #[napi(catch_unwind)]
+    pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> napi::Result<()> {
+        self.inner_ref()?
+            .prewarm_data(columns)
+            .await
+            .default_error()
+    }
+
    #[napi(catch_unwind)]
    pub async fn wait_for_index(&self, index_names: Vec<String>, timeout_s: i64) -> Result<()> {
        let timeout = std::time::Duration::from_secs(timeout_s.try_into().unwrap());
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -35,7 +35,8 @@ futures.workspace = true
 serde = "1"
 serde_json = "1"
 snafu.workspace = true
-tokio = { version = "1.40", features = ["sync"] }
+tokio = { version = "1.40", features = ["sync", "rt-multi-thread"] }
+libc = "0.2"

 [build-dependencies]
 pyo3-build-config = { version = "0.28", features = [
--- a/python/python/lancedb/init.py
+++ b/python/python/lancedb/init.py
@@ -7,7 +7,6 @@ import os
 from concurrent.futures import ThreadPoolExecutor
 from datetime import timedelta
 from typing import Dict, Optional, Union, Any, List
-import warnings

 __version__ = importlib.metadata.version("lancedb")

@@ -438,13 +437,3 @@ __all__ = [
    "Table",
    "__version__",
 ]
-
-
-def __warn_on_fork():
-    warnings.warn(
-        "lance is not fork-safe. If you are using multiprocessing, use spawn instead.",
-    )
-
-
-if hasattr(os, "register_at_fork"):
-    os.register_at_fork(before=__warn_on_fork)  # type: ignore[attr-defined]
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -12,6 +12,7 @@ from .index import (
    LabelList,
    HnswPq,
    HnswSq,
+    HnswFlat,
    FTS,
 )
 from lance_namespace import (
@@ -25,6 +26,7 @@ from .remote import ClientConfig

 IvfHnswPq: type[HnswPq] = HnswPq
 IvfHnswSq: type[HnswSq] = HnswSq
+IvfHnswFlat: type[HnswFlat] = HnswFlat

 class PyExpr:
    """A type-safe DataFusion expression node (Rust-side handle)."""
@@ -180,6 +182,7 @@ class Table:
            IvfPq,
            HnswPq,
            HnswSq,
+            HnswFlat,
            BTree,
            Bitmap,
            LabelList,
@@ -442,7 +445,7 @@ class AsyncPermutationBuilder:
    async def execute(self) -> Table: ...

 def async_permutation_builder(
-    table: Table, dest_table_name: str
+    table: Table,
 ) -> AsyncPermutationBuilder: ...
 def fts_query_to_json(query: Any) -> str: ...

--- a/python/python/lancedb/background_loop.py
+++ b/python/python/lancedb/background_loop.py
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import asyncio
+import os
 import threading
+import warnings


 class BackgroundEventLoop:
@@ -13,6 +15,9 @@ class BackgroundEventLoop:
    """

    def __init__(self):
+        self._start()
+
+    def _start(self):
        self.loop = asyncio.new_event_loop()
        self.thread = threading.Thread(
            target=self.loop.run_forever,
@@ -31,3 +36,30 @@ class BackgroundEventLoop:


 LOOP = BackgroundEventLoop()
+
+_FORK_WARNED = False
+
+
+def _reset_after_fork():
+    # Threads do not survive fork(), so the asyncio loop in LOOP.thread is
+    # dead in the child. Re-initialize the singleton in place so existing
+    # `from .background_loop import LOOP` references in other modules see
+    # the new state. The Rust-side tokio runtime is reset analogously by a
+    # pthread_atfork hook installed in the _lancedb extension.
+    LOOP._start()
+    global _FORK_WARNED
+    if not _FORK_WARNED:
+        _FORK_WARNED = True
+        warnings.warn(
+            "lancedb fork support is experimental: the internal async "
+            "runtime has been reset in the forked child, but a small chance "
+            "of deadlock remains if other state was mid-operation at fork "
+            "time. The 'forkserver' or 'spawn' multiprocessing start method "
+            "is likely a safer alternative.",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+
+
+if hasattr(os, "register_at_fork"):
+    os.register_at_fork(after_in_child=_reset_after_fork)
--- a/python/python/lancedb/index.py
+++ b/python/python/lancedb/index.py
@@ -7,6 +7,7 @@ from typing import Literal, Optional
 from ._lancedb import (
    IndexConfig,
 )
+from .types import BaseTokenizerType

 lang_mapping = {
    "ar": "Arabic",
@@ -111,8 +112,12 @@ class FTS:
        - "simple": Splits text by whitespace and punctuation.
        - "whitespace": Split text by whitespace, but not punctuation.
        - "raw": No tokenization. The entire text is treated as a single token.
+        - "ngram": N-gram tokenizer for substring-style matching.
+        - "jieba/*": Jieba tokenizer loaded from Lance's language model home.
+        - "lindera/*": Lindera tokenizer loaded from Lance's language model home.
    language : str, default "English"
-        The language to use for tokenization.
+        The language to use for stemming and stop-word removal. This is not the
+        primary way to enable CJK tokenization.
    max_token_length : int, default 40
        The maximum token length to index. Tokens longer than this length will be
        ignored.
@@ -127,10 +132,17 @@ class FTS:
    ascii_folding : bool, default True
        Whether to fold ASCII characters. This converts accented characters to
        their ASCII equivalent. For example, "café" would be converted to "cafe".
+
+    Notes
+    -----
+    Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
+    require tokenizer models in Lance's language model home. Set
+    ``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
+    directory under ``lance/language_models``.
    """

    with_position: bool = False
-    base_tokenizer: Literal["simple", "raw", "whitespace"] = "simple"
+    base_tokenizer: BaseTokenizerType = "simple"
    language: str = "English"
    max_token_length: Optional[int] = 40
    lower_case: bool = True
@@ -376,9 +388,98 @@ class HnswSq:
    target_partition_size: Optional[int] = None


+@dataclass
+class HnswFlat:
+    """Describe a HNSW-FLAT index configuration.
+
+    HNSW-FLAT stands for Hierarchical Navigable Small World without quantization.
+    It stores raw vectors in the HNSW graph, providing the highest recall among
+    the IVF_HNSW family at the cost of more memory and disk space compared to
+    :class:`HnswSq` or :class:`HnswPq`.
+
+    Parameters
+    ----------
+
+    distance_type: str, default "l2"
+
+        The distance metric used to train the index.
+
+        The following distance types are available:
+
+        "l2" - Euclidean distance. This is a very common distance metric that
+        accounts for both magnitude and direction when determining the distance
+        between vectors. l2 distance has a range of [0, ∞).
+
+        "cosine" - Cosine distance.  Cosine distance is a distance metric
+        calculated from the cosine similarity between two vectors. Cosine
+        similarity is a measure of similarity between two non-zero vectors of an
+        inner product space. It is defined to equal the cosine of the angle
+        between them.  Unlike l2, the cosine distance is not affected by the
+        magnitude of the vectors.  Cosine distance has a range of [0, 2].
+
+        "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
+        distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
+        l2 norm is 1), then dot distance is equivalent to the cosine distance.
+
+    num_partitions, default sqrt(num_rows)
+
+        The number of IVF partitions to create.
+
+        For HNSW, we recommend a small number of partitions. Setting this to 1
+        works well for most tables. For very large tables, training just one HNSW
+        graph will require too much memory. Each partition becomes its own HNSW
+        graph, so setting this value higher reduces the peak memory use of
+        training.
+
+    max_iterations, default 50
+
+        Max iterations to train kmeans.
+
+        When training an IVF index we use kmeans to calculate the partitions.
+        This parameter controls how many iterations of kmeans to run.
+
+    sample_rate, default 256
+
+        The rate used to calculate the number of training vectors for kmeans.
+
+    m, default 20
+
+        The number of neighbors to select for each vector in the HNSW graph.
+
+        This value controls the tradeoff between search speed and accuracy.
+        The higher the value the more accurate the search but the slower it
+        will be.
+
+    ef_construction, default 300
+
+        The number of candidates to evaluate during the construction of the HNSW
+        graph.
+
+        This value controls the tradeoff between build speed and accuracy.
+        The higher the value the more accurate the build but the slower it will
+        be.  150 to 300 is the typical range. 100 is a minimum for good quality
+        search results. In most cases, there is no benefit to setting this higher
+        than 500.  This value should be set to a value that is not less than `ef`
+        in the search phase.
+
+    target_partition_size, default is 1,048,576
+
+        The target size of each partition.
+    """
+
+    distance_type: Literal["l2", "cosine", "dot"] = "l2"
+    num_partitions: Optional[int] = None
+    max_iterations: int = 50
+    sample_rate: int = 256
+    m: int = 20
+    ef_construction: int = 300
+    target_partition_size: Optional[int] = None
+
+
 # Backwards-compatible aliases
 IvfHnswPq = HnswPq
 IvfHnswSq = HnswSq
+IvfHnswFlat = HnswFlat


@dataclass
@@ -698,11 +799,13 @@ __all__ = [
    "IvfPq",
    "IvfHnswPq",
    "IvfHnswSq",
+    "IvfHnswFlat",
    "IvfSq",
    "IvfRq",
    "IvfFlat",
    "HnswPq",
    "HnswSq",
+    "HnswFlat",
    "IndexConfig",
    "FTS",
    "Bitmap",
--- a/python/python/lancedb/permutation.py
+++ b/python/python/lancedb/permutation.py
@@ -1,11 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

-from deprecation import deprecated
-from lancedb import AsyncConnection, DBConnection
-import pyarrow as pa
+import copy
 import json

+from deprecation import deprecated
+import pyarrow as pa
+
 from ._lancedb import async_permutation_builder, PermutationReader
 from .table import LanceTable
 from .background_loop import LOOP
@@ -36,10 +37,7 @@ class PermutationBuilder:
    be referenced by name in the future.  If names are not provided then they can only
    be referenced by their ordinal index.  There is no requirement to name every split.

-    By default, the permutation will be stored in memory and will be lost when the
-    program exits.  To persist the permutation (for very large datasets or to share
-    the permutation across multiple workers) use the [persist](#persist) method to
-    create a permanent table.
+    The permutation is stored in memory and will be lost when the program exits.
    """

    def __init__(self, table: LanceTable):
@@ -51,15 +49,6 @@ class PermutationBuilder:
        """
        self._async = async_permutation_builder(table)

-    def persist(
-        self, database: Union[DBConnection, AsyncConnection], table_name: str
-    ) -> "PermutationBuilder":
-        """
-        Persist the permutation to the given database.
-        """
-        self._async.persist(database, table_name)
-        return self
-
    def split_random(
        self,
        *,
@@ -380,20 +369,44 @@ class Permutation:

    def __init__(
        self,
-        reader: PermutationReader,
+        base_table: LanceTable,
+        permutation_table: Optional[LanceTable],
+        split: int,
        selection: dict[str, str],
        batch_size: int,
        transform_fn: Callable[pa.RecordBatch, Any],
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+        connection_factory: Optional[Callable[[str], LanceTable]] = None,
+        _reader: Optional[PermutationReader] = None,
    ):
        """
        Internal constructor.  Use [from_tables](#from_tables) instead.
        """
-        assert reader is not None, "reader is required"
+        assert base_table is not None, "base_table is required"
        assert selection is not None, "selection is required"
-        self.reader = reader
+        self.base_table = base_table
+        self.permutation_table = permutation_table
+        self.split = split
        self.selection = selection
        self.transform_fn = transform_fn
        self.batch_size = batch_size
+        self.offset = offset
+        self.limit = limit
+        self.connection_factory = connection_factory
+        if _reader is None:
+            _reader = LOOP.run(self._build_reader())
+        self.reader: PermutationReader = _reader
+
+    async def _build_reader(self) -> PermutationReader:
+        reader = await PermutationReader.from_tables(
+            self.base_table, self.permutation_table, self.split
+        )
+        if self.offset is not None:
+            reader = await reader.with_offset(self.offset)
+        if self.limit is not None:
+            reader = await reader.with_limit(self.limit)
+        return reader

    def _with_selection(self, selection: dict[str, str]) -> "Permutation":
        """
@@ -402,21 +415,97 @@ class Permutation:
        Does not validation of the selection and it replaces it entirely.  This is not
        intended for public use.
        """
-        return Permutation(self.reader, selection, self.batch_size, self.transform_fn)
-
-    def _with_reader(self, reader: PermutationReader) -> "Permutation":
-        """
-        Creates a new permutation with the given reader
-
-        This is an internal method and should not be used directly.
-        """
-        return Permutation(reader, self.selection, self.batch_size, self.transform_fn)
+        new = copy.copy(self)
+        new.selection = selection
+        return new

    def with_batch_size(self, batch_size: int) -> "Permutation":
        """
        Creates a new permutation with the given batch size
        """
-        return Permutation(self.reader, self.selection, batch_size, self.transform_fn)
+        new = copy.copy(self)
+        new.batch_size = batch_size
+        return new
+
+    def with_connection_factory(
+        self, connection_factory: Callable[[str], LanceTable]
+    ) -> "Permutation":
+        """
+        Creates a new permutation that will use ``connection_factory`` to reopen
+        the base table when this permutation is unpickled in a worker process.
+
+        The factory is a callable that takes a single argument — the base table
+        name — and returns a [LanceTable]. It must be picklable: it is stored
+        in the pickled state and called after unpickling to recover the base
+        table. In practice, picklable callables are top-level (module-level)
+        functions, ``functools.partial`` of such functions, or instances of
+        picklable classes implementing ``__call__``. Lambdas and closures over
+        local variables don't pickle with the default protocol.
+
+        Setting a factory is necessary when the URI alone is not enough to
+        re-open the connection — most importantly for LanceDB Cloud (``db://``)
+        connections, where ``api_key`` and ``region`` aren't recoverable from
+        the connection object after construction.
+
+        For local file or cloud-storage paths the factory is optional: if not
+        set, ``__getstate__`` falls back to capturing
+        ``(uri, storage_options, namespace_path)`` and re-opening via
+        ``lancedb.connect(uri, storage_options=...)``.
+
+        Examples
+        --------
+        Basic native (file-system path), parameterized via ``functools.partial``::
+
+            import functools, lancedb
+            from lancedb.permutation import Permutation
+
+            def open_native_table(uri: str, table_name: str):
+                return lancedb.connect(uri).open_table(table_name)
+
+            factory = functools.partial(open_native_table, "/data/lance_db")
+            permutation = Permutation.identity(
+                factory("training")
+            ).with_connection_factory(factory)
+
+        Native via :func:`lancedb.connect_namespace` (e.g. a directory- or
+        REST-backed namespace client). The factory takes the
+        implementation name and properties dict as partial-bound args so
+        the worker can rebuild the same namespace connection::
+
+            def open_via_namespace(
+                impl: str, properties: dict[str, str], table_name: str,
+            ):
+                return lancedb.connect_namespace(impl, properties).open_table(
+                    table_name,
+                )
+
+            factory = functools.partial(
+                open_via_namespace,
+                "dir",
+                {"root": "/data/lance_db"},
+            )
+
+        LanceDB Cloud, reading credentials from env vars at worker startup
+        so secrets aren't pickled into the dataset::
+
+            import os, lancedb
+
+            def open_remote_table(table_name: str):
+                db = lancedb.connect(
+                    "db://my-database",
+                    api_key=os.environ["LANCEDB_API_KEY"],
+                    region=os.environ.get("LANCEDB_REGION", "us-east-1"),
+                )
+                return db.open_table(table_name)
+
+            permutation = Permutation.identity(
+                open_remote_table("training")
+            ).with_connection_factory(open_remote_table)
+        """
+        assert connection_factory is not None, "connection_factory is required"
+        new = copy.copy(self)
+        new.connection_factory = connection_factory
+        return new

    @classmethod
    def identity(cls, table: LanceTable) -> "Permutation":
@@ -489,11 +578,141 @@ class Permutation:
            schema = await reader.output_schema(None)
            initial_selection = {name: name for name in schema.names}
            return cls(
-                reader, initial_selection, DEFAULT_BATCH_SIZE, Transforms.arrow2python
+                base_table,
+                permutation_table,
+                split,
+                initial_selection,
+                DEFAULT_BATCH_SIZE,
+                Transforms.arrow2python,
+                _reader=reader,
            )

        return LOOP.run(do_from_tables())

+    def __copy__(self) -> "Permutation":
+        """Shallow-copy this permutation without invoking the pickle hooks.
+
+        ``copy.copy`` falls back to ``__reduce_ex__``, which calls
+        ``__getstate__`` and ``__setstate__``. For this class that would
+        serialize the tables, reopen the connections and rebuild the reader
+        on every ``with_*`` call, and would raise for base tables whose
+        connection exposes no uri before a connection factory can be set.
+        Copying the instance ``__dict__`` keeps the ``with_*`` helpers cheap
+        and shares the existing tables and reader.
+        """
+        new = object.__new__(type(self))
+        new.__dict__.update(self.__dict__)
+        return new
+
+    def __getstate__(self) -> dict[str, Any]:
+        """Build a picklable state dict for this permutation.
+
+        The base table is captured either via a user-supplied
+        ``connection_factory`` (see [with_connection_factory]) or, as a
+        fallback, by introspecting ``(uri, storage_options, namespace_path)``
+        on the connection. The permutation table — always an in-memory
+        LanceDB table — is captured as a pyarrow Table (which pickles via
+        Arrow IPC natively). The reader is dropped from the wire format;
+        ``__setstate__`` rebuilds it from the restored tables.
+        """
+        permutation_data: Optional[pa.Table] = None
+        if self.permutation_table is not None:
+            permutation_data = self.permutation_table.to_arrow()
+
+        common = {
+            "base_table_name": self.base_table.name,
+            "permutation_data": permutation_data,
+            "split": self.split,
+            "selection": self.selection,
+            "batch_size": self.batch_size,
+            "transform_fn": self.transform_fn,
+            "offset": self.offset,
+            "limit": self.limit,
+            "connection_factory": self.connection_factory,
+        }
+
+        if self.connection_factory is not None:
+            # The factory carries enough state to recover the base table on
+            # its own; we don't need to capture the URI / storage options /
+            # namespace from the existing connection.
+            return common
+
+        # URI-introspection fallback: only viable for native (OSS) connections
+        # where (uri, storage_options) is enough to reopen. Remote / cloud
+        # connections don't expose recoverable api_key / region — those users
+        # must call with_connection_factory().
+        try:
+            base_uri = self.base_table._conn.uri
+            storage_options = self.base_table._conn.storage_options
+        except AttributeError as e:
+            raise ValueError(
+                "Cannot pickle this Permutation: the base table's connection "
+                "does not expose a uri/storage_options, which usually means it "
+                "is a remote (LanceDB Cloud) connection. Call "
+                "Permutation.with_connection_factory(...) first to provide a "
+                "picklable callable that re-opens the base table from a worker "
+                "process."
+            ) from e
+
+        if base_uri.startswith("memory://"):
+            # In-memory base tables don't exist in any worker process by
+            # default, so dump the entire base table into the pickle. This
+            # can be expensive for large datasets — users with large
+            # in-memory base tables should either persist them or set a
+            # connection_factory.
+            return {
+                **common,
+                "base_table_data": self.base_table.to_arrow(),
+            }
+
+        return {
+            **common,
+            "base_table_uri": base_uri,
+            "base_table_namespace": self.base_table._namespace_path,
+            "base_table_storage_options": storage_options,
+        }
+
+    def __setstate__(self, state: dict[str, Any]) -> None:
+        from . import connect
+
+        connection_factory = state["connection_factory"]
+        if connection_factory is not None:
+            base_table = connection_factory(state["base_table_name"])
+        elif "base_table_data" in state:
+            # In-memory base table inlined into the pickle; rebuild the same
+            # way we rebuild the in-memory permutation table.
+            mem_db = connect("memory://")
+            base_table = mem_db.create_table(
+                state["base_table_name"], state["base_table_data"]
+            )
+        else:
+            base_db = connect(
+                state["base_table_uri"],
+                storage_options=state["base_table_storage_options"],
+            )
+            base_table = base_db.open_table(
+                state["base_table_name"],
+                namespace_path=state["base_table_namespace"] or None,
+            )
+
+        permutation_table: Optional[LanceTable] = None
+        if state["permutation_data"] is not None:
+            mem_db = connect("memory://")
+            permutation_table = mem_db.create_table(
+                "permutation", state["permutation_data"]
+            )
+
+        self.base_table = base_table
+        self.permutation_table = permutation_table
+        self.split = state["split"]
+        self.selection = state["selection"]
+        self.batch_size = state["batch_size"]
+        self.transform_fn = state["transform_fn"]
+        self.offset = state["offset"]
+        self.limit = state["limit"]
+        self.connection_factory = connection_factory
+        self.reader = LOOP.run(self._build_reader())
+
    @property
    def schema(self) -> pa.Schema:
        async def do_output_schema():
@@ -760,7 +964,9 @@ class Permutation:
        for expensive operations such as image decoding.
        """
        assert transform is not None, "transform is required"
-        return Permutation(self.reader, self.selection, self.batch_size, transform)
+        new = copy.copy(self)
+        new.transform_fn = transform
+        return new

    def __getitem__(self, index: int) -> Any:
        """
@@ -795,12 +1001,10 @@ class Permutation:
        """
        Skip the first `skip` rows of the permutation
        """
-
-        async def do_with_skip():
-            reader = await self.reader.with_offset(skip)
-            return self._with_reader(reader)
-
-        return LOOP.run(do_with_skip())
+        new = copy.copy(self)
+        new.offset = skip
+        new.reader = LOOP.run(new._build_reader())
+        return new

    @deprecated(details="Use with_take instead")
    def take(self, limit: int) -> "Permutation":
@@ -818,12 +1022,10 @@ class Permutation:
        """
        Limit the permutation to `limit` rows (following any `skip`)
        """
-
-        async def do_with_take():
-            reader = await self.reader.with_limit(limit)
-            return self._with_reader(reader)
-
-        return LOOP.run(do_with_take())
+        new = copy.copy(self)
+        new.limit = limit
+        new.reader = LOOP.run(new._build_reader())
+        return new

    @deprecated(details="Use with_repeat instead")
    def repeat(self, times: int) -> "Permutation":
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -22,6 +22,7 @@ from lancedb.index import (
    FTS,
    BTree,
    Bitmap,
+    HnswFlat,
    HnswSq,
    IvfFlat,
    IvfPq,
@@ -39,6 +40,7 @@ from lancedb.table import _normalize_progress

 from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
 from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
+from ..types import BaseTokenizerType


 class RemoteTable(Table):
@@ -167,7 +169,7 @@ class RemoteTable(Table):
        wait_timeout: Optional[timedelta] = None,
        with_position: bool = False,
        # tokenizer configs:
-        base_tokenizer: str = "simple",
+        base_tokenizer: BaseTokenizerType = "simple",
        language: str = "English",
        max_token_length: Optional[int] = 40,
        lower_case: bool = True,
@@ -284,13 +286,15 @@ class RemoteTable(Table):
            )
        elif index_type == "IVF_HNSW_SQ":
            config = HnswSq(distance_type=metric, num_partitions=num_partitions)
+        elif index_type == "IVF_HNSW_FLAT":
+            config = HnswFlat(distance_type=metric, num_partitions=num_partitions)
        elif index_type == "IVF_FLAT":
            config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
        else:
            raise ValueError(
                f"Unknown vector index type: {index_type}. Valid options are"
                " 'IVF_FLAT', 'IVF_PQ', 'IVF_RQ', 'IVF_SQ',"
-                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
+                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ', 'IVF_HNSW_FLAT'"
            )

        LOOP.run(
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -57,6 +57,7 @@ from .index import (
    LabelList,
    HnswPq,
    HnswSq,
+    HnswFlat,
    FTS,
 )
 from .merge import LanceMergeInsertBuilder
@@ -86,6 +87,59 @@ from .util import (
 )
 from .index import lang_mapping

+_MODEL_BACKED_TOKENIZER_PREFIXES = ("jieba", "lindera")
+_MODEL_BACKED_TOKENIZER_ERRORS = (
+    "unknown base tokenizer",
+    "Invalid directory path:",
+    "Failed to load Jieba",
+    "Failed to load tokenizer config",
+    "Failed to initialize default tokenizer",
+)
+
+
+def _add_unique_note(exception: BaseException, note: str) -> None:
+    existing_notes = getattr(exception, "__notes__", ()) or ()
+    message = (
+        exception.args[0]
+        if exception.args and isinstance(exception.args[0], str)
+        else ""
+    )
+    if note not in existing_notes and note not in message:
+        add_note(exception, note)
+
+
+def _is_model_backed_tokenizer(base_tokenizer: str) -> bool:
+    return any(
+        base_tokenizer == prefix or base_tokenizer.startswith(f"{prefix}/")
+        for prefix in _MODEL_BACKED_TOKENIZER_PREFIXES
+    )
+
+
+def _maybe_add_fts_error_note(
+    exception: BaseException, *, base_tokenizer: str, language: Optional[str] = None
+) -> None:
+    message = str(exception)
+    if language is not None and "not support the requested language" in message:
+        supported_langs = ", ".join(lang_mapping.values())
+        _add_unique_note(exception, f"Supported languages: {supported_langs}")
+        return
+
+    if not _is_model_backed_tokenizer(base_tokenizer):
+        return
+
+    if not any(marker in message for marker in _MODEL_BACKED_TOKENIZER_ERRORS):
+        return
+
+    _add_unique_note(
+        exception,
+        "Model-backed tokenizers such as 'jieba/default' and 'lindera/ipadic' "
+        "require tokenizer models in Lance's language model home. Set "
+        "LANCE_LANGUAGE_MODEL_HOME to override the default platform data "
+        "directory under 'lance/language_models'. Expected layouts include "
+        "'<model-home>/jieba/default/...' and "
+        "'<model-home>/lindera/ipadic/...'.",
+    )
+

 if TYPE_CHECKING:
    from .db import LanceDBConnection
@@ -958,7 +1012,10 @@ class Table(ABC):
        tokenizer_name: str, default "default"
            A compatibility alias for native tokenizer configs. Can be "raw",
            "default" or the 2 letter language code followed by "_stem". So
-            for english it would be "en_stem".
+            for english it would be "en_stem". For new native FTS indexes, use
+            ``base_tokenizer`` directly; ``tokenizer_name`` is a legacy
+            compatibility alias and does not expose model-backed tokenizer names
+            such as ``jieba/default`` or ``lindera/ipadic``.
        use_tantivy: bool, default False
            Deprecated legacy Tantivy parameter. Setting this to True raises an
            error.
@@ -972,8 +1029,11 @@ class Table(ABC):
            - "whitespace": Split text by whitespace, but not punctuation.
            - "raw": No tokenization. The entire text is treated as a single token.
            - "ngram": N-Gram tokenizer.
+            - "jieba/*": Jieba tokenizer loaded from Lance's language model home.
+            - "lindera/*": Lindera tokenizer loaded from Lance's language model home.
        language : str, default "English"
-            The language to use for tokenization.
+            The language to use for stemming and stop-word removal. This is not
+            the primary way to enable CJK tokenization.
        max_token_length : int, default 40
            The maximum token length to index. Tokens longer than this length will be
            ignored.
@@ -999,6 +1059,13 @@ class Table(ABC):
            The timeout to wait if indexing is asynchronous.
        name: str, optional
            The name of the index. If not provided, a default name will be generated.
+
+        Notes
+        -----
+        Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
+        require tokenizer models in Lance's language model home. Set
+        ``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
+        directory under ``lance/language_models``.
        """
        raise NotImplementedError

@@ -2170,7 +2237,13 @@ class LanceTable(Table):
        index_cache_size: Optional[int] = None,
        num_bits: int = 8,
        index_type: Literal[
-            "IVF_FLAT", "IVF_SQ", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
+            "IVF_FLAT",
+            "IVF_SQ",
+            "IVF_PQ",
+            "IVF_RQ",
+            "IVF_HNSW_SQ",
+            "IVF_HNSW_PQ",
+            "IVF_HNSW_FLAT",
        ] = "IVF_PQ",
        max_iterations: int = 50,
        sample_rate: int = 256,
@@ -2257,6 +2330,16 @@ class LanceTable(Table):
                ef_construction=ef_construction,
                target_partition_size=target_partition_size,
            )
+        elif index_type == "IVF_HNSW_FLAT":
+            config = HnswFlat(
+                distance_type=metric,
+                num_partitions=num_partitions,
+                max_iterations=max_iterations,
+                sample_rate=sample_rate,
+                m=m,
+                ef_construction=ef_construction,
+                target_partition_size=target_partition_size,
+            )
        else:
            raise ValueError(f"Unknown index type {index_type}")

@@ -2462,14 +2545,22 @@ class LanceTable(Table):
            **tokenizer_configs,
        )

-        LOOP.run(
-            self._table.create_index(
-                field_names,
-                replace=replace,
-                config=config,
-                name=name,
+        try:
+            LOOP.run(
+                self._table.create_index(
+                    field_names,
+                    replace=replace,
+                    config=config,
+                    name=name,
+                )
            )
-        )
+        except (ValueError, RuntimeError) as e:
+            _maybe_add_fts_error_note(
+                e,
+                base_tokenizer=config.base_tokenizer,
+                language=config.language,
+            )
+            raise e

    @staticmethod
    def infer_tokenizer_configs(tokenizer_name: str) -> dict:
@@ -3799,7 +3890,18 @@ class AsyncTable:
        *,
        replace: Optional[bool] = None,
        config: Optional[
-            Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
+            Union[
+                IvfFlat,
+                IvfPq,
+                IvfRq,
+                HnswPq,
+                HnswSq,
+                HnswFlat,
+                BTree,
+                Bitmap,
+                LabelList,
+                FTS,
+            ]
        ] = None,
        wait_timeout: Optional[timedelta] = None,
        name: Optional[str] = None,
@@ -3846,6 +3948,7 @@ class AsyncTable:
                    IvfRq,
                    HnswPq,
                    HnswSq,
+                    HnswFlat,
                    BTree,
                    Bitmap,
                    LabelList,
@@ -3865,11 +3968,13 @@ class AsyncTable:
                name=name,
                train=train,
            )
-        except ValueError as e:
-            if "not support the requested language" in str(e):
-                supported_langs = ", ".join(lang_mapping.values())
-                help_msg = f"Supported languages: {supported_langs}"
-                add_note(e, help_msg)
+        except (ValueError, RuntimeError) as e:
+            if isinstance(config, FTS):
+                _maybe_add_fts_error_note(
+                    e,
+                    base_tokenizer=config.base_tokenizer,
+                    language=config.language,
+                )
            raise e

    async def drop_index(self, name: str) -> None:
@@ -5014,6 +5119,7 @@ class IndexStatistics:
        "IVF_RQ",
        "IVF_HNSW_SQ",
        "IVF_HNSW_PQ",
+        "IVF_HNSW_FLAT",
        "FTS",
        "BTREE",
        "BITMAP",
--- a/python/python/lancedb/types.py
+++ b/python/python/lancedb/types.py
@@ -24,6 +24,7 @@ VectorIndexType = Literal[
    "IVF_PQ",
    "IVF_HNSW_SQ",
    "IVF_HNSW_PQ",
+    "IVF_HNSW_FLAT",
    "IVF_RQ",
 ]
 ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
@@ -31,6 +32,7 @@ IndexType = Literal[
    "IVF_PQ",
    "IVF_HNSW_PQ",
    "IVF_HNSW_SQ",
+    "IVF_HNSW_FLAT",
    "IVF_SQ",
    "FTS",
    "BTREE",
@@ -40,4 +42,5 @@ IndexType = Literal[
 ]

 # Tokenizer literals
-BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
+BuiltinTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
+BaseTokenizerType = BuiltinTokenizerType | str
--- a/python/python/tests/models/jieba/default/dict.txt
+++ b/python/python/tests/models/jieba/default/dict.txt
@@ -0,0 +1,8 @@
+我们 98740 r
+都 202780 d
+有 423765 v
+光明 1219 n
+的 318825 uj
+前途 1263 n
+前 62779 f
+途 857 n
--- a/python/python/tests/models/lindera/ipadic/config.yml
+++ b/python/python/tests/models/lindera/ipadic/config.yml
@@ -0,0 +1,4 @@
+segmenter:
+  mode: "normal"
+  dictionary:
+    path: "./python/tests/models/lindera/ipadic/main"
--- a/python/python/tests/models/lindera/ipadic/main.zip
+++ b/python/python/tests/models/lindera/ipadic/main.zip
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -15,7 +15,10 @@
 #  limitations under the License.
 import os
 import random
+import shutil
 from unittest import mock
+from pathlib import Path
+import zipfile

 import lancedb as ldb
 from lancedb.db import DBConnection
@@ -36,6 +39,8 @@ import pytest
 import pytest_asyncio
 from utils import exception_output

+TEST_LANGUAGE_MODEL_HOME = Path(__file__).parent / "models"
+

@pytest.fixture
 def table(tmp_path) -> ldb.table.LanceTable:
@@ -89,6 +94,40 @@ def table(tmp_path) -> ldb.table.LanceTable:
    return table


+@pytest.fixture
+def language_model_home(monkeypatch, tmp_path):
+    model_home = tmp_path / "language-models"
+    shutil.copytree(TEST_LANGUAGE_MODEL_HOME, model_home)
+    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(model_home))
+    return model_home
+
+
+@pytest.fixture
+def lindera_ipadic(language_model_home):
+    model_path = language_model_home / "lindera" / "ipadic"
+    extracted_model = model_path / "main"
+    config_path = model_path / "config.yml"
+
+    if extracted_model.exists():
+        shutil.rmtree(extracted_model)
+
+    with zipfile.ZipFile(model_path / "main.zip", "r") as zip_ref:
+        zip_ref.extractall(model_path)
+    config_path.write_text(
+        "segmenter:\n"
+        '  mode: "normal"\n'
+        "  dictionary:\n"
+        f'    path: "{extracted_model.resolve().as_posix()}"\n',
+        encoding="utf-8",
+    )
+
+    try:
+        yield
+    finally:
+        if extracted_model.exists():
+            shutil.rmtree(extracted_model)
+
+
@pytest_asyncio.fixture
 async def async_table(tmp_path) -> ldb.table.AsyncTable:
    # Use local random state to avoid affecting other tests
@@ -684,6 +723,90 @@ def test_fts_ngram(mem_db: DBConnection):
    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}


+def test_fts_jieba_tokenizer(mem_db: DBConnection, language_model_home):
+    data = pa.table({"text": ["我们都有光明的前途", "光明的前途"]})
+    table = mem_db.create_table("test_jieba", data=data)
+    table.create_fts_index(
+        "text",
+        base_tokenizer="jieba/default",
+        stem=False,
+        remove_stop_words=False,
+        ascii_folding=False,
+    )
+
+    results = table.search("我们", query_type="fts").limit(10).to_list()
+    assert [row["text"] for row in results] == ["我们都有光明的前途"]
+
+
+def test_fts_jieba_missing_language_model_note(
+    mem_db: DBConnection, monkeypatch, tmp_path
+):
+    missing_root = tmp_path / "missing-language-models"
+    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
+    table = mem_db.create_table(
+        "test_missing_jieba_model",
+        data=pa.table({"text": ["我们都有光明的前途"]}),
+    )
+
+    with pytest.raises((ValueError, RuntimeError)) as e:
+        table.create_fts_index(
+            "text",
+            base_tokenizer="jieba/default",
+            stem=False,
+            remove_stop_words=False,
+            ascii_folding=False,
+        )
+
+    output = exception_output(e)
+    assert "Invalid directory path:" in output
+    assert "LANCE_LANGUAGE_MODEL_HOME" in output
+    assert "jieba/default" in output
+
+
+@pytest.mark.asyncio
+async def test_fts_jieba_missing_language_model_note_async(monkeypatch, tmp_path):
+    missing_root = tmp_path / "missing-language-models"
+    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
+    db = await ldb.connect_async(tmp_path / "async-db")
+    table = await db.create_table(
+        "test_missing_jieba_model_async",
+        data=pa.table({"text": ["我们都有光明的前途"]}),
+    )
+
+    with pytest.raises((ValueError, RuntimeError)) as e:
+        await table.create_index(
+            "text",
+            config=FTS(
+                base_tokenizer="jieba/default",
+                stem=False,
+                remove_stop_words=False,
+                ascii_folding=False,
+            ),
+        )
+
+    output = exception_output(e)
+    assert "Invalid directory path:" in output
+    assert "LANCE_LANGUAGE_MODEL_HOME" in output
+    assert "jieba/default" in output
+
+
+def test_fts_lindera_tokenizer(
+    mem_db: DBConnection, language_model_home, lindera_ipadic
+):
+    data = pa.table({"text": ["成田国際空港", "東京国際空港", "羽田空港"]})
+    table = mem_db.create_table("test_lindera", data=data)
+    table.create_fts_index(
+        "text",
+        base_tokenizer="lindera/ipadic",
+        stem=False,
+        remove_stop_words=False,
+        ascii_folding=False,
+    )
+
+    results = table.search("成田", query_type="fts").limit(10).to_list()
+    assert [row["text"] for row in results] == ["成田国際空港"]
+
+
 def test_fts_query_to_json():
    """Test that FTS query to_json() produces valid JSON strings with exact format."""

--- a/python/python/tests/test_index.py
+++ b/python/python/tests/test_index.py
@@ -16,11 +16,13 @@ from lancedb.index import (
    IvfSq,
    IvfHnswPq,
    IvfHnswSq,
+    IvfHnswFlat,
    IvfRq,
    Bitmap,
    LabelList,
    HnswPq,
    HnswSq,
+    HnswFlat,
    FTS,
 )
 from lancedb.table import IndexStatistics
@@ -250,6 +252,21 @@ async def test_create_hnswpq_alias_index(some_table: AsyncTable):
    assert indices[0].index_type in {"HnswPq", "IvfHnswPq"}


+@pytest.mark.asyncio
+async def test_create_hnswflat_index(some_table: AsyncTable):
+    await some_table.create_index("vector", config=HnswFlat(num_partitions=10))
+    indices = await some_table.list_indices()
+    assert len(indices) == 1
+
+
+@pytest.mark.asyncio
+async def test_create_hnswflat_alias_index(some_table: AsyncTable):
+    await some_table.create_index("vector", config=IvfHnswFlat(num_partitions=5))
+    indices = await some_table.list_indices()
+    assert len(indices) == 1
+    assert indices[0].index_type in {"HnswFlat", "IvfHnswFlat"}
+
+
@pytest.mark.asyncio
 async def test_create_ivfsq_index(some_table: AsyncTable):
    await some_table.create_index("vector", config=IvfSq(num_partitions=10))
@@ -295,6 +312,7 @@ def test_index_statistics_index_type_lists_all_supported_values():
        "IVF_RQ",
        "IVF_HNSW_SQ",
        "IVF_HNSW_PQ",
+        "IVF_HNSW_FLAT",
        "FTS",
        "BTREE",
        "BITMAP",
--- a/python/python/tests/test_permutation.py
+++ b/python/python/tests/test_permutation.py
@@ -9,21 +9,6 @@ from lancedb import DBConnection, Table, connect
 from lancedb.permutation import Permutation, Permutations, permutation_builder


-def test_permutation_persistence(tmp_path):
-    db = connect(tmp_path)
-    tbl = db.create_table("test_table", pa.table({"x": range(100), "y": range(100)}))
-
-    permutation_tbl = (
-        permutation_builder(tbl).shuffle().persist(db, "test_permutation").execute()
-    )
-    assert permutation_tbl.count_rows() == 100
-
-    re_open = db.open_table("test_permutation")
-    assert re_open.count_rows() == 100
-
-    assert permutation_tbl.to_arrow() == re_open.to_arrow()
-
-
 def test_split_random_ratios(mem_db):
    """Test random splitting with ratios."""
    tbl = mem_db.create_table(
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -6,6 +6,8 @@ import contextlib
 from datetime import timedelta
 import http.server
 import json
+import multiprocessing as mp
+import sys
 import threading
 import time
 from unittest.mock import MagicMock, patch
@@ -1230,3 +1232,82 @@ def test_background_loop_cancellation(exception):
        with pytest.raises(exception):
            loop.run(None)
        mock_future.cancel.assert_called_once()
+
+
+def _remote_fork_child(port: int, queue) -> None:
+    # Build a fresh Connection in the child so we exercise the at-fork-child
+    # tokio runtime reset rather than relying on an inherited reqwest client.
+    db = lancedb.connect(
+        "db://dev",
+        api_key="fake",
+        host_override=f"http://localhost:{port}",
+        client_config={
+            "retry_config": {"retries": 0},
+            "timeout_config": {"connect_timeout": 2, "read_timeout": 2},
+        },
+    )
+    queue.put(db.table_names())
+
+
+@pytest.mark.skipif(
+    sys.platform != "linux",
+    reason=(
+        "fork() is unavailable on Windows and unsafe on macOS "
+        "(Apple frameworks/TLS are not fork-safe)"
+    ),
+)
+def test_remote_connection_after_fork():
+    """A freshly-built remote Connection in a forked child should not hang.
+
+    The pyo3-async-runtimes tokio runtime would otherwise be inherited from
+    the parent with dead worker threads; the at-fork-child handler in our
+    runtime module rebuilds it on first use in the child.
+    """
+
+    def handler(request):
+        request.send_response(200)
+        request.send_header("Content-Type", "application/json")
+        request.end_headers()
+        request.wfile.write(b'{"tables": []}')
+
+    server = http.server.HTTPServer(("localhost", 0), make_mock_http_handler(handler))
+    port = server.server_address[1]
+    server_thread = threading.Thread(target=server.serve_forever)
+    server_thread.start()
+    try:
+        # Hit the server in the parent first so the runtime + LOOP are warm
+        # before fork; a fresh child must still succeed.
+        parent_db = lancedb.connect(
+            "db://dev",
+            api_key="fake",
+            host_override=f"http://localhost:{port}",
+            client_config={
+                "retry_config": {"retries": 0},
+                "timeout_config": {"connect_timeout": 2, "read_timeout": 2},
+            },
+        )
+        assert parent_db.table_names() == []
+
+        ctx = mp.get_context("fork")
+        queue = ctx.Queue()
+        proc = ctx.Process(target=_remote_fork_child, args=(port, queue))
+        proc.start()
+        proc.join(timeout=15)
+
+        if proc.is_alive():
+            proc.terminate()
+            proc.join(timeout=5)
+            if proc.is_alive():
+                proc.kill()
+                proc.join()
+            pytest.fail("Remote connection hung after fork")
+
+        assert proc.exitcode == 0, f"child exited with code {proc.exitcode}"
+        assert not queue.empty(), "child produced no result"
+        assert queue.get() == []
+
+        # Parent connection must still be usable after the child returned.
+        assert parent_db.table_names() == []
+    finally:
+        server.shutdown()
+        server_thread.join()
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -11,7 +11,7 @@ from unittest.mock import patch

 import lancedb
 from lancedb.dependencies import _PANDAS_AVAILABLE
-from lancedb.index import HnswPq, HnswSq, IvfPq
+from lancedb.index import HnswFlat, HnswPq, HnswSq, IvfPq
 import numpy as np
 import polars as pl
 import pyarrow as pa
@@ -917,6 +917,21 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
        "my_vector", replace=True, config=expected_config, name=None, train=True
    )

+    table.create_index(
+        vector_column_name="my_vector",
+        metric="cosine",
+        index_type="IVF_HNSW_FLAT",
+        sample_rate=0.1,
+        m=29,
+        ef_construction=10,
+    )
+    expected_config = HnswFlat(
+        distance_type="cosine", sample_rate=0.1, m=29, ef_construction=10
+    )
+    mock_create_index.assert_called_with(
+        "my_vector", replace=True, config=expected_config, name=None, train=True
+    )
+

@patch("lancedb.table.AsyncTable.create_index")
 def test_create_index_name_and_train_parameters(
--- a/python/python/tests/test_torch.py
+++ b/python/python/tests/test_torch.py
@@ -1,14 +1,29 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

+import functools
+import multiprocessing as mp
+import pickle
+import sys
+
+import lancedb
 import pyarrow as pa
 import pytest
+from lancedb.permutation import Permutation, Permutations, permutation_builder
 from lancedb.util import tbl_to_tensor
-from lancedb.permutation import Permutation

 torch = pytest.importorskip("torch")


+def _open_native_table(uri: str, table_name: str):
+    """Top-level connection factory used by the explicit-factory pickle test.
+
+    Defined at module scope so that pickle can resolve it by name in the
+    worker / unpickling process.
+    """
+    return lancedb.connect(uri).open_table(table_name)
+
+
 def test_table_dataloader(mem_db):
    table = mem_db.create_table("test_table", pa.table({"a": range(1000)}))
    dataloader = torch.utils.data.DataLoader(
@@ -40,3 +55,156 @@ def test_permutation_dataloader(mem_db):
    for batch in dataloader:
        assert batch.size(0) == 1
        assert batch.size(1) == 10
+
+
+def test_permutation_is_picklable(tmp_db):
+    """A Permutation must be picklable so it can be used with PyTorch's
+    DataLoader when num_workers > 0 (which uses multiprocessing and pickles
+    the dataset to pass it to worker processes)."""
+    table = tmp_db.create_table("test_table", pa.table({"a": range(1000)}))
+    permutation = Permutation.identity(table)
+
+    pickled = pickle.dumps(permutation)
+    restored = pickle.loads(pickled)
+
+    assert len(restored) == 1000
+    rows = restored.__getitems__([0, 1, 2])
+    assert rows == [{"a": 0}, {"a": 1}, {"a": 2}]
+
+
+def test_permutation_with_memory_base_is_picklable(mem_db):
+    """An in-memory base table is inlined into the pickle as Arrow IPC bytes
+    and rebuilt on the other side as an in-memory LanceTable, so the
+    Permutation round-trips even though the original database can't be
+    reopened across processes."""
+    table = mem_db.create_table("test_table", pa.table({"a": range(50)}))
+    permutation = Permutation.identity(table)
+
+    restored = pickle.loads(pickle.dumps(permutation))
+
+    assert len(restored) == 50
+    assert restored.__getitems__([0, 10, 49]) == [{"a": 0}, {"a": 10}, {"a": 49}]
+
+
+def test_permutation_dataloader_multiprocessing(tmp_db):
+    """Using a Permutation with a PyTorch DataLoader that has num_workers > 0
+    must work end-to-end. Each worker process gets a pickled copy of the
+    dataset and reads batches from it."""
+    table = tmp_db.create_table("test_table", pa.table({"a": range(1000)}))
+    permutation = Permutation.identity(table)
+
+    dataloader = torch.utils.data.DataLoader(
+        permutation,
+        batch_size=10,
+        shuffle=True,
+        num_workers=2,
+        multiprocessing_context="spawn",
+    )
+    seen = 0
+    for batch in dataloader:
+        assert batch["a"].size(0) == 10
+        seen += batch["a"].size(0)
+    assert seen == 1000
+
+
+def test_permutation_pickle_with_connection_factory(tmp_path):
+    """When the user provides a connection_factory, pickling should round-trip
+    through that factory rather than introspecting the connection URI. Useful
+    for remote / cloud connections where the URI alone isn't reopenable."""
+    db = lancedb.connect(tmp_path)
+    db.create_table("test_table", pa.table({"a": range(50)}))
+
+    factory = functools.partial(_open_native_table, str(tmp_path))
+    permutation = Permutation.identity(factory("test_table")).with_connection_factory(
+        factory
+    )
+
+    restored = pickle.loads(pickle.dumps(permutation))
+
+    assert len(restored) == 50
+    # The factory survives pickling and is what powered base-table reopen.
+    assert restored.connection_factory is not None
+    assert restored.connection_factory.func is _open_native_table
+    assert restored.__getitems__([0, 1, 2]) == [{"a": 0}, {"a": 1}, {"a": 2}]
+
+
+def test_permutation_with_builder_is_picklable(tmp_db):
+    """A Permutation built from a non-identity permutation table must round-trip
+    through pickle while preserving the row order defined by the permutation."""
+    table = tmp_db.create_table("test_table", pa.table({"a": range(100)}))
+    perm_tbl = (
+        permutation_builder(table)
+        .split_random(ratios=[0.8, 0.2], seed=42, split_names=["train", "test"])
+        .shuffle(seed=42)
+        .execute()
+    )
+    permutations = Permutations(table, perm_tbl)
+    permutation = permutations["train"]
+
+    indices = list(range(len(permutation)))
+    expected = permutation.__getitems__(indices)
+
+    restored = pickle.loads(pickle.dumps(permutation))
+
+    assert len(restored) == len(permutation)
+    assert restored.__getitems__(indices) == expected
+
+
+def _multiworker_dataloader_target(db_uri: str, result_queue):
+    import lancedb
+    from lancedb.permutation import Permutation
+
+    db = lancedb.connect(db_uri)
+    table = db.open_table("test_table")
+    permutation = Permutation.identity(table)
+
+    dataloader = torch.utils.data.DataLoader(
+        permutation,
+        batch_size=10,
+        num_workers=2,
+        multiprocessing_context="fork",
+    )
+    count = 0
+    for batch in dataloader:
+        assert batch["a"].size(0) == 10
+        count += 1
+    result_queue.put(count)
+
+
+@pytest.mark.skipif(
+    sys.platform != "linux",
+    reason=(
+        "fork() is unavailable on Windows and unsafe on macOS "
+        "(Apple frameworks/TLS are not fork-safe)"
+    ),
+)
+def test_permutation_dataloader_fork_workers(tmp_path):
+    """A Permutation used by a fork-based DataLoader should not hang.
+
+    PyTorch's DataLoader uses fork-based multiprocessing by default on Linux.
+    LanceDB drives async work through a background asyncio thread, and threads
+    do not survive a fork; unless that machinery is rebuilt in the worker, any
+    LOOP.run() there blocks forever.  This test guards against that
+    regression: a hang shows up as the join timeout below expiring.
+    """
+    import lancedb
+
+    db_uri = str(tmp_path / "db")
+    db = lancedb.connect(db_uri)
+    db.create_table("test_table", pa.table({"a": list(range(1000))}))
+
+    # The outer process is spawned; the fork under test happens inside it,
+    # where the target builds its DataLoader with multiprocessing_context="fork".
+    ctx = mp.get_context("spawn")
+    queue = ctx.Queue()
+    proc = ctx.Process(target=_multiworker_dataloader_target, args=(db_uri, queue))
+    proc.start()
+    proc.join(timeout=30)
+
+    # Still alive after the timeout means the child hung: escalate from
+    # terminate() to kill() so this test cannot block, then fail explicitly.
+    if proc.is_alive():
+        proc.terminate()
+        proc.join(timeout=5)
+        if proc.is_alive():
+            proc.kill()
+            proc.join()
+        pytest.fail("Permutation hung when iterated in a fork-based DataLoader worker")
+
+    assert proc.exitcode == 0, f"child exited with code {proc.exitcode}"
+    assert not queue.empty(), "child produced no batches"
+    # 1000 rows read at batch_size=10 in the child -> 100 batches.
+    assert queue.get() == 100
--- a/python/src/arrow.rs
+++ b/python/src/arrow.rs
@@ -3,6 +3,8 @@

 use std::sync::Arc;

+use crate::error::PythonErrorExt;
+use crate::runtime::future_into_py;
 use arrow::{
    datatypes::SchemaRef,
    pyarrow::{IntoPyArrow, ToPyArrow},
@@ -12,9 +14,6 @@ use lancedb::arrow::SendableRecordBatchStream;
 use pyo3::{
    Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods,
 };
-use pyo3_async_runtimes::tokio::future_into_py;
-
-use crate::error::PythonErrorExt;

 #[pyclass]
 pub struct RecordBatchStream {
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -7,6 +7,12 @@ use std::{
    time::Duration,
 };

+use crate::{
+    error::PythonErrorExt,
+    namespace::{create_namespace_storage_options_provider, extract_namespace_arc},
+    runtime::future_into_py,
+    table::Table,
+};
 use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
 use lancedb::{
    connection::Connection as LanceConnection,
@@ -20,13 +26,6 @@ use pyo3::{
    pyclass, pyfunction, pymethods,
    types::{PyDict, PyDictMethods},
 };
-use pyo3_async_runtimes::tokio::future_into_py;
-
-use crate::{
-    error::PythonErrorExt,
-    namespace::{create_namespace_storage_options_provider, extract_namespace_arc},
-    table::Table,
-};

 #[pyclass]
 pub struct Connection {
--- a/python/src/index.rs
+++ b/python/src/index.rs
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
+use lancedb::index::vector::{
+    IvfFlatIndexBuilder, IvfHnswFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder,
+    IvfPqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
+};
 use lancedb::index::{
    Index as LanceDbIndex,
    scalar::{BTreeIndexBuilder, FtsIndexBuilder},
-    vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
 };
 use pyo3::IntoPyObject;
 use pyo3::types::PyStringMethods;
@@ -162,8 +164,26 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                }
                Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
            }
+            "HnswFlat" => {
+                let params = source.extract::<IvfHnswFlatParams>()?;
+                let distance_type = parse_distance_type(params.distance_type)?;
+                let mut hnsw_flat_builder = IvfHnswFlatIndexBuilder::default()
+                    .distance_type(distance_type)
+                    .max_iterations(params.max_iterations)
+                    .sample_rate(params.sample_rate)
+                    .num_edges(params.m)
+                    .ef_construction(params.ef_construction);
+                if let Some(num_partitions) = params.num_partitions {
+                    hnsw_flat_builder = hnsw_flat_builder.num_partitions(num_partitions);
+                }
+                if let Some(target_partition_size) = params.target_partition_size {
+                    hnsw_flat_builder =
+                        hnsw_flat_builder.target_partition_size(target_partition_size);
+                }
+                Ok(LanceDbIndex::IvfHnswFlat(hnsw_flat_builder))
+            }
            not_supported => Err(PyValueError::new_err(format!(
-                "Invalid index type '{}'.  Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
+                "Invalid index type '{}'.  Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, IvfHnswSq, or IvfHnswFlat",
                not_supported
            ))),
        }
@@ -250,6 +270,17 @@ struct IvfHnswSqParams {
    target_partition_size: Option<u32>,
 }

+/// Parameters extracted from a Python `HnswFlat` index config object and
+/// forwarded to `IvfHnswFlatIndexBuilder`.
+#[derive(FromPyObject)]
+struct IvfHnswFlatParams {
+    /// Distance metric name; converted with `parse_distance_type`.
+    distance_type: String,
+    /// Forwarded to the builder only when set.
+    num_partitions: Option<u32>,
+    /// Passed to the builder's `max_iterations`.
+    max_iterations: u32,
+    /// Passed to the builder's `sample_rate`.
+    sample_rate: u32,
+    /// Passed to the builder as `num_edges`.
+    m: u32,
+    /// Passed to the builder's `ef_construction`.
+    ef_construction: u32,
+    /// Forwarded to the builder only when set.
+    target_partition_size: Option<u32>,
+}
+
 #[pyclass(get_all)]
 /// A description of an index currently configured on a column
 pub struct IndexConfig {
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -28,6 +28,7 @@ pub mod index;
 pub mod namespace;
 pub mod permutation;
 pub mod query;
+pub mod runtime;
 pub mod session;
 pub mod table;
 pub mod util;
--- a/python/src/permutation.rs
+++ b/python/src/permutation.rs
@@ -4,7 +4,7 @@
 use std::sync::{Arc, Mutex};

 use crate::{
-    arrow::RecordBatchStream, connection::Connection, error::PythonErrorExt, table::Table,
+    arrow::RecordBatchStream, error::PythonErrorExt, runtime::future_into_py, table::Table,
 };
 use arrow::pyarrow::{PyArrowType, ToPyArrow};
 use lancedb::{
@@ -21,7 +21,6 @@ use pyo3::{
    pyclass, pymethods,
    types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
 };
-use pyo3_async_runtimes::tokio::future_into_py;

 fn table_from_py<'a>(table: Bound<'a, PyAny>) -> PyResult<Bound<'a, Table>> {
    if table.hasattr("_inner")? {
@@ -80,24 +79,6 @@ impl PyAsyncPermutationBuilder {

 #[pymethods]
 impl PyAsyncPermutationBuilder {
-    #[pyo3(signature = (database, table_name))]
-    pub fn persist(
-        slf: PyRefMut<'_, Self>,
-        database: Bound<'_, PyAny>,
-        table_name: String,
-    ) -> PyResult<Self> {
-        let conn = if database.hasattr("_conn")? {
-            database
-                .getattr("_conn")?
-                .getattr("_inner")?
-                .cast_into::<Connection>()?
-        } else {
-            database.getattr("_inner")?.cast_into::<Connection>()?
-        };
-        let database = conn.borrow().database()?;
-        slf.modify(|builder| builder.persist(database, table_name))
-    }
-
    #[pyo3(signature = (*, ratios=None, counts=None, fixed=None, seed=None, split_names=None))]
    pub fn split_random(
        slf: PyRefMut<'_, Self>,
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -4,6 +4,11 @@
 use std::sync::Arc;
 use std::time::Duration;

+use crate::expr::PyExpr;
+use crate::runtime::future_into_py;
+use crate::util::parse_distance_type;
+use crate::{arrow::RecordBatchStream, util::PyLanceDB};
+use crate::{error::PythonErrorExt, index::class_name};
 use arrow::array::Array;
 use arrow::array::ArrayData;
 use arrow::array::make_array;
@@ -36,12 +41,6 @@ use pyo3::types::{PyDict, PyString};
 use pyo3::{Borrowed, FromPyObject, exceptions::PyRuntimeError};
 use pyo3::{PyErr, pyclass};
 use pyo3::{exceptions::PyValueError, intern};
-use pyo3_async_runtimes::tokio::future_into_py;
-
-use crate::expr::PyExpr;
-use crate::util::parse_distance_type;
-use crate::{arrow::RecordBatchStream, util::PyLanceDB};
-use crate::{error::PythonErrorExt, index::class_name};

 impl<'a, 'py> FromPyObject<'a, 'py> for PyLanceDB<FtsQuery> {
    type Error = PyErr;
--- a/python/src/runtime.rs
+++ b/python/src/runtime.rs
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+//! Fork-safe wrapper around tokio + pyo3-async-runtimes.
+//!
+//! `pyo3_async_runtimes::tokio` keeps its multi-threaded runtime in a
+//! `OnceLock` that can never be replaced.  Tokio's worker threads do not
+//! survive `fork()`, so once a child inherits a "frozen" runtime, every
+//! `future_into_py` call hangs forever.
+//!
+//! We sidestep the global by routing every future through our own
+//! [`LanceRuntime`] (a [`pyo3_async_runtimes::generic::Runtime`] impl) backed
+//! by an [`AtomicPtr`] to a tokio runtime that we own.  A `pthread_atfork`
+//! child handler nulls the pointer; the next `spawn` rebuilds the runtime in
+//! the child.  This mirrors the pattern used in the Lance Python bindings.
+
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
+
+use pyo3::{Bound, PyAny, PyResult, Python, conversion::IntoPyObject};
+use pyo3_async_runtimes::{
+    TaskLocals,
+    generic::{ContextExt, JoinError, Runtime},
+};
+use tokio::{runtime, task};
+
+static RUNTIME: AtomicPtr<runtime::Runtime> = AtomicPtr::new(std::ptr::null_mut());
+static RUNTIME_INSTALLING: AtomicBool = AtomicBool::new(false);
+static ATFORK_INSTALLED: AtomicBool = AtomicBool::new(false);
+
+/// Builds a fresh multi-threaded tokio runtime with all drivers enabled and
+/// worker threads named `lancedb-tokio-worker`.
+///
+/// # Panics
+///
+/// Panics if tokio fails to build the runtime.
+fn create_runtime() -> runtime::Runtime {
+    let mut builder = runtime::Builder::new_multi_thread();
+    builder.enable_all().thread_name("lancedb-tokio-worker");
+    builder.build().expect("Failed to build tokio runtime")
+}
+
+/// Returns the process-wide tokio runtime, building it on first use.
+///
+/// Exactly one caller wins the `RUNTIME_INSTALLING` claim and builds the
+/// runtime; concurrent callers yield until the pointer is published.  The
+/// at-fork child handler is registered once, just before the first build.
+///
+/// # Panics
+///
+/// Panics if the runtime cannot be built.  The install claim is released in
+/// that case, so a later call retries instead of spinning forever.
+fn get_runtime() -> &'static runtime::Runtime {
+    /// Gives the install claim back if the installer unwinds before a runtime
+    /// has been published.  After a successful install the claim is left set
+    /// on purpose: clearing it could let a racing caller build a second one.
+    struct InstallClaim;
+
+    impl Drop for InstallClaim {
+        fn drop(&mut self) {
+            if RUNTIME.load(Ordering::SeqCst).is_null() {
+                RUNTIME_INSTALLING.store(false, Ordering::SeqCst);
+            }
+        }
+    }
+
+    loop {
+        let ptr = RUNTIME.load(Ordering::SeqCst);
+        if !ptr.is_null() {
+            // SAFETY: a non-null pointer in `RUNTIME` always comes from the
+            // `Box::into_raw` below and is never freed (after a fork the old
+            // runtime is leaked, not dropped), so it stays valid for `'static`.
+            return unsafe { &*ptr };
+        }
+        // `fetch_or` returns the previous value: `false` means we just took
+        // the claim and must install; `true` means someone else is on it.
+        if !RUNTIME_INSTALLING.fetch_or(true, Ordering::SeqCst) {
+            break;
+        }
+        std::thread::yield_now();
+    }
+    let _claim = InstallClaim;
+
+    if !ATFORK_INSTALLED.fetch_or(true, Ordering::SeqCst) {
+        install_atfork();
+    }
+    let new_ptr = Box::into_raw(Box::new(create_runtime()));
+    RUNTIME.store(new_ptr, Ordering::SeqCst);
+    // SAFETY: `new_ptr` was just produced by `Box::into_raw` and is never
+    // freed, so the reference is valid for `'static`.
+    unsafe { &*new_ptr }
+}
+
+/// Runs in async-signal context after `fork()` in the child.  We can only
+/// touch atomics here; we deliberately leak the previous runtime because
+/// dropping a tokio `Runtime` would try to join its (now-dead) worker
+/// threads and hang.
+extern "C" fn atfork_child() {
+    // Forget the inherited runtime so the next `get_runtime` call builds a
+    // new one in this process.
+    RUNTIME.store(std::ptr::null_mut(), Ordering::SeqCst);
+    // Clear the install claim inherited from the parent so the child can
+    // claim installation for itself.
+    RUNTIME_INSTALLING.store(false, Ordering::SeqCst);
+}
+
+/// Registers [`atfork_child`] as the `pthread_atfork` child handler; no
+/// prepare or parent handlers are installed.
+///
+/// NOTE(review): the return value of `pthread_atfork` is ignored, so a failed
+/// registration goes unnoticed.
+#[cfg(not(windows))]
+fn install_atfork() {
+    // SAFETY: `atfork_child` is an `extern "C" fn()` that only stores to
+    // atomics, which is what a fork handler is allowed to do.
+    unsafe { libc::pthread_atfork(None, None, Some(atfork_child)) };
+}
+
+/// No-op on Windows, where there is no `fork()` to hook.
+#[cfg(windows)]
+fn install_atfork() {}
+
+/// Marker type implementing [`Runtime`] over our fork-safe runtime slot.
+pub struct LanceRuntime;
+
+/// Newtype wrapper around `tokio::task::JoinError` so we can implement the
+/// foreign [`JoinError`] trait without violating orphan rules.
+pub struct LanceJoinError(task::JoinError);
+
+impl JoinError for LanceJoinError {
+    /// Delegates to the wrapped tokio error's `is_panic`.
+    fn is_panic(&self) -> bool {
+        self.0.is_panic()
+    }
+    /// Delegates to the wrapped tokio error's `into_panic`, consuming `self`.
+    fn into_panic(self) -> Box<dyn std::any::Any + Send + 'static> {
+        self.0.into_panic()
+    }
+}
+
+impl Runtime for LanceRuntime {
+    type JoinError = LanceJoinError;
+    type JoinHandle = Pin<Box<dyn Future<Output = Result<(), Self::JoinError>> + Send>>;
+
+    /// Spawns `fut` on the fork-safe runtime and returns a boxed future that
+    /// resolves to the task's join result.
+    fn spawn<F>(fut: F) -> Self::JoinHandle
+    where
+        F: Future<Output = ()> + Send + 'static,
+    {
+        let task = get_runtime().spawn(fut);
+        Box::pin(async move {
+            match task.await {
+                Ok(()) => Ok(()),
+                Err(join_err) => Err(LanceJoinError(join_err)),
+            }
+        })
+    }
+
+    /// Runs `f` via the runtime's `spawn_blocking` and returns a boxed future
+    /// that resolves to the task's join result.
+    fn spawn_blocking<F>(f: F) -> Self::JoinHandle
+    where
+        F: FnOnce() + Send + 'static,
+    {
+        let task = get_runtime().spawn_blocking(f);
+        Box::pin(async move {
+            match task.await {
+                Ok(()) => Ok(()),
+                Err(join_err) => Err(LanceJoinError(join_err)),
+            }
+        })
+    }
+}
+
+tokio::task_local! {
+    // Per-task slot holding the Python task locals for the running future.
+    static TASK_LOCALS: std::cell::OnceCell<TaskLocals>;
+}
+
+impl ContextExt for LanceRuntime {
+    /// Wraps `fut` so that `locals` is visible through
+    /// [`ContextExt::get_task_locals`] while it runs.
+    fn scope<F, R>(locals: TaskLocals, fut: F) -> Pin<Box<dyn Future<Output = R> + Send>>
+    where
+        F: Future<Output = R> + Send + 'static,
+    {
+        let seeded = std::cell::OnceCell::from(locals);
+        Box::pin(TASK_LOCALS.scope(seeded, fut))
+    }
+
+    /// Returns a clone of the current task's locals, or `None` when called
+    /// outside a [`ContextExt::scope`]d future.
+    fn get_task_locals() -> Option<TaskLocals> {
+        match TASK_LOCALS.try_with(|slot| slot.get().cloned()) {
+            Ok(found) => found,
+            Err(_) => None,
+        }
+    }
+}
+
+/// Drop-in replacement for `pyo3_async_runtimes::tokio::future_into_py` that
+/// uses our fork-safe runtime.
+///
+/// Delegates to `pyo3_async_runtimes::generic::future_into_py` with
+/// [`LanceRuntime`] as the runtime, so the future is spawned through
+/// `get_runtime` rather than the library's own global tokio runtime.
+///
+/// # Errors
+///
+/// Returns whatever error the generic `future_into_py` reports.
+pub fn future_into_py<F, T>(py: Python<'_>, fut: F) -> PyResult<Bound<'_, PyAny>>
+where
+    F: Future<Output = PyResult<T>> + Send + 'static,
+    T: for<'py> IntoPyObject<'py> + Send + 'static,
+{
+    pyo3_async_runtimes::generic::future_into_py::<LanceRuntime, _, T>(py, fut)
+}
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -2,6 +2,7 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 use std::{collections::HashMap, sync::Arc};

+use crate::runtime::future_into_py;
 use crate::{
    connection::Connection,
    error::PythonErrorExt,
@@ -24,7 +25,6 @@ use pyo3::{
    pyclass, pymethods,
    types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
 };
-use pyo3_async_runtimes::tokio::future_into_py;

 mod scannable;

--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -40,7 +40,7 @@ lance-datafusion.workspace = true
 lance-datagen = { workspace = true }
 lance-file = { workspace = true }
 lance-io = { workspace = true }
-lance-index = { workspace = true }
+lance-index = { workspace = true, features = ["tokenizer-jieba", "tokenizer-lindera"] }
 lance-table = { workspace = true }
 lance-linalg = { workspace = true }
 lance-testing = { workspace = true }
@@ -108,7 +108,12 @@ test-log = "0.2"

 [features]
 default = []
-aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
+aws = [
+    "lance/aws",
+    "lance-io/aws",
+    "lance-namespace-impls/dir-aws",
+    "object_store/aws",
+]
 oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
 gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
 azure = [
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -505,8 +505,15 @@ impl ListingDatabase {
                // Filter out the commit store query param -- it's a lancedb param
                url.query_pairs_mut().clear();
                url.query_pairs_mut().extend_pairs(filtered_querys);
-                // Take a copy of the query string so we can propagate it to lance
-                let query_string = url.query().map(|s| s.to_string());
+                // Take a copy of the query string so we can propagate it to lance.
+                // `query_pairs_mut()` leaves the URL with `Some("")` even when no
+                // pairs survive (or none existed in the first place), so an empty
+                // string here must be treated the same as "no query" — otherwise
+                // every table URI ends up with a trailing `?`, which makes downstream
+                // sub-paths (e.g. MemWAL gen paths) re-parse as path=<base table> +
+                // query=<sub-path>, causing Lance to find the base table dataset
+                // when looking up the sub-path.
+                let query_string = url.query().filter(|q| !q.is_empty()).map(|s| s.to_string());
                // clear the query string so we can use the url as the base uri
                // use .set_query(None) instead of .set_query("") because the latter
                // will add a trailing '?' to the url
@@ -715,7 +722,7 @@ impl ListingDatabase {
        let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params)).await?;
        for name in names {
            let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
-            let full_path = self.base_path.child(dir_name.clone());
+            let full_path = self.base_path.clone().join(dir_name.clone());

            commit_handler.delete(&full_path).await?;

@@ -842,6 +849,10 @@ impl ListingDatabase {
            write_params.mode = WriteMode::Overwrite;
        }

+        if request.write_options.skip_auto_cleanup {
+            write_params.skip_auto_cleanup = true;
+        }
+
        write_params.session = Some(self.session.clone());

        write_params
@@ -2027,6 +2038,7 @@ mod tests {
                }),
                ..Default::default()
            }),
+            ..Default::default()
        };

        let table = db
@@ -2100,6 +2112,7 @@ mod tests {
                }),
                ..Default::default()
            }),
+            ..Default::default()
        };

        let table = db
@@ -2213,6 +2226,133 @@ mod tests {
        assert_eq!(uri, expected);
    }

+    /// Test helper that replays the URL-mutation step performed by
+    /// [`ListingDatabase::connect_with_options`] and returns the query
+    /// string that would be captured for propagation to table URIs.
+    ///
+    /// Steps: parse `input_uri` with `url::Url::parse`, drop the `ENGINE`
+    /// and `MIRRORED_STORE` pairs, write the surviving pairs back through
+    /// `query_pairs_mut()`, then read the query, mapping an empty query
+    /// to `None`.
+    ///
+    /// Kept free of filesystem setup (which is awkward across platforms —
+    /// see the `file://` test below) so the invariant can be asserted
+    /// everywhere.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `input_uri` cannot be parsed as a URL.
+    fn capture_query_like_connect(input_uri: &str) -> Option<String> {
+        let mut parsed = url::Url::parse(input_uri).unwrap();
+        let kept: Vec<(String, String)> = parsed
+            .query_pairs()
+            .filter(|(key, _)| *key != ENGINE && *key != MIRRORED_STORE)
+            .map(|(key, value)| (key.into_owned(), value.into_owned()))
+            .collect();
+        // Rewriting via `query_pairs_mut()` leaves `Some("")` when nothing survives.
+        parsed.query_pairs_mut().clear();
+        parsed.query_pairs_mut().extend_pairs(kept);
+        let query = parsed.query()?;
+        (!query.is_empty()).then(|| query.to_string())
+    }
+
+    /// Pins the query-capture invariant on every platform: an empty query
+    /// left behind by `query_pairs_mut()` is reported as `None`, while
+    /// user-supplied pairs are preserved.
+    #[test]
+    fn test_capture_query_treats_empty_as_none() {
+        let base = "s3://bucket/prefix/";
+
+        // Input without any query: the mutation step leaves `Some("")`
+        // on the URL, which must be captured as `None`.
+        let captured_no_query = capture_query_like_connect(base);
+        assert_eq!(
+            captured_no_query, None,
+            "empty query after mutation must be treated as no query"
+        );
+
+        // A user-supplied pair passes through untouched.
+        let user_only = capture_query_like_connect(&format!("{base}?foo=bar"));
+        assert_eq!(user_only, Some("foo=bar".to_string()));
+
+        // Only the lancedb-internal `engine=` pair: it is stripped and
+        // nothing is left, so the capture is `None` rather than `Some("")`.
+        let engine_only = capture_query_like_connect(&format!("{base}?{ENGINE}=mem"));
+        assert_eq!(engine_only, None);
+
+        // `engine=` alongside a user pair: only the user pair survives.
+        let mixed = capture_query_like_connect(&format!("{base}?{ENGINE}=mem&foo=bar"));
+        assert_eq!(mixed.as_deref(), Some("foo=bar"));
+    }
+
+    /// Regression: a URL-style URI without a query string must not yield
+    /// per-table URIs that end in `?`.
+    ///
+    /// Such URIs go through `url::Url::parse` and the `query_pairs_mut()`
+    /// path. When a trailing `?` leaks onto the table URI, sub-path
+    /// lookups against it (e.g. MemWAL
+    /// `<table_uri>/_mem_wal/<shard>/<rand>_gen_<n>`) re-parse as
+    /// `path=<base table>` + `query=/_mem_wal/...`, so `Dataset::write`
+    /// finds the base table dataset and falsely reports
+    /// `Dataset already exists`.
+    ///
+    /// The test connects twice to a `file://` URI built from a temp dir:
+    /// once without a query (expects `query_string == None` and a table
+    /// URI of `<uri>/test.lance`) and once with `?foo=bar` (expects the
+    /// query to be carried over to the table URI).
+    ///
+    /// Skipped on Windows: `try_create_dir` does not understand
+    /// `file:///C:/…` paths so `connect_with_options` fails before
+    /// even reaching the URL-mutation logic. The pure URL-mutation
+    /// invariant is covered by
+    /// `test_capture_query_treats_empty_as_none` above, which runs
+    /// on all platforms.
+    #[cfg(not(windows))]
+    #[tokio::test]
+    async fn test_table_uri_url_path_has_no_trailing_question_mark() {
+        // Builds a connect request whose only caller-chosen input is the URI.
+        let request_for = |uri: String| ConnectRequest {
+            uri,
+            #[cfg(feature = "remote")]
+            client_config: Default::default(),
+            options: Default::default(),
+            namespace_client_properties: Default::default(),
+            manifest_enabled: false,
+            read_consistency_interval: None,
+            session: None,
+        };
+
+        let tempdir = tempdir().unwrap();
+        let uri = format!("file://{}", tempdir.path().to_str().unwrap());
+
+        // Case 1: no query on the input URI.
+        let plain_request = request_for(uri.clone());
+        let db = ListingDatabase::connect_with_options(&plain_request)
+            .await
+            .unwrap();
+        assert_eq!(
+            db.query_string, None,
+            "no input query → no captured query_string"
+        );
+
+        let plain_table_uri = db.table_uri("test").unwrap();
+        assert!(
+            !plain_table_uri.ends_with('?'),
+            "table_uri must not have a trailing `?`: {}",
+            plain_table_uri
+        );
+        assert_eq!(plain_table_uri, format!("{}/test.lance", uri));
+
+        // Case 2: a real query string should still be propagated.
+        let query_request = request_for(format!("{}?foo=bar", uri));
+        let db_with_query = ListingDatabase::connect_with_options(&query_request)
+            .await
+            .unwrap();
+        assert_eq!(db_with_query.query_string.as_deref(), Some("foo=bar"));
+        let query_table_uri = db_with_query.table_uri("test").unwrap();
+        assert_eq!(query_table_uri, format!("{}/test.lance?foo=bar", uri));
+    }
+
    #[tokio::test]
    async fn test_namespace_client() {
        let (_tempdir, db) = setup_database().await;
--- a/rust/lancedb/src/database/namespace.rs
+++ b/rust/lancedb/src/database/namespace.rs
@@ -414,6 +414,10 @@ impl Database for LanceNamespaceDatabase {
            params.mode = WriteMode::Overwrite;
        }

+        if request.write_options.skip_auto_cleanup {
+            params.skip_auto_cleanup = true;
+        }
+
        // Set up storage options if provided
        if let Some(storage_opts) = initial_storage_options {
            let store_params = params
--- a/rust/lancedb/src/index.rs
+++ b/rust/lancedb/src/index.rs
@@ -13,7 +13,10 @@ use crate::{DistanceType, Error, Result, table::BaseTable};

 use self::{
    scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
-    vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, IvfSqIndexBuilder},
+    vector::{
+        IvfHnswFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
+        IvfSqIndexBuilder,
+    },
 };

 pub mod scalar;
@@ -67,6 +70,10 @@ pub enum Index {
    /// IVF-HNSW index with Scalar Quantization
    /// It is a variant of the HNSW algorithm that uses scalar quantization to compress the vectors.
    IvfHnswSq(IvfHnswSqIndexBuilder),
+
+    /// IVF-HNSW index without quantization.
+    /// Stores raw vectors, providing the highest recall at the cost of more memory and disk space.
+    IvfHnswFlat(IvfHnswFlatIndexBuilder),
 }

 /// Builder for the create_index operation
@@ -290,6 +297,8 @@ pub enum IndexType {
    IvfHnswPq,
    #[serde(alias = "IVF_HNSW_SQ")]
    IvfHnswSq,
+    #[serde(alias = "IVF_HNSW_FLAT")]
+    IvfHnswFlat,
    // Scalar
    #[serde(alias = "BTREE")]
    BTree,
@@ -311,6 +320,7 @@ impl std::fmt::Display for IndexType {
            Self::IvfRq => write!(f, "IVF_RQ"),
            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
+            Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
            Self::BTree => write!(f, "BTREE"),
            Self::Bitmap => write!(f, "BITMAP"),
            Self::LabelList => write!(f, "LABEL_LIST"),
@@ -334,6 +344,7 @@ impl std::str::FromStr for IndexType {
            "IVF_RQ" => Ok(Self::IvfRq),
            "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
            "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
+            "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
            _ => Err(Error::InvalidInput {
                message: format!("the input value {} is not a valid IndexType", value),
            }),
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -474,3 +474,46 @@ impl IvfHnswSqIndexBuilder {
    impl_ivf_params_setter!();
    impl_hnsw_params_setter!();
 }
+
+/// Builder for an IVF_HNSW_FLAT vector index.
+///
+/// Vectors are partitioned with IVF and each partition gets an HNSW graph
+/// over the raw (unquantized) vectors. Within the IVF_HNSW family this
+/// gives the highest recall, at the cost of more memory and disk space
+/// than [`IvfHnswSqIndexBuilder`] or [`IvfHnswPqIndexBuilder`].
+#[derive(Debug, Clone, Serialize)]
+pub struct IvfHnswFlatIndexBuilder {
+    // IVF parameters; unset optional values are left out when serializing.
+    #[serde(rename = "metric_type")]
+    pub(crate) distance_type: DistanceType,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub(crate) num_partitions: Option<u32>,
+    pub(crate) sample_rate: u32,
+    pub(crate) max_iterations: u32,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub(crate) target_partition_size: Option<u32>,
+
+    // HNSW graph parameters.
+    pub(crate) m: u32,
+    pub(crate) ef_construction: u32,
+}
+
+impl Default for IvfHnswFlatIndexBuilder {
+    fn default() -> Self {
+        Self {
+            distance_type: DistanceType::L2,
+            num_partitions: None,
+            sample_rate: 256,
+            max_iterations: 50,
+            target_partition_size: None,
+            m: 20,
+            ef_construction: 300,
+        }
+    }
+}
+
+impl IvfHnswFlatIndexBuilder {
+    impl_distance_type_setter!();
+    impl_ivf_params_setter!();
+    impl_hnsw_params_setter!();
+}
--- a/rust/lancedb/src/io/object_store.rs
+++ b/rust/lancedb/src/io/object_store.rs
@@ -5,11 +5,12 @@

 use std::{fmt::Formatter, sync::Arc};

-use futures::{TryFutureExt, stream::BoxStream};
+use futures::{StreamExt, TryFutureExt, stream::BoxStream};
 use lance::io::WrappingObjectStore;
 use object_store::{
-    Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
-    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path,
+    CopyOptions, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta,
+    ObjectStore, ObjectStoreExt, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result,
+    UploadPart, path::Path,
 };

 use async_trait::async_trait;
@@ -93,20 +94,6 @@ impl ObjectStore for MirroringObjectStore {
        self.primary.get_opts(location, options).await
    }

-    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
-        self.primary.head(location).await
-    }
-
-    async fn delete(&self, location: &Path) -> Result<()> {
-        if !location.primary_only() {
-            match self.secondary.delete(location).await {
-                Err(Error::NotFound { .. }) | Ok(_) => {}
-                Err(e) => return Err(e),
-            }
-        }
-        self.primary.delete(location).await
-    }
-
    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
        self.primary.list(prefix)
    }
@@ -115,21 +102,40 @@ impl ObjectStore for MirroringObjectStore {
        self.primary.list_with_delimiter(prefix).await
    }

-    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
-        if to.primary_only() {
-            self.primary.copy(from, to).await
-        } else {
-            self.secondary.copy(from, to).await?;
-            self.primary.copy(from, to).await?;
-            Ok(())
-        }
+    /// Deletes each path from the secondary store (unless the path is
+    /// `primary_only()`) and then from the primary store, with up to 10
+    /// deletions in flight at once.
+    fn delete_stream(
+        &self,
+        locations: BoxStream<'static, Result<Path>>,
+    ) -> BoxStream<'static, Result<Path>> {
+        let (primary, secondary) = (self.primary.clone(), self.secondary.clone());
+        let delete_one = move |location: Result<Path>| {
+            let (primary, secondary) = (primary.clone(), secondary.clone());
+            async move {
+                let path = location?;
+                // A path already missing from the mirror is not an error.
+                if !path.primary_only()
+                    && let Err(err) = secondary.delete(&path).await
+                    && !matches!(err, Error::NotFound { .. })
+                {
+                    return Err(err);
+                }
+                primary.delete(&path).await?;
+                Ok(path)
+            }
+        };
+        locations.map(delete_one).buffered(10).boxed()
+    }

-    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
-        if !to.primary_only() {
-            self.secondary.copy(from, to).await?;
+    async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> {
+        if to.primary_only() {
+            self.primary.copy_opts(from, to, options).await
+        } else {
+            self.secondary.copy_opts(from, to, options.clone()).await?;
+            self.primary.copy_opts(from, to, options).await?;
+            Ok(())
        }
-        self.primary.copy_if_not_exists(from, to).await
    }
 }

@@ -228,6 +234,7 @@ mod test {
            .create_table("test", data)
            .write_options(WriteOptions {
                lance_write_params: Some(param),
+                ..Default::default()
            })
            .execute()
            .await;
--- a/rust/lancedb/src/io/object_store/io_tracking.rs
+++ b/rust/lancedb/src/io/object_store/io_tracking.rs
@@ -10,9 +10,9 @@ use bytes::Bytes;
 use futures::stream::BoxStream;
 use lance::io::WrappingObjectStore;
 use object_store::{
-    GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
-    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
-    path::Path,
+    CopyOptions, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
+    PutMultipartOptions, PutOptions, PutPayload, PutResult, RenameOptions, Result as OSResult,
+    UploadPart, path::Path,
 };

 #[derive(Debug, Default)]
@@ -81,11 +81,6 @@ impl IoTrackingStore {
 #[async_trait::async_trait]
 #[deny(clippy::missing_trait_methods)]
 impl ObjectStore for IoTrackingStore {
-    async fn put(&self, location: &Path, bytes: PutPayload) -> OSResult<PutResult> {
-        self.record_write(bytes.content_length() as u64);
-        self.target.put(location, bytes).await
-    }
-
    async fn put_opts(
        &self,
        location: &Path,
@@ -96,14 +91,6 @@ impl ObjectStore for IoTrackingStore {
        self.target.put_opts(location, bytes, opts).await
    }

-    async fn put_multipart(&self, location: &Path) -> OSResult<Box<dyn MultipartUpload>> {
-        let target = self.target.put_multipart(location).await?;
-        Ok(Box::new(IoTrackingMultipartUpload {
-            target,
-            stats: self.stats.clone(),
-        }))
-    }
-
    async fn put_multipart_opts(
        &self,
        location: &Path,
@@ -116,15 +103,6 @@ impl ObjectStore for IoTrackingStore {
        }))
    }

-    async fn get(&self, location: &Path) -> OSResult<GetResult> {
-        let result = self.target.get(location).await;
-        if let Ok(result) = &result {
-            let num_bytes = result.range.end - result.range.start;
-            self.record_read(num_bytes);
-        }
-        result
-    }
-
    async fn get_opts(&self, location: &Path, options: GetOptions) -> OSResult<GetResult> {
        let result = self.target.get_opts(location, options).await;
        if let Ok(result) = &result {
@@ -134,14 +112,6 @@ impl ObjectStore for IoTrackingStore {
        result
    }

-    async fn get_range(&self, location: &Path, range: std::ops::Range<u64>) -> OSResult<Bytes> {
-        let result = self.target.get_range(location, range).await;
-        if let Ok(result) = &result {
-            self.record_read(result.len() as u64);
-        }
-        result
-    }
-
    async fn get_ranges(
        &self,
        location: &Path,
@@ -154,20 +124,11 @@ impl ObjectStore for IoTrackingStore {
        result
    }

-    async fn head(&self, location: &Path) -> OSResult<ObjectMeta> {
-        self.record_read(0);
-        self.target.head(location).await
-    }
-
-    async fn delete(&self, location: &Path) -> OSResult<()> {
+    fn delete_stream(
+        &self,
+        locations: BoxStream<'static, OSResult<Path>>,
+    ) -> BoxStream<'static, OSResult<Path>> {
        self.record_write(0);
-        self.target.delete(location).await
-    }
-
-    fn delete_stream<'a>(
-        &'a self,
-        locations: BoxStream<'a, OSResult<Path>>,
-    ) -> BoxStream<'a, OSResult<Path>> {
        self.target.delete_stream(locations)
    }

@@ -190,24 +151,14 @@ impl ObjectStore for IoTrackingStore {
        self.target.list_with_delimiter(prefix).await
    }

-    async fn copy(&self, from: &Path, to: &Path) -> OSResult<()> {
+    async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> OSResult<()> {
        self.record_write(0);
-        self.target.copy(from, to).await
+        self.target.copy_opts(from, to, options).await
    }

-    async fn rename(&self, from: &Path, to: &Path) -> OSResult<()> {
+    async fn rename_opts(&self, from: &Path, to: &Path, options: RenameOptions) -> OSResult<()> {
        self.record_write(0);
-        self.target.rename(from, to).await
-    }
-
-    async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> {
-        self.record_write(0);
-        self.target.rename_if_not_exists(from, to).await
-    }
-
-    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> {
-        self.record_write(0);
-        self.target.copy_if_not_exists(from, to).await
+        self.target.rename_opts(from, to, options).await
    }
 }

--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -1540,6 +1540,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            Index::IvfPq(p) => ("IVF_PQ", Some(to_json(p)?)),
            Index::IvfSq(p) => ("IVF_SQ", Some(to_json(p)?)),
            Index::IvfHnswSq(p) => ("IVF_HNSW_SQ", Some(to_json(p)?)),
+            Index::IvfHnswFlat(p) => ("IVF_HNSW_FLAT", Some(to_json(p)?)),
            Index::IvfRq(p) => ("IVF_RQ", Some(to_json(p)?)),
            Index::BTree(p) => ("BTREE", Some(to_json(p)?)),
            Index::Bitmap(p) => ("BITMAP", Some(to_json(p)?)),
@@ -2068,7 +2069,8 @@ mod tests {
    use serde_json::json;

    use crate::index::vector::{
-        IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
+        IvfFlatIndexBuilder, IvfHnswFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder,
+        IvfSqIndexBuilder,
    };
    use crate::remote::JSON_CONTENT_TYPE;
    use crate::remote::db::DEFAULT_SERVER_VERSION;
@@ -3321,6 +3323,35 @@ mod tests {
                        .ef_construction(500),
                ),
            ),
+            (
+                "IVF_HNSW_FLAT",
+                json!({
+                    "metric_type": "l2",
+                    "sample_rate": 256,
+                    "max_iterations": 50,
+                    "m": 20,
+                    "ef_construction": 300,
+                }),
+                Index::IvfHnswFlat(Default::default()),
+            ),
+            (
+                "IVF_HNSW_FLAT",
+                json!({
+                    "metric_type": "cosine",
+                    "num_partitions": 64,
+                    "sample_rate": 256,
+                    "max_iterations": 50,
+                    "m": 40,
+                    "ef_construction": 500,
+                }),
+                Index::IvfHnswFlat(
+                    IvfHnswFlatIndexBuilder::default()
+                        .distance_type(DistanceType::Cosine)
+                        .num_partitions(64)
+                        .num_edges(40)
+                        .ef_construction(500),
+                ),
+            ),
            (
                "IVF_SQ",
                json!({
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -189,6 +189,18 @@ pub struct WriteOptions {
    // Coming soon: https://github.com/lancedb/lancedb/issues/992
    // /// What behavior to take if the data contains invalid vectors
    // pub on_bad_vectors: BadVectorHandling,
+    /// If true, skip the automatic cleanup of old dataset versions that would
+    /// otherwise run during the commit. This forwards to
+    /// [`WriteParams::skip_auto_cleanup`] in lance-core.
+    ///
+    /// Useful for high-frequency writers that want to manage version cleanup
+    /// themselves (e.g. via a periodic optimize job), or for writers that
+    /// lack delete permissions on the underlying storage.
+    ///
+    /// This flag is OR-ed with `lance_write_params.skip_auto_cleanup`:
+    /// cleanup is skipped when either one is `true`, and leaving this
+    /// field `false` never clears a flag set via `lance_write_params`.
+    pub skip_auto_cleanup: bool,
    /// Advanced parameters that can be used to customize table creation
    ///
    /// Overlapping `OpenTableBuilder` options (e.g. [AddDataBuilder::mode]) will take
@@ -2033,6 +2045,24 @@ impl NativeTable {
                );
                Ok(Box::new(lance_idx_params))
            }
+            Index::IvfHnswFlat(index) => {
+                Self::validate_index_type(field, "IVF HNSW FLAT", supported_vector_data_type)?;
+                let ivf_params = Self::build_ivf_params(
+                    index.num_partitions,
+                    index.target_partition_size,
+                    index.sample_rate,
+                    index.max_iterations,
+                );
+                let hnsw_params = HnswBuildParams::default()
+                    .num_edges(index.m as usize)
+                    .ef_construction(index.ef_construction as usize);
+                let lance_idx_params = VectorIndexParams::ivf_hnsw(
+                    index.distance_type.into(),
+                    ivf_params,
+                    hnsw_params,
+                );
+                Ok(Box::new(lance_idx_params))
+            }
        }
    }

@@ -2058,7 +2088,8 @@ impl NativeTable {
            | Index::IvfPq(_)
            | Index::IvfRq(_)
            | Index::IvfHnswPq(_)
-            | Index::IvfHnswSq(_) => IndexType::Vector,
+            | Index::IvfHnswSq(_)
+            | Index::IvfHnswFlat(_) => IndexType::Vector,
        }
    }

@@ -2264,7 +2295,8 @@ impl BaseTable for NativeTable {

        let output = add.into_plan(&table_schema, &table_def)?;

-        let lance_params = output
+        let skip_auto_cleanup = output.write_options.skip_auto_cleanup;
+        let mut lance_params = output
            .write_options
            .lance_write_params
            .unwrap_or(WriteParams {
@@ -2274,6 +2306,9 @@ impl BaseTable for NativeTable {
                },
                ..Default::default()
            });
+        if skip_auto_cleanup {
+            lance_params.skip_auto_cleanup = true;
+        }

        // Repartition for write parallelism if beneficial.
        let plan = if num_partitions > 1 {
@@ -3176,6 +3211,56 @@ mod tests {
        assert_eq!(stats.num_unindexed_rows, 0);
    }

+    #[tokio::test]
+    async fn test_create_index_ivf_hnsw_flat() {
+        use std::iter::repeat_with;
+
+        use arrow_array::{Float32Array, RecordBatch};
+        use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+
+        use crate::index::vector::IvfHnswFlatIndexBuilder;
+
+        const DIMENSION: i32 = 16;
+        const NUM_ROWS: usize = 512;
+
+        let tmp_dir = tempdir().unwrap();
+        let conn = connect(tmp_dir.path().to_str().unwrap())
+            .execute()
+            .await
+            .unwrap();
+
+        // One non-nullable column of fixed-size float32 lists.
+        let item_field = Arc::new(Field::new("item", DataType::Float32, true));
+        let embedding_type = DataType::FixedSizeList(item_field, DIMENSION);
+        let schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "embeddings",
+            embedding_type,
+            false,
+        )]));
+
+        // NUM_ROWS random vectors, flattened into a single values buffer.
+        let values: Vec<f32> = repeat_with(rand::random::<f32>)
+            .take(NUM_ROWS * DIMENSION as usize)
+            .collect();
+        let list_array = create_fixed_size_list(Float32Array::from(values), DIMENSION).unwrap();
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(list_array)]).unwrap();
+        let table = conn.create_table("test", batch).execute().await.unwrap();
+
+        let builder = IvfHnswFlatIndexBuilder::default();
+        table
+            .create_index(&["embeddings"], Index::IvfHnswFlat(builder))
+            .execute()
+            .await
+            .unwrap();
+
+        let mut index_configs = table.list_indices().await.unwrap();
+        assert_eq!(index_configs.len(), 1);
+        let created = index_configs.pop().unwrap();
+        assert_eq!(created.index_type, crate::index::IndexType::IvfHnswFlat);
+        assert_eq!(created.columns, vec!["embeddings".to_string()]);
+        assert_eq!(table.count_rows(None).await.unwrap(), NUM_ROWS);
+    }
+
    fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {
        let list_type = DataType::FixedSizeList(
            Arc::new(Field::new("item", values.data_type().clone(), true)),
--- a/rust/lancedb/src/table/add_data.rs
+++ b/rust/lancedb/src/table/add_data.rs
@@ -441,6 +441,7 @@ mod tests {
            .add(new_batch.clone())
            .write_options(WriteOptions {
                lance_write_params: Some(param),
+                ..Default::default()
            })
            .mode(AddDataMode::Append)
            .execute()
@@ -761,4 +762,56 @@ mod tests {
        table2.add(struct_batch).execute().await.unwrap();
        assert_eq!(table2.count_rows(None).await.unwrap(), 2);
    }
+
+    #[tokio::test]
+    async fn test_add_skip_auto_cleanup() {
+        // End-to-end check that `WriteOptions::skip_auto_cleanup` is honoured
+        // by `add`: the version count taken after several flagged writes
+        // must shrink once a single unflagged write lets cleanup run.
+        let tmp_dir = tempfile::tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+        let conn = connect(uri).execute().await.unwrap();
+
+        let initial = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
+        let table = conn.create_table("t", initial).execute().await.unwrap();
+
+        // Make every commit a cleanup candidate (`interval = 1`) and every
+        // prior version immediately eligible (`older_than = 0s`).
+        let cleanup_config = vec![
+            ("lance.auto_cleanup.interval".to_string(), "1".to_string()),
+            ("lance.auto_cleanup.older_than".to_string(), "0s".to_string()),
+        ];
+        table
+            .as_native()
+            .unwrap()
+            .update_config(cleanup_config)
+            .await
+            .unwrap();
+
+        // Three appends with the flag set; none should trigger pruning.
+        for offset in 0..3 {
+            let skip_cleanup = WriteOptions {
+                skip_auto_cleanup: true,
+                ..Default::default()
+            };
+            table
+                .add(record_batch!(("id", Int64, [10 + offset])).unwrap())
+                .write_options(skip_cleanup)
+                .execute()
+                .await
+                .unwrap();
+        }
+        let versions_before = table.list_versions().await.unwrap().len();
+
+        // One append without the flag; cleanup should run and prune.
+        let final_batch = record_batch!(("id", Int64, [42])).unwrap();
+        table.add(final_batch).execute().await.unwrap();
+        let versions_after = table.list_versions().await.unwrap().len();
+
+        assert!(
+            versions_after < versions_before,
+            "auto-cleanup should have removed old versions once the skip flag was off \
+             (before={versions_before}, after={versions_after})"
+        );
+    }
 }
--- a/rust/lancedb/src/table/datafusion/insert.rs
+++ b/rust/lancedb/src/table/datafusion/insert.rs
@@ -219,6 +219,7 @@ impl ExecutionPlan for InsertExec {
                && let Some(merged_txn) = merge_transactions(transactions)
            {
                let new_dataset = CommitBuilder::new(dataset.clone())
+                    .with_skip_auto_cleanup(write_params.skip_auto_cleanup)
                    .execute(merged_txn)
                    .await?;
                ds_wrapper.update(new_dataset);
--- a/rust/lancedb/src/table/dataset.rs
+++ b/rust/lancedb/src/table/dataset.rs
@@ -528,6 +528,7 @@ mod tests {
                    }),
                    ..Default::default()
                }),
+                ..Default::default()
            })
            .execute()
            .await
@@ -589,6 +590,7 @@ mod tests {
                    }),
                    ..Default::default()
                }),
+                ..Default::default()
            })
            .execute()
            .await
--- a/rust/lancedb/src/table/merge.rs
+++ b/rust/lancedb/src/table/merge.rs
@@ -55,6 +55,7 @@ pub struct MergeInsertBuilder {
    pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
    pub(crate) timeout: Option<Duration>,
    pub(crate) use_index: bool,
+    pub(crate) skip_auto_cleanup: bool,
 }

 impl MergeInsertBuilder {
@@ -69,6 +70,7 @@ impl MergeInsertBuilder {
            when_not_matched_by_source_delete_filt: None,
            timeout: None,
            use_index: true,
+            skip_auto_cleanup: false,
        }
    }

@@ -148,6 +150,17 @@ impl MergeInsertBuilder {
        self
    }

+    /// Sets whether the merge insert commit skips the automatic cleanup of
+    /// old dataset versions. Defaults to `false` (cleanup may run).
+    ///
+    /// When `true`, this is forwarded to
+    /// [`lance::dataset::MergeInsertBuilder::skip_auto_cleanup`]. Useful for
+    /// high-frequency writers managing cleanup themselves, or lacking delete access.
+    pub fn skip_auto_cleanup(&mut self, skip: bool) -> &mut Self {
+        self.skip_auto_cleanup = skip;
+        self
+    }
+
    /// Executes the merge insert operation
    ///
    /// Returns version and statistics about the merge operation including the number of rows
@@ -191,6 +204,9 @@ pub(crate) async fn execute_merge_insert(
        builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
    }
    builder.use_index(params.use_index);
+    if params.skip_auto_cleanup {
+        builder.skip_auto_cleanup(true);
+    }

    let future = if let Some(timeout) = params.timeout {
        let future = builder