chore: update lance dependency to v7.0.0-beta.5

2026-06-03 04:10:41 +00:00 · 2026-05-06 17:59:54 +00:00
51 changed files with 1348 additions and 1627 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.29.0"
+current_version = "0.28.0-beta.11"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -43,7 +43,7 @@ jobs:
          server-username: SONATYPE_USER
          server-password: SONATYPE_TOKEN
          gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
-          gpg-passphrase: MAVEN_GPG_PASSPHRASE
+          gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
      - name: Set git config
        run: |
          git config --global user.email "dev+gha@lancedb.com"
@@ -58,11 +58,10 @@ jobs:
          echo "use-agent" >> ~/.gnupg/gpg.conf
          echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
          export GPG_TTY=$(tty)
-          ./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh
+          ./mvnw --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -pl lancedb-core -am -P deploy-to-ossrh
        env:
          SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
          SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
-          MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}

  report-failure:
    name: Report Workflow Failure
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -16,7 +16,6 @@ on:
  push:
    branches:
      - main
-      - release/**
    paths:
      - java/**
      - .github/workflows/java.yml
--- a/.github/workflows/license-header-check.yml
+++ b/.github/workflows/license-header-check.yml
@@ -3,7 +3,6 @@ on:
  push:
    branches:
      - main
-      - release/**
  pull_request:
    paths:
      - rust/**
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -4,7 +4,6 @@ on:
  push:
    branches:
      - main
-      - release/**
  pull_request:
    paths:
      - Cargo.toml
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -4,7 +4,6 @@ on:
  push:
    branches:
      - main
-      - release/**
  pull_request:
    paths:
      - Cargo.toml
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -4,7 +4,6 @@ on:
  push:
    branches:
      - main
-      - release/**
  pull_request:
    paths:
      - Cargo.toml
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { "version" = "=6.0.1", default-features = false }
-lance-core = "=6.0.1"
-lance-datagen = "=6.0.1"
-lance-file = "=6.0.1"
-lance-io = { "version" = "=6.0.1", default-features = false }
-lance-index = "=6.0.1"
-lance-linalg = "=6.0.1"
-lance-namespace = "=6.0.1"
-lance-namespace-impls = { "version" = "=6.0.1", default-features = false }
-lance-table = "=6.0.1"
-lance-testing = "=6.0.1"
-lance-datafusion = "=6.0.1"
-lance-encoding = "=6.0.1"
-lance-arrow = "=6.0.1"
+lance = { "version" = "=7.0.0-beta.5", default-features = false, "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=7.0.0-beta.5", default-features = false, "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=7.0.0-beta.5", default-features = false, "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=7.0.0-beta.5", "tag" = "v7.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "58.0.0", optional = false }
@@ -54,7 +54,7 @@ half = { "version" = "2.7.1", default-features = false, features = [
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.12.0"
+object_store = "0.13.2"
 pin-project = "1.0.7"
 rand = "0.9"
 snafu = "0.8"
--- a/deny.toml
+++ b/deny.toml
@@ -51,18 +51,6 @@ ignore = [
    # https://rustsec.org/advisories/RUSTSEC-2024-0436
    { id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },

-    # encoding: unmaintained. Reached through lindera-dictionary, which is
-    # required by the native Lindera tokenizer path. Lindera has not migrated
-    # off this crate yet.
-    # https://rustsec.org/advisories/RUSTSEC-2021-0153
-    { id = "RUSTSEC-2021-0153", reason = "transitive via lindera-dictionary for native Lindera tokenizer" },
-
-    # fast-float: unsound and unmaintained. Reached only through polars-arrow
-    # from the optional Polars integration; replacement requires a Polars
-    # dependency upgrade.
-    # https://rustsec.org/advisories/RUSTSEC-2024-0379
-    { id = "RUSTSEC-2024-0379", reason = "transitive via polars-arrow; waiting on Polars migration" },
-
    # tantivy: segfault on malformed input due to missing bounds check.
    # Pulled in via lance for full-text search. We only feed tantivy
    # documents we construct ourselves, not attacker-controlled bytes.
@@ -80,24 +68,11 @@ ignore = [
    # https://rustsec.org/advisories/RUSTSEC-2025-0119
    { id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },

-    # rustls-pemfile: unmaintained (repo archived; code folded into
-    # rustls-pki-types). Reached only transitively via object_store → lance.
-    # No safe upgrade is available; clearing this requires object_store to
-    # migrate to the rustls-pki-types PemObject API.
+    # rustls-pemfile: unmaintained. Reached from two separate chains:
+    # rustls-native-certs 0.6 (via hyper-rustls 0.24) and object_store 0.12.
+    # Both upstream dependencies need to move before we can drop it.
    # https://rustsec.org/advisories/RUSTSEC-2025-0134
-    { id = "RUSTSEC-2025-0134", reason = "transitive via object_store/lance; waiting on object_store migration to rustls-pki-types" },
-
-    # bincode: unmaintained. Reached through lindera and lindera-dictionary,
-    # which are required by the native Lindera tokenizer path. Lindera has not
-    # migrated to another serialization format yet.
-    # https://rustsec.org/advisories/RUSTSEC-2025-0141
-    { id = "RUSTSEC-2025-0141", reason = "transitive via lindera/lindera-dictionary for native Lindera tokenizer" },
-
-    # lru: soundness issue in IterMut. Reached only through aws-sdk-s3 in
-    # LanceDB's dev-dependency graph; LanceDB does not use that iterator
-    # directly. Clearing this requires the AWS SDK chain to update lru.
-    # https://rustsec.org/advisories/RUSTSEC-2026-0002
-    { id = "RUSTSEC-2026-0002", reason = "transitive via aws-sdk-s3 dev-dependency; waiting on AWS SDK lru upgrade" },
+    { id = "RUSTSEC-2025-0134", reason = "transitive via rustls-native-certs/object_store; waiting on upstream migration" },

    # rustls-webpki 0.101.7 (old major line): name-constraint checks for
    # URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
@@ -114,12 +89,6 @@ ignore = [
    # we actively use is upgraded to 0.103.13 which contains the fix.
    # https://rustsec.org/advisories/RUSTSEC-2026-0104
    { id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
-
-    # rand 0.8.5: soundness issue only when ThreadRng reseeds inside a custom
-    # logger. Reached through several transitive chains. LanceDB does not use
-    # rand from a custom logger; upgrade once all pinned chains accept 0.8.6+.
-    # https://rustsec.org/advisories/RUSTSEC-2026-0097
-    { id = "RUSTSEC-2026-0097", reason = "transitive rand 0.8.5; LanceDB does not call ThreadRng from custom logging" },
 ]

 # ---------------------------------------------------------------------------
--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.29.0</version>
+    <version>0.28.0-beta.11</version>
 </dependency>
 ```

--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -501,34 +501,6 @@ Modeled after ``VACUUM`` in PostgreSQL.

 ***

-### prewarmData()
-
-```ts
-abstract prewarmData(columns?): Promise<void>
-```
-
-Prewarm one or more columns of data in the table.
-
-#### Parameters
-
-* **columns?**: `string`[]
-    The columns to prewarm. If undefined, all columns are prewarmed.
-    This will load the column data into the page cache so that future queries that
-    read those columns avoid the initial cold-start latency.  This call initiates
-    prewarming and returns once the request is accepted; the warming itself may
-    continue in the background.  Calling it on already-prewarmed columns is a
-    no-op on the server.
-    Prewarming is generally useful for columns used in filters or projections.
-    Large columns (e.g. high-dimensional vectors or binary data) may not be
-    practical to prewarm.
-    This feature is currently only supported on remote tables.
-
-#### Returns
-
-`Promise`&lt;`void`&gt;
-
-***
-
 ### prewarmIndex()

 ```ts
--- a/docs/src/python/python.md
+++ b/docs/src/python/python.md
@@ -94,11 +94,11 @@ of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and

 ## Full text search

-Use [lancedb.table.Table.create_fts_index][] for the synchronous API or
-[lancedb.table.AsyncTable.create_index][] with [lancedb.index.FTS][] for the
-asynchronous API.
+::: lancedb.fts.create_index

-::: lancedb.index.FTS
+::: lancedb.fts.populate_index
+
+::: lancedb.fts.search_index

 ## Utilities

--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.29.0-final.0</version>
+      <version>0.28.0-beta.11</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.29.0-final.0</version>
+    <version>0.28.0-beta.11</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>6.0.0</lance-core.version>
+        <lance-core.version>7.0.0-beta.5</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.29.0"
+version = "0.28.0-beta.11"
 publish = false
 license.workspace = true
 description.workspace = true
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -1870,25 +1870,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(results.length).toBe(3);
    });

-    test("prewarmData errors on local tables", async () => {
-      const db = await connect(tmpDir.name);
-      const data = [
-        { text: "alpha", vector: [0.1, 0.2, 0.3] },
-        { text: "beta", vector: [0.4, 0.5, 0.6] },
-      ];
-      const table = await db.createTable("prewarm_data_test", data);
-
-      // prewarmData is only supported on remote tables. We verify the call
-      // is wired through napi and surfaces the expected error for both
-      // arg shapes (undefined and string[]).
-      await expect(table.prewarmData()).rejects.toThrow(
-        "prewarm_data is currently only supported on remote tables",
-      );
-      await expect(table.prewarmData(["text"])).rejects.toThrow(
-        "prewarm_data is currently only supported on remote tables",
-      );
-    });
-
    test("full text index on list", async () => {
      const db = await connect(tmpDir.name);
      const data = [
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -285,25 +285,6 @@ export abstract class Table {
   */
  abstract prewarmIndex(name: string): Promise<void>;

-  /**
-   * Prewarm one or more columns of data in the table.
-   *
-   * @param columns The columns to prewarm. If undefined, all columns are prewarmed.
-   *
-   * This will load the column data into the page cache so that future queries that
-   * read those columns avoid the initial cold-start latency.  This call initiates
-   * prewarming and returns once the request is accepted; the warming itself may
-   * continue in the background.  Calling it on already-prewarmed columns is a
-   * no-op on the server.
-   *
-   * Prewarming is generally useful for columns used in filters or projections.
-   * Large columns (e.g. high-dimensional vectors or binary data) may not be
-   * practical to prewarm.
-   *
-   * This feature is currently only supported on remote tables.
-   */
-  abstract prewarmData(columns?: string[]): Promise<void>;
-
  /**
   * Waits for asynchronous indexing to complete on the table.
   *
@@ -729,10 +710,6 @@ export class LocalTable extends Table {
    await this.inner.prewarmIndex(name);
  }

-  async prewarmData(columns?: string[]): Promise<void> {
-    await this.inner.prewarmData(columns);
-  }
-
  async waitForIndex(
    indexNames: string[],
    timeoutSeconds: number,
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.29.0",
+	"version": "0.28.0-beta.11",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.29.0",
+	"version": "0.28.0-beta.11",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.29.0",
+	"version": "0.28.0-beta.11",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.29.0",
+	"version": "0.28.0-beta.11",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.29.0",
+	"version": "0.28.0-beta.11",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.29.0",
+  "version": "0.28.0-beta.11",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.29.0",
+	"version": "0.28.0-beta.11",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.29.0",
+  "version": "0.28.0-beta.11",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.29.0",
+      "version": "0.28.0-beta.11",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.29.0",
+  "version": "0.28.0-beta.11",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -159,14 +159,6 @@ impl Table {
            .default_error()
    }

-    #[napi(catch_unwind)]
-    pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> napi::Result<()> {
-        self.inner_ref()?
-            .prewarm_data(columns)
-            .await
-            .default_error()
-    }
-
    #[napi(catch_unwind)]
    pub async fn wait_for_index(&self, index_names: Vec<String>, timeout_s: i64) -> Result<()> {
        let timeout = std::time::Duration::from_secs(timeout_s.try_into().unwrap());
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.32.0"
+current_version = "0.31.0-beta.11"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.32.0"
+version = "0.31.0-beta.11"
 publish = false
 edition.workspace = true
 description = "Python bindings for LanceDB"
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"

 [project.optional-dependencies]
 pylance = [
-    "pylance>=6.0.0",
+    "pylance>=5.0.0b5",
 ]
 tests = [
    "aiohttp>=3.9.0",
@@ -58,7 +58,7 @@ tests = [
    "pytz>=2023.3",
    "polars>=0.19, <=1.3.0",
    "pyarrow-stubs>=16.0",
-    "pylance>=6.0.0",
+    "pylance>=5.0.0b5",
    "requests>=2.31.0",
    "datafusion>=52,<53",
 ]
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -12,7 +12,6 @@ from .index import (
    LabelList,
    HnswPq,
    HnswSq,
-    HnswFlat,
    FTS,
 )
 from lance_namespace import (
@@ -26,7 +25,6 @@ from .remote import ClientConfig

 IvfHnswPq: type[HnswPq] = HnswPq
 IvfHnswSq: type[HnswSq] = HnswSq
-IvfHnswFlat: type[HnswFlat] = HnswFlat

 class PyExpr:
    """A type-safe DataFusion expression node (Rust-side handle)."""
@@ -182,7 +180,6 @@ class Table:
            IvfPq,
            HnswPq,
            HnswSq,
-            HnswFlat,
            BTree,
            Bitmap,
            LabelList,
--- a/python/python/lancedb/index.py
+++ b/python/python/lancedb/index.py
@@ -7,7 +7,6 @@ from typing import Literal, Optional
 from ._lancedb import (
    IndexConfig,
 )
-from .types import BaseTokenizerType

 lang_mapping = {
    "ar": "Arabic",
@@ -112,12 +111,8 @@ class FTS:
        - "simple": Splits text by whitespace and punctuation.
        - "whitespace": Split text by whitespace, but not punctuation.
        - "raw": No tokenization. The entire text is treated as a single token.
-        - "ngram": N-gram tokenizer for substring-style matching.
-        - "jieba/*": Jieba tokenizer loaded from Lance's language model home.
-        - "lindera/*": Lindera tokenizer loaded from Lance's language model home.
    language : str, default "English"
-        The language to use for stemming and stop-word removal. This is not the
-        primary way to enable CJK tokenization.
+        The language to use for tokenization.
    max_token_length : int, default 40
        The maximum token length to index. Tokens longer than this length will be
        ignored.
@@ -132,17 +127,10 @@ class FTS:
    ascii_folding : bool, default True
        Whether to fold ASCII characters. This converts accented characters to
        their ASCII equivalent. For example, "café" would be converted to "cafe".
-
-    Notes
-    -----
-    Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
-    require tokenizer models in Lance's language model home. Set
-    ``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
-    directory under ``lance/language_models``.
    """

    with_position: bool = False
-    base_tokenizer: BaseTokenizerType = "simple"
+    base_tokenizer: Literal["simple", "raw", "whitespace"] = "simple"
    language: str = "English"
    max_token_length: Optional[int] = 40
    lower_case: bool = True
@@ -388,98 +376,9 @@ class HnswSq:
    target_partition_size: Optional[int] = None


-@dataclass
-class HnswFlat:
-    """Describe a HNSW-FLAT index configuration.
-
-    HNSW-FLAT stands for Hierarchical Navigable Small World without quantization.
-    It stores raw vectors in the HNSW graph, providing the highest recall among
-    the IVF_HNSW family at the cost of more memory and disk space compared to
-    :class:`HnswSq` or :class:`HnswPq`.
-
-    Parameters
-    ----------
-
-    distance_type: str, default "l2"
-
-        The distance metric used to train the index.
-
-        The following distance types are available:
-
-        "l2" - Euclidean distance. This is a very common distance metric that
-        accounts for both magnitude and direction when determining the distance
-        between vectors. l2 distance has a range of [0, ∞).
-
-        "cosine" - Cosine distance.  Cosine distance is a distance metric
-        calculated from the cosine similarity between two vectors. Cosine
-        similarity is a measure of similarity between two non-zero vectors of an
-        inner product space. It is defined to equal the cosine of the angle
-        between them.  Unlike l2, the cosine distance is not affected by the
-        magnitude of the vectors.  Cosine distance has a range of [0, 2].
-
-        "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
-        distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
-        l2 norm is 1), then dot distance is equivalent to the cosine distance.
-
-    num_partitions, default sqrt(num_rows)
-
-        The number of IVF partitions to create.
-
-        For HNSW, we recommend a small number of partitions. Setting this to 1
-        works well for most tables. For very large tables, training just one HNSW
-        graph will require too much memory. Each partition becomes its own HNSW
-        graph, so setting this value higher reduces the peak memory use of
-        training.
-
-    max_iterations, default 50
-
-        Max iterations to train kmeans.
-
-        When training an IVF index we use kmeans to calculate the partitions.
-        This parameter controls how many iterations of kmeans to run.
-
-    sample_rate, default 256
-
-        The rate used to calculate the number of training vectors for kmeans.
-
-    m, default 20
-
-        The number of neighbors to select for each vector in the HNSW graph.
-
-        This value controls the tradeoff between search speed and accuracy.
-        The higher the value the more accurate the search but the slower it
-        will be.
-
-    ef_construction, default 300
-
-        The number of candidates to evaluate during the construction of the HNSW
-        graph.
-
-        This value controls the tradeoff between build speed and accuracy.
-        The higher the value the more accurate the build but the slower it will
-        be.  150 to 300 is the typical range. 100 is a minimum for good quality
-        search results. In most cases, there is no benefit to setting this higher
-        than 500.  This value should be set to a value that is not less than `ef`
-        in the search phase.
-
-    target_partition_size, default is 1,048,576
-
-        The target size of each partition.
-    """
-
-    distance_type: Literal["l2", "cosine", "dot"] = "l2"
-    num_partitions: Optional[int] = None
-    max_iterations: int = 50
-    sample_rate: int = 256
-    m: int = 20
-    ef_construction: int = 300
-    target_partition_size: Optional[int] = None
-
-
 # Backwards-compatible aliases
 IvfHnswPq = HnswPq
 IvfHnswSq = HnswSq
-IvfHnswFlat = HnswFlat


@dataclass
@@ -799,13 +698,11 @@ __all__ = [
    "IvfPq",
    "IvfHnswPq",
    "IvfHnswSq",
-    "IvfHnswFlat",
    "IvfSq",
    "IvfRq",
    "IvfFlat",
    "HnswPq",
    "HnswSq",
-    "HnswFlat",
    "IndexConfig",
    "FTS",
    "Bitmap",
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -22,7 +22,6 @@ from lancedb.index import (
    FTS,
    BTree,
    Bitmap,
-    HnswFlat,
    HnswSq,
    IvfFlat,
    IvfPq,
@@ -40,7 +39,6 @@ from lancedb.table import _normalize_progress

 from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
 from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
-from ..types import BaseTokenizerType


 class RemoteTable(Table):
@@ -169,7 +167,7 @@ class RemoteTable(Table):
        wait_timeout: Optional[timedelta] = None,
        with_position: bool = False,
        # tokenizer configs:
-        base_tokenizer: BaseTokenizerType = "simple",
+        base_tokenizer: str = "simple",
        language: str = "English",
        max_token_length: Optional[int] = 40,
        lower_case: bool = True,
@@ -286,15 +284,13 @@ class RemoteTable(Table):
            )
        elif index_type == "IVF_HNSW_SQ":
            config = HnswSq(distance_type=metric, num_partitions=num_partitions)
-        elif index_type == "IVF_HNSW_FLAT":
-            config = HnswFlat(distance_type=metric, num_partitions=num_partitions)
        elif index_type == "IVF_FLAT":
            config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
        else:
            raise ValueError(
                f"Unknown vector index type: {index_type}. Valid options are"
                " 'IVF_FLAT', 'IVF_PQ', 'IVF_RQ', 'IVF_SQ',"
-                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ', 'IVF_HNSW_FLAT'"
+                " 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
            )

        LOOP.run(
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -57,7 +57,6 @@ from .index import (
    LabelList,
    HnswPq,
    HnswSq,
-    HnswFlat,
    FTS,
 )
 from .merge import LanceMergeInsertBuilder
@@ -87,59 +86,6 @@ from .util import (
 )
 from .index import lang_mapping

-_MODEL_BACKED_TOKENIZER_PREFIXES = ("jieba", "lindera")
-_MODEL_BACKED_TOKENIZER_ERRORS = (
-    "unknown base tokenizer",
-    "Invalid directory path:",
-    "Failed to load Jieba",
-    "Failed to load tokenizer config",
-    "Failed to initialize default tokenizer",
-)
-
-
-def _add_unique_note(exception: BaseException, note: str) -> None:
-    existing_notes = getattr(exception, "__notes__", ()) or ()
-    message = (
-        exception.args[0]
-        if exception.args and isinstance(exception.args[0], str)
-        else ""
-    )
-    if note not in existing_notes and note not in message:
-        add_note(exception, note)
-
-
-def _is_model_backed_tokenizer(base_tokenizer: str) -> bool:
-    return any(
-        base_tokenizer == prefix or base_tokenizer.startswith(f"{prefix}/")
-        for prefix in _MODEL_BACKED_TOKENIZER_PREFIXES
-    )
-
-
-def _maybe_add_fts_error_note(
-    exception: BaseException, *, base_tokenizer: str, language: Optional[str] = None
-) -> None:
-    message = str(exception)
-    if language is not None and "not support the requested language" in message:
-        supported_langs = ", ".join(lang_mapping.values())
-        _add_unique_note(exception, f"Supported languages: {supported_langs}")
-        return
-
-    if not _is_model_backed_tokenizer(base_tokenizer):
-        return
-
-    if not any(marker in message for marker in _MODEL_BACKED_TOKENIZER_ERRORS):
-        return
-
-    _add_unique_note(
-        exception,
-        "Model-backed tokenizers such as 'jieba/default' and 'lindera/ipadic' "
-        "require tokenizer models in Lance's language model home. Set "
-        "LANCE_LANGUAGE_MODEL_HOME to override the default platform data "
-        "directory under 'lance/language_models'. Expected layouts include "
-        "'<model-home>/jieba/default/...' and "
-        "'<model-home>/lindera/ipadic/...'.",
-    )
-

 if TYPE_CHECKING:
    from .db import LanceDBConnection
@@ -1012,10 +958,7 @@ class Table(ABC):
        tokenizer_name: str, default "default"
            A compatibility alias for native tokenizer configs. Can be "raw",
            "default" or the 2 letter language code followed by "_stem". So
-            for english it would be "en_stem". For new native FTS indexes, use
-            ``base_tokenizer`` directly; ``tokenizer_name`` is a legacy
-            compatibility alias and does not expose model-backed tokenizer names
-            such as ``jieba/default`` or ``lindera/ipadic``.
+            for english it would be "en_stem".
        use_tantivy: bool, default False
            Deprecated legacy Tantivy parameter. Setting this to True raises an
            error.
@@ -1029,11 +972,8 @@ class Table(ABC):
            - "whitespace": Split text by whitespace, but not punctuation.
            - "raw": No tokenization. The entire text is treated as a single token.
            - "ngram": N-Gram tokenizer.
-            - "jieba/*": Jieba tokenizer loaded from Lance's language model home.
-            - "lindera/*": Lindera tokenizer loaded from Lance's language model home.
        language : str, default "English"
-            The language to use for stemming and stop-word removal. This is not
-            the primary way to enable CJK tokenization.
+            The language to use for tokenization.
        max_token_length : int, default 40
            The maximum token length to index. Tokens longer than this length will be
            ignored.
@@ -1059,13 +999,6 @@ class Table(ABC):
            The timeout to wait if indexing is asynchronous.
        name: str, optional
            The name of the index. If not provided, a default name will be generated.
-
-        Notes
-        -----
-        Model-backed tokenizers such as ``jieba/default`` and ``lindera/ipadic``
-        require tokenizer models in Lance's language model home. Set
-        ``LANCE_LANGUAGE_MODEL_HOME`` to override the default platform data
-        directory under ``lance/language_models``.
        """
        raise NotImplementedError

@@ -2237,13 +2170,7 @@ class LanceTable(Table):
        index_cache_size: Optional[int] = None,
        num_bits: int = 8,
        index_type: Literal[
-            "IVF_FLAT",
-            "IVF_SQ",
-            "IVF_PQ",
-            "IVF_RQ",
-            "IVF_HNSW_SQ",
-            "IVF_HNSW_PQ",
-            "IVF_HNSW_FLAT",
+            "IVF_FLAT", "IVF_SQ", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
        ] = "IVF_PQ",
        max_iterations: int = 50,
        sample_rate: int = 256,
@@ -2330,16 +2257,6 @@ class LanceTable(Table):
                ef_construction=ef_construction,
                target_partition_size=target_partition_size,
            )
-        elif index_type == "IVF_HNSW_FLAT":
-            config = HnswFlat(
-                distance_type=metric,
-                num_partitions=num_partitions,
-                max_iterations=max_iterations,
-                sample_rate=sample_rate,
-                m=m,
-                ef_construction=ef_construction,
-                target_partition_size=target_partition_size,
-            )
        else:
            raise ValueError(f"Unknown index type {index_type}")

@@ -2545,22 +2462,14 @@ class LanceTable(Table):
            **tokenizer_configs,
        )

-        try:
-            LOOP.run(
-                self._table.create_index(
-                    field_names,
-                    replace=replace,
-                    config=config,
-                    name=name,
-                )
+        LOOP.run(
+            self._table.create_index(
+                field_names,
+                replace=replace,
+                config=config,
+                name=name,
            )
-        except (ValueError, RuntimeError) as e:
-            _maybe_add_fts_error_note(
-                e,
-                base_tokenizer=config.base_tokenizer,
-                language=config.language,
-            )
-            raise e
+        )

    @staticmethod
    def infer_tokenizer_configs(tokenizer_name: str) -> dict:
@@ -3890,18 +3799,7 @@ class AsyncTable:
        *,
        replace: Optional[bool] = None,
        config: Optional[
-            Union[
-                IvfFlat,
-                IvfPq,
-                IvfRq,
-                HnswPq,
-                HnswSq,
-                HnswFlat,
-                BTree,
-                Bitmap,
-                LabelList,
-                FTS,
-            ]
+            Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
        ] = None,
        wait_timeout: Optional[timedelta] = None,
        name: Optional[str] = None,
@@ -3948,7 +3846,6 @@ class AsyncTable:
                    IvfRq,
                    HnswPq,
                    HnswSq,
-                    HnswFlat,
                    BTree,
                    Bitmap,
                    LabelList,
@@ -3968,13 +3865,11 @@ class AsyncTable:
                name=name,
                train=train,
            )
-        except (ValueError, RuntimeError) as e:
-            if isinstance(config, FTS):
-                _maybe_add_fts_error_note(
-                    e,
-                    base_tokenizer=config.base_tokenizer,
-                    language=config.language,
-                )
+        except ValueError as e:
+            if "not support the requested language" in str(e):
+                supported_langs = ", ".join(lang_mapping.values())
+                help_msg = f"Supported languages: {supported_langs}"
+                add_note(e, help_msg)
            raise e

    async def drop_index(self, name: str) -> None:
@@ -5119,7 +5014,6 @@ class IndexStatistics:
        "IVF_RQ",
        "IVF_HNSW_SQ",
        "IVF_HNSW_PQ",
-        "IVF_HNSW_FLAT",
        "FTS",
        "BTREE",
        "BITMAP",
--- a/python/python/lancedb/types.py
+++ b/python/python/lancedb/types.py
@@ -24,7 +24,6 @@ VectorIndexType = Literal[
    "IVF_PQ",
    "IVF_HNSW_SQ",
    "IVF_HNSW_PQ",
-    "IVF_HNSW_FLAT",
    "IVF_RQ",
 ]
 ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
@@ -32,7 +31,6 @@ IndexType = Literal[
    "IVF_PQ",
    "IVF_HNSW_PQ",
    "IVF_HNSW_SQ",
-    "IVF_HNSW_FLAT",
    "IVF_SQ",
    "FTS",
    "BTREE",
@@ -42,5 +40,4 @@ IndexType = Literal[
 ]

 # Tokenizer literals
-BuiltinTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
-BaseTokenizerType = BuiltinTokenizerType | str
+BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
--- a/python/python/tests/models/jieba/default/dict.txt
+++ b/python/python/tests/models/jieba/default/dict.txt
@@ -1,8 +0,0 @@
-我们 98740 r
-都 202780 d
-有 423765 v
-光明 1219 n
-的 318825 uj
-前途 1263 n
-前 62779 f
-途 857 n
--- a/python/python/tests/models/lindera/ipadic/config.yml
+++ b/python/python/tests/models/lindera/ipadic/config.yml
@@ -1,4 +0,0 @@
-segmenter:
-  mode: "normal"
-  dictionary:
-    path: "./python/tests/models/lindera/ipadic/main"
--- a/python/python/tests/models/lindera/ipadic/main.zip
+++ b/python/python/tests/models/lindera/ipadic/main.zip
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -15,10 +15,7 @@
 #  limitations under the License.
 import os
 import random
-import shutil
 from unittest import mock
-from pathlib import Path
-import zipfile

 import lancedb as ldb
 from lancedb.db import DBConnection
@@ -39,8 +36,6 @@ import pytest
 import pytest_asyncio
 from utils import exception_output

-TEST_LANGUAGE_MODEL_HOME = Path(__file__).parent / "models"
-

@pytest.fixture
 def table(tmp_path) -> ldb.table.LanceTable:
@@ -94,40 +89,6 @@ def table(tmp_path) -> ldb.table.LanceTable:
    return table


-@pytest.fixture
-def language_model_home(monkeypatch, tmp_path):
-    model_home = tmp_path / "language-models"
-    shutil.copytree(TEST_LANGUAGE_MODEL_HOME, model_home)
-    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(model_home))
-    return model_home
-
-
-@pytest.fixture
-def lindera_ipadic(language_model_home):
-    model_path = language_model_home / "lindera" / "ipadic"
-    extracted_model = model_path / "main"
-    config_path = model_path / "config.yml"
-
-    if extracted_model.exists():
-        shutil.rmtree(extracted_model)
-
-    with zipfile.ZipFile(model_path / "main.zip", "r") as zip_ref:
-        zip_ref.extractall(model_path)
-    config_path.write_text(
-        "segmenter:\n"
-        '  mode: "normal"\n'
-        "  dictionary:\n"
-        f'    path: "{extracted_model.resolve().as_posix()}"\n',
-        encoding="utf-8",
-    )
-
-    try:
-        yield
-    finally:
-        if extracted_model.exists():
-            shutil.rmtree(extracted_model)
-
-
@pytest_asyncio.fixture
 async def async_table(tmp_path) -> ldb.table.AsyncTable:
    # Use local random state to avoid affecting other tests
@@ -723,90 +684,6 @@ def test_fts_ngram(mem_db: DBConnection):
    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}


-def test_fts_jieba_tokenizer(mem_db: DBConnection, language_model_home):
-    data = pa.table({"text": ["我们都有光明的前途", "光明的前途"]})
-    table = mem_db.create_table("test_jieba", data=data)
-    table.create_fts_index(
-        "text",
-        base_tokenizer="jieba/default",
-        stem=False,
-        remove_stop_words=False,
-        ascii_folding=False,
-    )
-
-    results = table.search("我们", query_type="fts").limit(10).to_list()
-    assert [row["text"] for row in results] == ["我们都有光明的前途"]
-
-
-def test_fts_jieba_missing_language_model_note(
-    mem_db: DBConnection, monkeypatch, tmp_path
-):
-    missing_root = tmp_path / "missing-language-models"
-    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
-    table = mem_db.create_table(
-        "test_missing_jieba_model",
-        data=pa.table({"text": ["我们都有光明的前途"]}),
-    )
-
-    with pytest.raises((ValueError, RuntimeError)) as e:
-        table.create_fts_index(
-            "text",
-            base_tokenizer="jieba/default",
-            stem=False,
-            remove_stop_words=False,
-            ascii_folding=False,
-        )
-
-    output = exception_output(e)
-    assert "Invalid directory path:" in output
-    assert "LANCE_LANGUAGE_MODEL_HOME" in output
-    assert "jieba/default" in output
-
-
-@pytest.mark.asyncio
-async def test_fts_jieba_missing_language_model_note_async(monkeypatch, tmp_path):
-    missing_root = tmp_path / "missing-language-models"
-    monkeypatch.setenv("LANCE_LANGUAGE_MODEL_HOME", str(missing_root))
-    db = await ldb.connect_async(tmp_path / "async-db")
-    table = await db.create_table(
-        "test_missing_jieba_model_async",
-        data=pa.table({"text": ["我们都有光明的前途"]}),
-    )
-
-    with pytest.raises((ValueError, RuntimeError)) as e:
-        await table.create_index(
-            "text",
-            config=FTS(
-                base_tokenizer="jieba/default",
-                stem=False,
-                remove_stop_words=False,
-                ascii_folding=False,
-            ),
-        )
-
-    output = exception_output(e)
-    assert "Invalid directory path:" in output
-    assert "LANCE_LANGUAGE_MODEL_HOME" in output
-    assert "jieba/default" in output
-
-
-def test_fts_lindera_tokenizer(
-    mem_db: DBConnection, language_model_home, lindera_ipadic
-):
-    data = pa.table({"text": ["成田国際空港", "東京国際空港", "羽田空港"]})
-    table = mem_db.create_table("test_lindera", data=data)
-    table.create_fts_index(
-        "text",
-        base_tokenizer="lindera/ipadic",
-        stem=False,
-        remove_stop_words=False,
-        ascii_folding=False,
-    )
-
-    results = table.search("成田", query_type="fts").limit(10).to_list()
-    assert [row["text"] for row in results] == ["成田国際空港"]
-
-
 def test_fts_query_to_json():
    """Test that FTS query to_json() produces valid JSON strings with exact format."""

--- a/python/python/tests/test_index.py
+++ b/python/python/tests/test_index.py
@@ -16,13 +16,11 @@ from lancedb.index import (
    IvfSq,
    IvfHnswPq,
    IvfHnswSq,
-    IvfHnswFlat,
    IvfRq,
    Bitmap,
    LabelList,
    HnswPq,
    HnswSq,
-    HnswFlat,
    FTS,
 )
 from lancedb.table import IndexStatistics
@@ -252,21 +250,6 @@ async def test_create_hnswpq_alias_index(some_table: AsyncTable):
    assert indices[0].index_type in {"HnswPq", "IvfHnswPq"}


-@pytest.mark.asyncio
-async def test_create_hnswflat_index(some_table: AsyncTable):
-    await some_table.create_index("vector", config=HnswFlat(num_partitions=10))
-    indices = await some_table.list_indices()
-    assert len(indices) == 1
-
-
-@pytest.mark.asyncio
-async def test_create_hnswflat_alias_index(some_table: AsyncTable):
-    await some_table.create_index("vector", config=IvfHnswFlat(num_partitions=5))
-    indices = await some_table.list_indices()
-    assert len(indices) == 1
-    assert indices[0].index_type in {"HnswFlat", "IvfHnswFlat"}
-
-
@pytest.mark.asyncio
 async def test_create_ivfsq_index(some_table: AsyncTable):
    await some_table.create_index("vector", config=IvfSq(num_partitions=10))
@@ -312,7 +295,6 @@ def test_index_statistics_index_type_lists_all_supported_values():
        "IVF_RQ",
        "IVF_HNSW_SQ",
        "IVF_HNSW_PQ",
-        "IVF_HNSW_FLAT",
        "FTS",
        "BTREE",
        "BITMAP",
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -11,7 +11,7 @@ from unittest.mock import patch

 import lancedb
 from lancedb.dependencies import _PANDAS_AVAILABLE
-from lancedb.index import HnswFlat, HnswPq, HnswSq, IvfPq
+from lancedb.index import HnswPq, HnswSq, IvfPq
 import numpy as np
 import polars as pl
 import pyarrow as pa
@@ -917,21 +917,6 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
        "my_vector", replace=True, config=expected_config, name=None, train=True
    )

-    table.create_index(
-        vector_column_name="my_vector",
-        metric="cosine",
-        index_type="IVF_HNSW_FLAT",
-        sample_rate=0.1,
-        m=29,
-        ef_construction=10,
-    )
-    expected_config = HnswFlat(
-        distance_type="cosine", sample_rate=0.1, m=29, ef_construction=10
-    )
-    mock_create_index.assert_called_with(
-        "my_vector", replace=True, config=expected_config, name=None, train=True
-    )
-

@patch("lancedb.table.AsyncTable.create_index")
 def test_create_index_name_and_train_parameters(
--- a/python/src/index.rs
+++ b/python/src/index.rs
@@ -1,13 +1,11 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use lancedb::index::vector::{
-    IvfFlatIndexBuilder, IvfHnswFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder,
-    IvfPqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
-};
+use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
 use lancedb::index::{
    Index as LanceDbIndex,
    scalar::{BTreeIndexBuilder, FtsIndexBuilder},
+    vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
 };
 use pyo3::IntoPyObject;
 use pyo3::types::PyStringMethods;
@@ -164,26 +162,8 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                }
                Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
            }
-            "HnswFlat" => {
-                let params = source.extract::<IvfHnswFlatParams>()?;
-                let distance_type = parse_distance_type(params.distance_type)?;
-                let mut hnsw_flat_builder = IvfHnswFlatIndexBuilder::default()
-                    .distance_type(distance_type)
-                    .max_iterations(params.max_iterations)
-                    .sample_rate(params.sample_rate)
-                    .num_edges(params.m)
-                    .ef_construction(params.ef_construction);
-                if let Some(num_partitions) = params.num_partitions {
-                    hnsw_flat_builder = hnsw_flat_builder.num_partitions(num_partitions);
-                }
-                if let Some(target_partition_size) = params.target_partition_size {
-                    hnsw_flat_builder =
-                        hnsw_flat_builder.target_partition_size(target_partition_size);
-                }
-                Ok(LanceDbIndex::IvfHnswFlat(hnsw_flat_builder))
-            }
            not_supported => Err(PyValueError::new_err(format!(
-                "Invalid index type '{}'.  Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, IvfHnswSq, or IvfHnswFlat",
+                "Invalid index type '{}'.  Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
                not_supported
            ))),
        }
@@ -270,17 +250,6 @@ struct IvfHnswSqParams {
    target_partition_size: Option<u32>,
 }

-#[derive(FromPyObject)]
-struct IvfHnswFlatParams {
-    distance_type: String,
-    num_partitions: Option<u32>,
-    max_iterations: u32,
-    sample_rate: u32,
-    m: u32,
-    ef_construction: u32,
-    target_partition_size: Option<u32>,
-}
-
 #[pyclass(get_all)]
 /// A description of an index currently configured on a column
 pub struct IndexConfig {
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.29.0"
+version = "0.28.0-beta.11"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -40,7 +40,7 @@ lance-datafusion.workspace = true
 lance-datagen = { workspace = true }
 lance-file = { workspace = true }
 lance-io = { workspace = true }
-lance-index = { workspace = true, features = ["tokenizer-jieba", "tokenizer-lindera"] }
+lance-index = { workspace = true }
 lance-table = { workspace = true }
 lance-linalg = { workspace = true }
 lance-testing = { workspace = true }
@@ -108,7 +108,12 @@ test-log = "0.2"

 [features]
 default = []
-aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
+aws = [
+    "lance/aws",
+    "lance-io/aws",
+    "lance-namespace-impls/dir-aws",
+    "object_store/aws",
+]
 oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
 gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
 azure = [
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -505,15 +505,8 @@ impl ListingDatabase {
                // Filter out the commit store query param -- it's a lancedb param
                url.query_pairs_mut().clear();
                url.query_pairs_mut().extend_pairs(filtered_querys);
-                // Take a copy of the query string so we can propagate it to lance.
-                // `query_pairs_mut()` leaves the URL with `Some("")` even when no
-                // pairs survive (or none existed in the first place), so an empty
-                // string here must be treated the same as "no query" — otherwise
-                // every table URI ends up with a trailing `?`, which makes downstream
-                // sub-paths (e.g. MemWAL gen paths) re-parse as path=<base table> +
-                // query=<sub-path>, causing Lance to find the base table dataset
-                // when looking up the sub-path.
-                let query_string = url.query().filter(|q| !q.is_empty()).map(|s| s.to_string());
+                // Take a copy of the query string so we can propagate it to lance
+                let query_string = url.query().map(|s| s.to_string());
                // clear the query string so we can use the url as the base uri
                // use .set_query(None) instead of .set_query("") because the latter
                // will add a trailing '?' to the url
@@ -722,7 +715,7 @@ impl ListingDatabase {
        let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params)).await?;
        for name in names {
            let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
-            let full_path = self.base_path.child(dir_name.clone());
+            let full_path = self.base_path.clone().join(dir_name.clone());

            commit_handler.delete(&full_path).await?;

@@ -2220,133 +2213,6 @@ mod tests {
        assert_eq!(uri, expected);
    }

-    /// Regression: connecting via a URL-style URI (which goes through
-    /// `url::Url::parse` and the `query_pairs_mut()` path) must not
-    /// append a trailing `?` to per-table URIs when the input URI has
-    /// no query string.
-    ///
-    /// Earlier, `query_pairs_mut().clear()` left the URL with
-    /// `query=Some("")`, which then propagated as a trailing `?` on
-    /// every table URI. Sub-path lookups against that URI (e.g. MemWAL
-    /// `<table_uri>/_mem_wal/<shard>/<rand>_gen_<n>`) re-parsed as
-    /// `path=<base table>` + `query=/_mem_wal/...`, causing
-    /// `Dataset::write` to find the base table dataset and falsely
-    /// report `Dataset already exists`.
-    /// Mirrors the URL-mutation step from
-    /// [`ListingDatabase::connect_with_options`] so we can assert the
-    /// fix without going through filesystem setup (which is awkward
-    /// across platforms — see the `file://` test below).
-    fn capture_query_like_connect(input_uri: &str) -> Option<String> {
-        let mut url = url::Url::parse(input_uri).unwrap();
-        let mut filtered_querys = Vec::new();
-        for (key, value) in url.query_pairs() {
-            if key == ENGINE || key == MIRRORED_STORE {
-                continue;
-            }
-            filtered_querys.push((key.to_string(), value.to_string()));
-        }
-        url.query_pairs_mut().clear();
-        url.query_pairs_mut().extend_pairs(filtered_querys);
-        url.query().filter(|q| !q.is_empty()).map(|s| s.to_string())
-    }
-
-    #[test]
-    fn test_capture_query_treats_empty_as_none() {
-        // No query at all. With the bug, `query_pairs_mut()` left the
-        // URL with `query=Some("")` and we used to propagate that.
-        assert_eq!(
-            capture_query_like_connect("s3://bucket/prefix/"),
-            None,
-            "empty query after mutation must be treated as no query"
-        );
-
-        // Real query is propagated.
-        assert_eq!(
-            capture_query_like_connect("s3://bucket/prefix/?foo=bar"),
-            Some("foo=bar".to_string())
-        );
-
-        // lancedb-internal `engine=` is stripped; nothing remains, so
-        // query_string is None — not Some("").
-        assert_eq!(
-            capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem", ENGINE)),
-            None
-        );
-
-        // Mixed: drop `engine=`, keep the rest.
-        let captured =
-            capture_query_like_connect(&format!("s3://bucket/prefix/?{}=mem&foo=bar", ENGINE));
-        assert_eq!(captured.as_deref(), Some("foo=bar"));
-    }
-
-    /// Regression: connecting via a URL-style URI (which goes through
-    /// `url::Url::parse` and the `query_pairs_mut()` path) must not
-    /// append a trailing `?` to per-table URIs when the input URI has
-    /// no query string. Sub-path lookups against such a URI (e.g.
-    /// MemWAL `<table_uri>/_mem_wal/<shard>/<rand>_gen_<n>`) re-parse
-    /// as `path=<base table>` + `query=/_mem_wal/...`, causing
-    /// `Dataset::write` to find the base table dataset and falsely
-    /// report `Dataset already exists`.
-    ///
-    /// Skipped on Windows: `try_create_dir` does not understand
-    /// `file:///C:/…` paths so `connect_with_options` fails before
-    /// even reaching the URL-mutation logic. The pure URL-mutation
-    /// invariant is covered by
-    /// `test_capture_query_treats_empty_as_none` above, which runs
-    /// on all platforms.
-    #[cfg(not(windows))]
-    #[tokio::test]
-    async fn test_table_uri_url_path_has_no_trailing_question_mark() {
-        let tempdir = tempdir().unwrap();
-        let uri = format!("file://{}", tempdir.path().to_str().unwrap());
-
-        let request = ConnectRequest {
-            uri: uri.clone(),
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            options: Default::default(),
-            namespace_client_properties: Default::default(),
-            manifest_enabled: false,
-            read_consistency_interval: None,
-            session: None,
-        };
-        let db = ListingDatabase::connect_with_options(&request)
-            .await
-            .unwrap();
-
-        assert_eq!(
-            db.query_string, None,
-            "no input query → no captured query_string"
-        );
-
-        let table_uri = db.table_uri("test").unwrap();
-        assert!(
-            !table_uri.ends_with('?'),
-            "table_uri must not have a trailing `?`: {}",
-            table_uri
-        );
-        assert_eq!(table_uri, format!("{}/test.lance", uri));
-
-        // A real query string should still be propagated.
-        let with_query = format!("{}?foo=bar", uri);
-        let request_with_query = ConnectRequest {
-            uri: with_query,
-            #[cfg(feature = "remote")]
-            client_config: Default::default(),
-            options: Default::default(),
-            namespace_client_properties: Default::default(),
-            manifest_enabled: false,
-            read_consistency_interval: None,
-            session: None,
-        };
-        let db_with_query = ListingDatabase::connect_with_options(&request_with_query)
-            .await
-            .unwrap();
-        assert_eq!(db_with_query.query_string.as_deref(), Some("foo=bar"));
-        let table_uri = db_with_query.table_uri("test").unwrap();
-        assert_eq!(table_uri, format!("{}/test.lance?foo=bar", uri));
-    }
-
    #[tokio::test]
    async fn test_namespace_client() {
        let (_tempdir, db) = setup_database().await;
--- a/rust/lancedb/src/index.rs
+++ b/rust/lancedb/src/index.rs
@@ -13,10 +13,7 @@ use crate::{DistanceType, Error, Result, table::BaseTable};

 use self::{
    scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
-    vector::{
-        IvfHnswFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
-        IvfSqIndexBuilder,
-    },
+    vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, IvfSqIndexBuilder},
 };

 pub mod scalar;
@@ -70,10 +67,6 @@ pub enum Index {
    /// IVF-HNSW index with Scalar Quantization
    /// It is a variant of the HNSW algorithm that uses scalar quantization to compress the vectors.
    IvfHnswSq(IvfHnswSqIndexBuilder),
-
-    /// IVF-HNSW index without quantization.
-    /// Stores raw vectors, providing the highest recall at the cost of more memory and disk space.
-    IvfHnswFlat(IvfHnswFlatIndexBuilder),
 }

 /// Builder for the create_index operation
@@ -297,8 +290,6 @@ pub enum IndexType {
    IvfHnswPq,
    #[serde(alias = "IVF_HNSW_SQ")]
    IvfHnswSq,
-    #[serde(alias = "IVF_HNSW_FLAT")]
-    IvfHnswFlat,
    // Scalar
    #[serde(alias = "BTREE")]
    BTree,
@@ -320,7 +311,6 @@ impl std::fmt::Display for IndexType {
            Self::IvfRq => write!(f, "IVF_RQ"),
            Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
            Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
-            Self::IvfHnswFlat => write!(f, "IVF_HNSW_FLAT"),
            Self::BTree => write!(f, "BTREE"),
            Self::Bitmap => write!(f, "BITMAP"),
            Self::LabelList => write!(f, "LABEL_LIST"),
@@ -344,7 +334,6 @@ impl std::str::FromStr for IndexType {
            "IVF_RQ" => Ok(Self::IvfRq),
            "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
            "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
-            "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
            _ => Err(Error::InvalidInput {
                message: format!("the input value {} is not a valid IndexType", value),
            }),
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -474,46 +474,3 @@ impl IvfHnswSqIndexBuilder {
    impl_ivf_params_setter!();
    impl_hnsw_params_setter!();
 }
-
-/// Builder for an IVF_HNSW_FLAT index.
-///
-/// This index combines IVF partitioning with an HNSW graph per partition,
-/// storing raw (unquantized) vectors. It offers the highest recall among
-/// the IVF_HNSW family at the cost of more memory and disk space compared
-/// to [`IvfHnswSqIndexBuilder`] or [`IvfHnswPqIndexBuilder`].
-#[derive(Debug, Clone, Serialize)]
-pub struct IvfHnswFlatIndexBuilder {
-    // IVF
-    #[serde(rename = "metric_type")]
-    pub(crate) distance_type: DistanceType,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub(crate) num_partitions: Option<u32>,
-    pub(crate) sample_rate: u32,
-    pub(crate) max_iterations: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub(crate) target_partition_size: Option<u32>,
-
-    // HNSW
-    pub(crate) m: u32,
-    pub(crate) ef_construction: u32,
-}
-
-impl Default for IvfHnswFlatIndexBuilder {
-    fn default() -> Self {
-        Self {
-            distance_type: DistanceType::L2,
-            num_partitions: None,
-            sample_rate: 256,
-            max_iterations: 50,
-            m: 20,
-            ef_construction: 300,
-            target_partition_size: None,
-        }
-    }
-}
-
-impl IvfHnswFlatIndexBuilder {
-    impl_distance_type_setter!();
-    impl_ivf_params_setter!();
-    impl_hnsw_params_setter!();
-}
--- a/rust/lancedb/src/io/object_store.rs
+++ b/rust/lancedb/src/io/object_store.rs
@@ -5,11 +5,12 @@

 use std::{fmt::Formatter, sync::Arc};

-use futures::{TryFutureExt, stream::BoxStream};
+use futures::{StreamExt, TryFutureExt, stream::BoxStream};
 use lance::io::WrappingObjectStore;
 use object_store::{
-    Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
-    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path,
+    CopyOptions, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta,
+    ObjectStore, ObjectStoreExt, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result,
+    UploadPart, path::Path,
 };

 use async_trait::async_trait;
@@ -93,20 +94,6 @@ impl ObjectStore for MirroringObjectStore {
        self.primary.get_opts(location, options).await
    }

-    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
-        self.primary.head(location).await
-    }
-
-    async fn delete(&self, location: &Path) -> Result<()> {
-        if !location.primary_only() {
-            match self.secondary.delete(location).await {
-                Err(Error::NotFound { .. }) | Ok(_) => {}
-                Err(e) => return Err(e),
-            }
-        }
-        self.primary.delete(location).await
-    }
-
    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
        self.primary.list(prefix)
    }
@@ -115,21 +102,40 @@ impl ObjectStore for MirroringObjectStore {
        self.primary.list_with_delimiter(prefix).await
    }

-    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
-        if to.primary_only() {
-            self.primary.copy(from, to).await
-        } else {
-            self.secondary.copy(from, to).await?;
-            self.primary.copy(from, to).await?;
-            Ok(())
-        }
+    fn delete_stream(
+        &self,
+        locations: BoxStream<'static, Result<Path>>,
+    ) -> BoxStream<'static, Result<Path>> {
+        let primary = self.primary.clone();
+        let secondary = self.secondary.clone();
+        locations
+            .map(move |location| {
+                let primary = primary.clone();
+                let secondary = secondary.clone();
+                async move {
+                    let location = location?;
+                    if !location.primary_only() {
+                        match secondary.delete(&location).await {
+                            Err(Error::NotFound { .. }) | Ok(_) => {}
+                            Err(e) => return Err(e),
+                        }
+                    }
+                    primary.delete(&location).await?;
+                    Ok(location)
+                }
+            })
+            .buffered(10)
+            .boxed()
    }

-    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
-        if !to.primary_only() {
-            self.secondary.copy(from, to).await?;
+    async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> Result<()> {
+        if to.primary_only() {
+            self.primary.copy_opts(from, to, options).await
+        } else {
+            self.secondary.copy_opts(from, to, options.clone()).await?;
+            self.primary.copy_opts(from, to, options).await?;
+            Ok(())
        }
-        self.primary.copy_if_not_exists(from, to).await
    }
 }

--- a/rust/lancedb/src/io/object_store/io_tracking.rs
+++ b/rust/lancedb/src/io/object_store/io_tracking.rs
@@ -10,9 +10,9 @@ use bytes::Bytes;
 use futures::stream::BoxStream;
 use lance::io::WrappingObjectStore;
 use object_store::{
-    GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
-    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
-    path::Path,
+    CopyOptions, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
+    PutMultipartOptions, PutOptions, PutPayload, PutResult, RenameOptions, Result as OSResult,
+    UploadPart, path::Path,
 };

 #[derive(Debug, Default)]
@@ -81,11 +81,6 @@ impl IoTrackingStore {
 #[async_trait::async_trait]
 #[deny(clippy::missing_trait_methods)]
 impl ObjectStore for IoTrackingStore {
-    async fn put(&self, location: &Path, bytes: PutPayload) -> OSResult<PutResult> {
-        self.record_write(bytes.content_length() as u64);
-        self.target.put(location, bytes).await
-    }
-
    async fn put_opts(
        &self,
        location: &Path,
@@ -96,14 +91,6 @@ impl ObjectStore for IoTrackingStore {
        self.target.put_opts(location, bytes, opts).await
    }

-    async fn put_multipart(&self, location: &Path) -> OSResult<Box<dyn MultipartUpload>> {
-        let target = self.target.put_multipart(location).await?;
-        Ok(Box::new(IoTrackingMultipartUpload {
-            target,
-            stats: self.stats.clone(),
-        }))
-    }
-
    async fn put_multipart_opts(
        &self,
        location: &Path,
@@ -116,15 +103,6 @@ impl ObjectStore for IoTrackingStore {
        }))
    }

-    async fn get(&self, location: &Path) -> OSResult<GetResult> {
-        let result = self.target.get(location).await;
-        if let Ok(result) = &result {
-            let num_bytes = result.range.end - result.range.start;
-            self.record_read(num_bytes);
-        }
-        result
-    }
-
    async fn get_opts(&self, location: &Path, options: GetOptions) -> OSResult<GetResult> {
        let result = self.target.get_opts(location, options).await;
        if let Ok(result) = &result {
@@ -134,14 +112,6 @@ impl ObjectStore for IoTrackingStore {
        result
    }

-    async fn get_range(&self, location: &Path, range: std::ops::Range<u64>) -> OSResult<Bytes> {
-        let result = self.target.get_range(location, range).await;
-        if let Ok(result) = &result {
-            self.record_read(result.len() as u64);
-        }
-        result
-    }
-
    async fn get_ranges(
        &self,
        location: &Path,
@@ -154,20 +124,11 @@ impl ObjectStore for IoTrackingStore {
        result
    }

-    async fn head(&self, location: &Path) -> OSResult<ObjectMeta> {
-        self.record_read(0);
-        self.target.head(location).await
-    }
-
-    async fn delete(&self, location: &Path) -> OSResult<()> {
+    fn delete_stream(
+        &self,
+        locations: BoxStream<'static, OSResult<Path>>,
+    ) -> BoxStream<'static, OSResult<Path>> {
        self.record_write(0);
-        self.target.delete(location).await
-    }
-
-    fn delete_stream<'a>(
-        &'a self,
-        locations: BoxStream<'a, OSResult<Path>>,
-    ) -> BoxStream<'a, OSResult<Path>> {
        self.target.delete_stream(locations)
    }

@@ -190,24 +151,14 @@ impl ObjectStore for IoTrackingStore {
        self.target.list_with_delimiter(prefix).await
    }

-    async fn copy(&self, from: &Path, to: &Path) -> OSResult<()> {
+    async fn copy_opts(&self, from: &Path, to: &Path, options: CopyOptions) -> OSResult<()> {
        self.record_write(0);
-        self.target.copy(from, to).await
+        self.target.copy_opts(from, to, options).await
    }

-    async fn rename(&self, from: &Path, to: &Path) -> OSResult<()> {
+    async fn rename_opts(&self, from: &Path, to: &Path, options: RenameOptions) -> OSResult<()> {
        self.record_write(0);
-        self.target.rename(from, to).await
-    }
-
-    async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> {
-        self.record_write(0);
-        self.target.rename_if_not_exists(from, to).await
-    }
-
-    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> {
-        self.record_write(0);
-        self.target.copy_if_not_exists(from, to).await
+        self.target.rename_opts(from, to, options).await
    }
 }

--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -1540,7 +1540,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            Index::IvfPq(p) => ("IVF_PQ", Some(to_json(p)?)),
            Index::IvfSq(p) => ("IVF_SQ", Some(to_json(p)?)),
            Index::IvfHnswSq(p) => ("IVF_HNSW_SQ", Some(to_json(p)?)),
-            Index::IvfHnswFlat(p) => ("IVF_HNSW_FLAT", Some(to_json(p)?)),
            Index::IvfRq(p) => ("IVF_RQ", Some(to_json(p)?)),
            Index::BTree(p) => ("BTREE", Some(to_json(p)?)),
            Index::Bitmap(p) => ("BITMAP", Some(to_json(p)?)),
@@ -2069,8 +2068,7 @@ mod tests {
    use serde_json::json;

    use crate::index::vector::{
-        IvfFlatIndexBuilder, IvfHnswFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder,
-        IvfSqIndexBuilder,
+        IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
    };
    use crate::remote::JSON_CONTENT_TYPE;
    use crate::remote::db::DEFAULT_SERVER_VERSION;
@@ -3323,35 +3321,6 @@ mod tests {
                        .ef_construction(500),
                ),
            ),
-            (
-                "IVF_HNSW_FLAT",
-                json!({
-                    "metric_type": "l2",
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                    "m": 20,
-                    "ef_construction": 300,
-                }),
-                Index::IvfHnswFlat(Default::default()),
-            ),
-            (
-                "IVF_HNSW_FLAT",
-                json!({
-                    "metric_type": "cosine",
-                    "num_partitions": 64,
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                    "m": 40,
-                    "ef_construction": 500,
-                }),
-                Index::IvfHnswFlat(
-                    IvfHnswFlatIndexBuilder::default()
-                        .distance_type(DistanceType::Cosine)
-                        .num_partitions(64)
-                        .num_edges(40)
-                        .ef_construction(500),
-                ),
-            ),
            (
                "IVF_SQ",
                json!({
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -2033,24 +2033,6 @@ impl NativeTable {
                );
                Ok(Box::new(lance_idx_params))
            }
-            Index::IvfHnswFlat(index) => {
-                Self::validate_index_type(field, "IVF HNSW FLAT", supported_vector_data_type)?;
-                let ivf_params = Self::build_ivf_params(
-                    index.num_partitions,
-                    index.target_partition_size,
-                    index.sample_rate,
-                    index.max_iterations,
-                );
-                let hnsw_params = HnswBuildParams::default()
-                    .num_edges(index.m as usize)
-                    .ef_construction(index.ef_construction as usize);
-                let lance_idx_params = VectorIndexParams::ivf_hnsw(
-                    index.distance_type.into(),
-                    ivf_params,
-                    hnsw_params,
-                );
-                Ok(Box::new(lance_idx_params))
-            }
        }
    }

@@ -2076,8 +2058,7 @@ impl NativeTable {
            | Index::IvfPq(_)
            | Index::IvfRq(_)
            | Index::IvfHnswPq(_)
-            | Index::IvfHnswSq(_)
-            | Index::IvfHnswFlat(_) => IndexType::Vector,
+            | Index::IvfHnswSq(_) => IndexType::Vector,
        }
    }

@@ -3195,56 +3176,6 @@ mod tests {
        assert_eq!(stats.num_unindexed_rows, 0);
    }

-    #[tokio::test]
-    async fn test_create_index_ivf_hnsw_flat() {
-        use arrow_array::RecordBatch;
-        use arrow_schema::{DataType, Field, Schema as ArrowSchema};
-        use rand;
-        use std::iter::repeat_with;
-
-        use crate::index::vector::IvfHnswFlatIndexBuilder;
-        use arrow_array::Float32Array;
-
-        let tmp_dir = tempdir().unwrap();
-        let uri = tmp_dir.path().to_str().unwrap();
-        let conn = connect(uri).execute().await.unwrap();
-
-        let dimension = 16;
-        let schema = Arc::new(ArrowSchema::new(vec![Field::new(
-            "embeddings",
-            DataType::FixedSizeList(
-                Arc::new(Field::new("item", DataType::Float32, true)),
-                dimension,
-            ),
-            false,
-        )]));
-
-        let float_arr = Float32Array::from(
-            repeat_with(rand::random::<f32>)
-                .take(512 * dimension as usize)
-                .collect::<Vec<f32>>(),
-        );
-
-        let vectors = Arc::new(create_fixed_size_list(float_arr, dimension).unwrap());
-        let batch = RecordBatch::try_new(schema.clone(), vec![vectors.clone()]).unwrap();
-
-        let table = conn.create_table("test", batch).execute().await.unwrap();
-
-        let index = IvfHnswFlatIndexBuilder::default();
-        table
-            .create_index(&["embeddings"], Index::IvfHnswFlat(index))
-            .execute()
-            .await
-            .unwrap();
-
-        let index_configs = table.list_indices().await.unwrap();
-        assert_eq!(index_configs.len(), 1);
-        let index = index_configs.into_iter().next().unwrap();
-        assert_eq!(index.index_type, crate::index::IndexType::IvfHnswFlat);
-        assert_eq!(index.columns, vec!["embeddings".to_string()]);
-        assert_eq!(table.count_rows(None).await.unwrap(), 512);
-    }
-
    fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {
        let list_type = DataType::FixedSizeList(
            Arc::new(Field::new("item", values.data_type().clone(), true)),