Bump version: 0.20.0 → 0.20.1-beta.0

2025-12-25 22:29:58 +00:00 · 2025-06-16 16:29:43 +00:00
63 changed files with 854 additions and 1494 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.21.1"
+current_version = "0.20.1-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -541,18 +541,10 @@ jobs:
        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
      - name: Checkout
        uses: actions/checkout@v4
-        with:
-          ref: main
      - name: Update package-lock.json
-        run: |
-          git config user.name 'Lance Release'
-          git config user.email 'lance-dev@lancedb.com'
-          bash ci/update_lockfiles.sh
+        run: bash ci/update_lockfiles.sh
      - name: Push new commit
        uses: ad-m/github-push-action@master
-        with:
-          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
-          branch: main
      - name: Notify Slack Action
        uses: ravsamhq/notify-slack-action@2.3.0
        if: ${{ always() }}
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,16 +21,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.31.2", "features" = [
-    "dynamodb",
-], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.29.1", "features" = ["dynamodb"], tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
+lance-io = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
+lance-index = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
+lance-linalg = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
+lance-table = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
+lance-testing = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
+lance-datafusion = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
+lance-encoding = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
@@ -41,20 +39,20 @@ arrow-schema = "55.1"
 arrow-arith = "55.1"
 arrow-cast = "55.1"
 async-trait = "0"
-datafusion = { version = "48.0", default-features = false }
-datafusion-catalog = "48.0"
-datafusion-common = { version = "48.0", default-features = false }
-datafusion-execution = "48.0"
-datafusion-expr = "48.0"
-datafusion-physical-plan = "48.0"
+datafusion = { version = "47.0", default-features = false }
+datafusion-catalog = "47.0"
+datafusion-common = { version = "47.0", default-features = false }
+datafusion-execution = "47.0"
+datafusion-expr = "47.0"
+datafusion-physical-plan = "47.0"
 env_logger = "0.11"
-half = { "version" = "2.6.0", default-features = false, features = [
+half = { "version" = "=2.5.0", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.12.0"
+object_store = "0.11.0"
 pin-project = "1.0.7"
 snafu = "0.8"
 url = "2"
--- a/ci/set_lance_version.py
+++ b/ci/set_lance_version.py
@@ -47,10 +47,10 @@ def extract_features(line: str) -> list:
    """
    import re

-    match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
+    match = re.search(r'"features"\s*=\s*\[(.*?)\]', line)
    if match:
        features_str = match.group(1)
-        return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
+        return [f.strip('"') for f in features_str.split(",")]
    return []


@@ -63,24 +63,10 @@ def update_cargo_toml(line_updater):
        lines = f.readlines()

    new_lines = []
-    lance_line = ""
-    is_parsing_lance_line = False
    for line in lines:
        if line.startswith("lance"):
            # Update the line using the provided function
-            if line.strip().endswith("}"):
-                new_lines.append(line_updater(line))
-            else:
-                lance_line = line
-                is_parsing_lance_line = True
-        elif is_parsing_lance_line:
-            lance_line += line
-            if line.strip().endswith("}"):
-                new_lines.append(line_updater(lance_line))
-                lance_line = ""
-                is_parsing_lance_line = False
-            else:
-                print("doesn't end with }:", line)
+            new_lines.append(line_updater(line))
        else:
            # Keep the line unchanged
            new_lines.append(line)
--- a/docs/src/notebooks/Multivector_on_LanceDB.ipynb
+++ b/docs/src/notebooks/Multivector_on_LanceDB.ipynb
@@ -428,7 +428,7 @@
        "\n",
        "**Why?**  \n",
        "Embedding the UFO dataset and ingesting it into LanceDB takes **~2 hours on a T4 GPU**. To save time:  \n",
-        "- **Use the pre-prepared table with index created** (provided below) to proceed directly to **Step 7**: search.  \n",
+        "- **Use the pre-prepared table with index created ** (provided below) to proceed directly to step7: search.  \n",
        "- **Step 5a** contains the full ingestion code for reference (run it only if necessary).  \n",
        "- **Step 6** contains the details on creating the index on the multivector column"
      ]
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.1-final.0</version>
+        <version>0.20.1-beta.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.21.1-final.0</version>
+    <version>0.20.1-beta.0</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.21.1",
+  "version": "0.20.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.21.1",
+      "version": "0.20.0",
      "cpu": [
        "x64",
        "arm64"
@@ -52,11 +52,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.21.1",
-        "@lancedb/vectordb-darwin-x64": "0.21.1",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.21.1",
-        "@lancedb/vectordb-linux-x64-gnu": "0.21.1",
-        "@lancedb/vectordb-win32-x64-msvc": "0.21.1"
+        "@lancedb/vectordb-darwin-arm64": "0.20.0",
+        "@lancedb/vectordb-darwin-x64": "0.20.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.20.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.20.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.20.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -327,65 +327,60 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.1.tgz",
-      "integrity": "sha512-eXeOKgK5s7MSKDzA7Hl4/9E2X8tWWMNV7UJiFdwxrUcop86tM5ePBi8tApRnaQ3wBXrs99XTVBJ7+j+2gzilVA==",
+      "version": "0.20.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.20.0.tgz",
+      "integrity": "sha512-PEL4vFY42PaWPPnOfOcFBv1E+zumhZPMlQW7/M00ZA8O2uKiTc1xhajhaPcwVDZBYo36SRSIxUz2eYjXWA9sIw==",
      "cpu": [
        "arm64"
      ],
-      "license": "Apache-2.0",
      "optional": true,
      "os": [
        "darwin"
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.1.tgz",
-      "integrity": "sha512-vLoPWfg7OPw5vazLH5/YD/yQkZiTiPniuQgsH+xTodRfLf926lny53G7LQ6nFXNKIzX/jYKtg7AfMU8IcDLSEQ==",
+      "version": "0.20.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.20.0.tgz",
+      "integrity": "sha512-4A1f9DiyGhziN9P81jSmMgzXSc1XXM9bIJw5q/b2NmDoiqIr8tYv1FKdm0JDhMYjtnzBeNpc67gVy3GlGCuUWA==",
      "cpu": [
        "x64"
      ],
-      "license": "Apache-2.0",
      "optional": true,
      "os": [
        "darwin"
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.1.tgz",
-      "integrity": "sha512-IMAxtXj5aHCv9peziN77IxQpkYFj83KvI8zQCHzbMMXv7BspkhAd0PaUViqHqtTf2TUHjYQ66a7clZrEn+xQuQ==",
+      "version": "0.20.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.20.0.tgz",
+      "integrity": "sha512-A3teZC/zU0tccluIJZsTasP8vBQWhXsmvLOo9UopSeyCrA1sR2vEyvXV9hMRJo7+9QjOrYFLiFWPjXEdFb+/1Q==",
      "cpu": [
        "arm64"
      ],
-      "license": "Apache-2.0",
      "optional": true,
      "os": [
        "linux"
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.1.tgz",
-      "integrity": "sha512-9oPOxBsYGngIhtC/oC+fQ9V0w9mgFuj2Wyler8f5UYQdiAutsTNyOUA+XjtcROjVZrZ5oUeIrvOQSte9BbpRTg==",
+      "version": "0.20.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.20.0.tgz",
+      "integrity": "sha512-uREL9YF5iaeyfYh+5uvkSLQquFXYQoJyuDMPMZTwOE/Zghgw3lRl6KHIoMVCOfw+S8tkeyzU8UR4zgrbymbPGg==",
      "cpu": [
        "x64"
      ],
-      "license": "Apache-2.0",
      "optional": true,
      "os": [
        "linux"
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.1.tgz",
-      "integrity": "sha512-XqDXFLfdjNpDZ5jaqLerdx+sDU4YLuPK3VF4TowwcOlWDrUtI/L1lAyCaKxcyz1qE3VGuZvhNU89N5ioEICb4Q==",
+      "version": "0.20.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.20.0.tgz",
+      "integrity": "sha512-0G5FD8X9S70hH4QK4S2m7TrWCIlVr4vox4Rjhfqdxk/5QWwYVT6WltvPgTJlektI7sUWeioDNmluHzqLZKDlHQ==",
      "cpu": [
        "x64"
      ],
-      "license": "Apache-2.0",
      "optional": true,
      "os": [
        "win32"
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.21.1",
+  "version": "0.20.1-beta.0",
  "description": " Serverless, low-latency vector database for AI applications",
  "private": false,
  "main": "dist/index.js",
@@ -89,10 +89,10 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.21.1",
-    "@lancedb/vectordb-darwin-arm64": "0.21.1",
-    "@lancedb/vectordb-linux-x64-gnu": "0.21.1",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.21.1",
-    "@lancedb/vectordb-win32-x64-msvc": "0.21.1"
+    "@lancedb/vectordb-darwin-x64": "0.20.1-beta.0",
+    "@lancedb/vectordb-darwin-arm64": "0.20.1-beta.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.0"
  }
 }
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.21.1"
+version = "0.20.1-beta.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -592,14 +592,14 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        ).rejects.toThrow("column vector was missing");
      });

-      it("will skip embedding application if already applied", async function () {
+      it("will provide a nice error if run twice", async function () {
        const records = sampleRecords();
        const table = await convertToTable(records, dummyEmbeddingConfig);

        // fromTableToBuffer will try and apply the embeddings again
-        // but should skip since the column already has non-null values
-        const result = await fromTableToBuffer(table, dummyEmbeddingConfig);
-        expect(result.byteLength).toBeGreaterThan(0);
+        await expect(
+          fromTableToBuffer(table, dummyEmbeddingConfig),
+        ).rejects.toThrow("already existed");
      });
    });

--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -368,9 +368,9 @@ describe("merge insert", () => {
      { a: 4, b: "z" },
    ];

-    const result = (await table.toArrow()).toArray().sort((a, b) => a.a - b.a);
-
-    expect(result.map((row) => ({ ...row }))).toEqual(expected);
+    expect(
+      JSON.parse(JSON.stringify((await table.toArrow()).toArray())),
+    ).toEqual(expected);
  });
  test("conditional update", async () => {
    const newData = [
@@ -1650,25 +1650,13 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(resultSet.has("fob")).toBe(true);
      expect(resultSet.has("fo")).toBe(true);
      expect(resultSet.has("food")).toBe(true);
-
-      const prefixResults = await table
-        .search(
-          new MatchQuery("foo", "text", { fuzziness: 3, prefixLength: 3 }),
-        )
-        .toArray();
-      expect(prefixResults.length).toBe(2);
-      const resultSet2 = new Set(prefixResults.map((r) => r.text));
-      expect(resultSet2.has("foo")).toBe(true);
-      expect(resultSet2.has("food")).toBe(true);
    });

    test("full text search boolean query", async () => {
      const db = await connect(tmpDir.name);
      const data = [
-        { text: "The cat and dog are playing" },
-        { text: "The cat is sleeping" },
-        { text: "The dog is barking" },
-        { text: "The dog chases the cat" },
+        { text: "hello world", vector: [0.1, 0.2, 0.3] },
+        { text: "goodbye world", vector: [0.4, 0.5, 0.6] },
      ];
      const table = await db.createTable("test", data);
      await table.createIndex("text", {
@@ -1678,86 +1666,22 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      const shouldResults = await table
        .search(
          new BooleanQuery([
-            [Occur.Should, new MatchQuery("cat", "text")],
-            [Occur.Should, new MatchQuery("dog", "text")],
+            [Occur.Should, new MatchQuery("hello", "text")],
+            [Occur.Should, new MatchQuery("goodbye", "text")],
          ]),
        )
        .toArray();
-      expect(shouldResults.length).toBe(4);
+      expect(shouldResults.length).toBe(2);

      const mustResults = await table
        .search(
          new BooleanQuery([
-            [Occur.Must, new MatchQuery("cat", "text")],
-            [Occur.Must, new MatchQuery("dog", "text")],
+            [Occur.Must, new MatchQuery("hello", "text")],
+            [Occur.Must, new MatchQuery("world", "text")],
          ]),
        )
        .toArray();
-      expect(mustResults.length).toBe(2);
-
-      const mustNotResults = await table
-        .search(
-          new BooleanQuery([
-            [Occur.Must, new MatchQuery("cat", "text")],
-            [Occur.MustNot, new MatchQuery("dog", "text")],
-          ]),
-        )
-        .toArray();
-      expect(mustNotResults.length).toBe(1);
-    });
-
-    test("full text search ngram", async () => {
-      const db = await connect(tmpDir.name);
-      const data = [
-        { text: "hello world", vector: [0.1, 0.2, 0.3] },
-        { text: "lance database", vector: [0.4, 0.5, 0.6] },
-        { text: "lance is cool", vector: [0.7, 0.8, 0.9] },
-      ];
-      const table = await db.createTable("test", data);
-      await table.createIndex("text", {
-        config: Index.fts({ baseTokenizer: "ngram" }),
-      });
-
-      const results = await table.search("lan").toArray();
-      expect(results.length).toBe(2);
-      const resultSet = new Set(results.map((r) => r.text));
-      expect(resultSet.has("lance database")).toBe(true);
-      expect(resultSet.has("lance is cool")).toBe(true);
-
-      const results2 = await table.search("nce").toArray(); // spellchecker:disable-line
-      expect(results2.length).toBe(2);
-      const resultSet2 = new Set(results2.map((r) => r.text));
-      expect(resultSet2.has("lance database")).toBe(true);
-      expect(resultSet2.has("lance is cool")).toBe(true);
-
-      // the default min_ngram_length is 3, so "la" should not match
-      const results3 = await table.search("la").toArray();
-      expect(results3.length).toBe(0);
-
-      // test setting min_ngram_length and prefix_only
-      await table.createIndex("text", {
-        config: Index.fts({
-          baseTokenizer: "ngram",
-          ngramMinLength: 2,
-          prefixOnly: true,
-        }),
-        replace: true,
-      });
-
-      const results4 = await table.search("lan").toArray();
-      expect(results4.length).toBe(2);
-      const resultSet4 = new Set(results4.map((r) => r.text));
-      expect(resultSet4.has("lance database")).toBe(true);
-      expect(resultSet4.has("lance is cool")).toBe(true);
-
-      const results5 = await table.search("nce").toArray(); // spellchecker:disable-line
-      expect(results5.length).toBe(0);
-
-      const results6 = await table.search("la").toArray();
-      expect(results6.length).toBe(2);
-      const resultSet6 = new Set(results6.map((r) => r.text));
-      expect(resultSet6.has("lance database")).toBe(true);
-      expect(resultSet6.has("lance is cool")).toBe(true);
+      expect(mustResults.length).toBe(1);
    });

    test.each([
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -417,9 +417,7 @@ function inferSchema(
        } else {
          const inferredType = inferType(value, path, opts);
          if (inferredType === undefined) {
-            throw new Error(`Failed to infer data type for field ${path.join(
-              ".",
-            )} at row ${rowI}. \
+            throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
                             Consider providing an explicit schema.`);
          }
          pathTree.set(path, inferredType);
@@ -801,17 +799,11 @@ async function applyEmbeddingsFromMetadata(
        `Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
      );
    }
-
-    // Check if destination column exists and handle accordingly
    if (columns[destColumn] !== undefined) {
-      const existingColumn = columns[destColumn];
-      // If the column exists but is all null, we can fill it with embeddings
-      if (existingColumn.nullCount !== existingColumn.length) {
-        // Column has non-null values, skip embedding application
-        continue;
-      }
+      throw new Error(
+        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
+      );
    }
-
    if (table.batches.length > 1) {
      throw new Error(
        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
@@ -911,23 +903,11 @@ async function applyEmbeddings<T>(
      );
    }
  } else {
-    // Check if destination column exists and handle accordingly
    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
-      const existingColumn = newColumns[destColumn];
-      // If the column exists but is all null, we can fill it with embeddings
-      if (existingColumn.nullCount !== existingColumn.length) {
-        // Column has non-null values, skip embedding application and return table as-is
-        let newTable = new ArrowTable(newColumns);
-        if (schema != null) {
-          newTable = alignTable(newTable, schema as Schema);
-        }
-        return new ArrowTable(
-          new Schema(newTable.schema.fields, schemaMetadata),
-          newTable.batches,
-        );
-      }
+      throw new Error(
+        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
+      );
    }
-
    if (table.batches.length > 1) {
      throw new Error(
        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
--- a/nodejs/lancedb/indices.ts
+++ b/nodejs/lancedb/indices.ts
@@ -439,7 +439,7 @@ export interface FtsOptions {
   *
   * "raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
   */
-  baseTokenizer?: "simple" | "whitespace" | "raw" | "ngram";
+  baseTokenizer?: "simple" | "whitespace" | "raw";

  /**
   * language for stemming and stop words
@@ -472,21 +472,6 @@ export interface FtsOptions {
   * whether to remove punctuation
   */
  asciiFolding?: boolean;
-
-  /**
-   * ngram min length
-   */
-  ngramMinLength?: number;
-
-  /**
-   * ngram max length
-   */
-  ngramMaxLength?: number;
-
-  /**
-   * whether to only index the prefix of the token for ngram tokenizer
-   */
-  prefixOnly?: boolean;
 }

 export class Index {
@@ -623,9 +608,6 @@ export class Index {
        options?.stem,
        options?.removeStopWords,
        options?.asciiFolding,
-        options?.ngramMinLength,
-        options?.ngramMaxLength,
-        options?.prefixOnly,
      ),
    );
  }
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -812,12 +812,10 @@ export enum Operator {
 *
 * - `Must`: The term must be present in the document.
 * - `Should`: The term should contribute to the document score, but is not required.
- * - `MustNot`: The term must not be present in the document.
 */
 export enum Occur {
-  Should = "SHOULD",
  Must = "MUST",
-  MustNot = "MUST_NOT",
+  Should = "SHOULD",
 }

 /**
@@ -858,7 +856,6 @@ export class MatchQuery implements FullTextQuery {
   *   - `fuzziness`: The fuzziness level for the query (default is 0).
   *   - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
   *   - `operator`: The logical operator to use for combining terms in the query (default is "OR").
-   *   - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
   */
  constructor(
    query: string,
@@ -868,7 +865,6 @@ export class MatchQuery implements FullTextQuery {
      fuzziness?: number;
      maxExpansions?: number;
      operator?: Operator;
-      prefixLength?: number;
    },
  ) {
    let fuzziness = options?.fuzziness;
@@ -882,7 +878,6 @@ export class MatchQuery implements FullTextQuery {
      fuzziness,
      options?.maxExpansions ?? 50,
      options?.operator ?? Operator.Or,
-      options?.prefixLength ?? 0,
    );
  }

--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -75,10 +75,10 @@ export interface OptimizeOptions {
   * // Delete all versions older than 1 day
   * const olderThan = new Date();
   * olderThan.setDate(olderThan.getDate() - 1));
-   * tbl.optimize({cleanupOlderThan: olderThan});
+   * tbl.cleanupOlderVersions(olderThan);
   *
   * // Delete all versions except the current version
-   * tbl.optimize({cleanupOlderThan: new Date()});
+   * tbl.cleanupOlderVersions(new Date());
   */
  cleanupOlderThan: Date;
  deleteUnverified: boolean;
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.21.1",
+	"version": "0.20.1-beta.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.21.1",
+	"version": "0.20.1-beta.0",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.21.1",
+	"version": "0.20.1-beta.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.21.1",
+	"version": "0.20.1-beta.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.21.1",
+	"version": "0.20.1-beta.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.21.1",
+	"version": "0.20.1-beta.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.21.1",
+  "version": "0.20.1-beta.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.21.1",
+	"version": "0.20.1-beta.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.21.1",
+  "version": "0.20.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.21.1",
+      "version": "0.20.0",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.21.1",
+  "version": "0.20.1-beta.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/index.rs
+++ b/nodejs/src/index.rs
@@ -123,9 +123,6 @@ impl Index {
        stem: Option<bool>,
        remove_stop_words: Option<bool>,
        ascii_folding: Option<bool>,
-        ngram_min_length: Option<u32>,
-        ngram_max_length: Option<u32>,
-        prefix_only: Option<bool>,
    ) -> Self {
        let mut opts = FtsIndexBuilder::default();
        if let Some(with_position) = with_position {
@@ -152,15 +149,6 @@ impl Index {
        if let Some(ascii_folding) = ascii_folding {
            opts = opts.ascii_folding(ascii_folding);
        }
-        if let Some(ngram_min_length) = ngram_min_length {
-            opts = opts.ngram_min_length(ngram_min_length);
-        }
-        if let Some(ngram_max_length) = ngram_max_length {
-            opts = opts.ngram_max_length(ngram_max_length);
-        }
-        if let Some(prefix_only) = prefix_only {
-            opts = opts.ngram_prefix_only(prefix_only);
-        }

        Self {
            inner: Mutex::new(Some(LanceDbIndex::FTS(opts))),
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -335,7 +335,6 @@ impl JsFullTextQuery {
        fuzziness: Option<u32>,
        max_expansions: u32,
        operator: String,
-        prefix_length: u32,
    ) -> napi::Result<Self> {
        Ok(Self {
            inner: MatchQuery::new(query)
@@ -348,7 +347,6 @@ impl JsFullTextQuery {
                        napi::Error::from_reason(format!("Invalid operator: {}", e))
                    })?,
                )
-                .with_prefix_length(prefix_length)
                .into(),
        })
    }
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.24.2-beta.0"
+current_version = "0.23.1-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.24.2-beta.0"
+version = "0.23.1-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -85,7 +85,7 @@ embeddings = [
    "boto3>=1.28.57",
    "awscli>=1.29.57",
    "botocore>=1.31.57",
-    "ollama>=0.3.0",
+    "ollama",
    "ibm-watsonx-ai>=1.1.2",
 ]
 azure = ["adlfs>=2024.2.0"]
--- a/python/python/lancedb/embeddings/ollama.py
+++ b/python/python/lancedb/embeddings/ollama.py
@@ -2,15 +2,14 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 from functools import cached_property
-from typing import TYPE_CHECKING, List, Optional, Sequence, Union
-
-import numpy as np
+from typing import TYPE_CHECKING, List, Optional, Union

 from ..util import attempt_import_or_raise
 from .base import TextEmbeddingFunction
 from .registry import register

 if TYPE_CHECKING:
+    import numpy as np
    import ollama


@@ -29,21 +28,23 @@ class OllamaEmbeddings(TextEmbeddingFunction):
    keep_alive: Optional[Union[float, str]] = None
    ollama_client_kwargs: Optional[dict] = {}

-    def ndims(self) -> int:
+    def ndims(self):
        return len(self.generate_embeddings(["foo"])[0])

-    def _compute_embedding(self, text: Sequence[str]) -> Sequence[Sequence[float]]:
-        response = self._ollama_client.embed(
-            model=self.name,
-            input=text,
-            options=self.options,
-            keep_alive=self.keep_alive,
+    def _compute_embedding(self, text) -> Union["np.array", None]:
+        return (
+            self._ollama_client.embeddings(
+                model=self.name,
+                prompt=text,
+                options=self.options,
+                keep_alive=self.keep_alive,
+            )["embedding"]
+            or None
        )
-        return response.embeddings

    def generate_embeddings(
-        self, texts: Union[List[str], np.ndarray]
-    ) -> list[Union[np.array, None]]:
+        self, texts: Union[List[str], "np.ndarray"]
+    ) -> list[Union["np.array", None]]:
        """
        Get the embeddings for the given texts

@@ -53,8 +54,8 @@ class OllamaEmbeddings(TextEmbeddingFunction):
            The texts to embed
        """
        # TODO retry, rate limit, token limit
-        embeddings = self._compute_embedding(texts)
-        return list(embeddings)
+        embeddings = [self._compute_embedding(text) for text in texts]
+        return embeddings

    @cached_property
    def _ollama_client(self) -> "ollama.Client":
--- a/python/python/lancedb/index.py
+++ b/python/python/lancedb/index.py
@@ -137,9 +137,6 @@ class FTS:
    stem: bool = True
    remove_stop_words: bool = True
    ascii_folding: bool = True
-    ngram_min_length: int = 3
-    ngram_max_length: int = 3
-    prefix_only: bool = False


@dataclass
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -101,9 +101,8 @@ class FullTextOperator(str, Enum):


 class Occur(str, Enum):
-    SHOULD = "SHOULD"
    MUST = "MUST"
-    MUST_NOT = "MUST_NOT"
+    SHOULD = "SHOULD"


@pydantic.dataclasses.dataclass
@@ -182,9 +181,6 @@ class MatchQuery(FullTextQuery):
        Can be either `AND` or `OR`.
        If `AND`, all terms in the query must match.
        If `OR`, at least one term in the query must match.
-    prefix_length : int, optional
-        The number of beginning characters being unchanged for fuzzy matching.
-        This is useful to achieve prefix matching.
    """

    query: str
@@ -193,7 +189,6 @@ class MatchQuery(FullTextQuery):
    fuzziness: int = pydantic.Field(0, kw_only=True)
    max_expansions: int = pydantic.Field(50, kw_only=True)
    operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True)
-    prefix_length: int = pydantic.Field(0, kw_only=True)

    def query_type(self) -> FullTextQueryType:
        return FullTextQueryType.MATCH
@@ -1374,8 +1369,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        if query_string is not None and not isinstance(query_string, str):
            raise ValueError("Reranking currently only supports string queries")
        self._str_query = query_string if query_string is not None else self._str_query
-        if reranker.score == "all":
-            self.with_row_id(True)
        return self

    def bypass_vector_index(self) -> LanceVectorQueryBuilder:
@@ -1453,13 +1446,10 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):

        query = self._query
        if self._phrase_query:
-            if isinstance(query, str):
-                if not query.startswith('"') or not query.endswith('"'):
-                    query = f'"{query}"'
-            elif isinstance(query, FullTextQuery) and not isinstance(
-                query, PhraseQuery
-            ):
-                raise TypeError("Please use PhraseQuery for phrase queries.")
+            raise NotImplementedError(
+                "Phrase query is not yet supported in Lance FTS. "
+                "Use tantivy-based index instead for now."
+            )
        query = self.to_query_object()
        results = self._table._execute_query(query, timeout=timeout)
        results = results.read_all()
@@ -1571,8 +1561,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
            The LanceQueryBuilder object.
        """
        self._reranker = reranker
-        if reranker.score == "all":
-            self.with_row_id(True)
        return self


@@ -1849,8 +1837,6 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):

        self._norm = normalize
        self._reranker = reranker
-        if reranker.score == "all":
-            self.with_row_id(True)

        return self

@@ -3048,21 +3034,15 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
        >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Vector Search Plan:
        ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
-          Take: columns="vector, _rowid, _distance, (text)"
-            CoalesceBatchesExec: target_batch_size=1024
-              GlobalLimitExec: skip=0, fetch=10
-                FilterExec: _distance@2 IS NOT NULL
-                  SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
-                    KNNVectorDistance: metric=l2
-                      LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
-        <BLANKLINE>
+            Take: columns="vector, _rowid, _distance, (text)"
+                CoalesceBatchesExec: target_batch_size=1024
+                GlobalLimitExec: skip=0, fetch=10
+                    FilterExec: _distance@2 IS NOT NULL
+                    SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
+                        KNNVectorDistance: metric=l2
+                        LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
        FTS Search Plan:
-        ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
-          Take: columns="_rowid, _score, (vector), (text)"
-            CoalesceBatchesExec: target_batch_size=1024
-              GlobalLimitExec: skip=0, fetch=10
-                MatchQuery: query=hello
-        <BLANKLINE>
+        LanceScan: uri=..., projection=[vector, text], row_id=false, row_addr=false, ordered=true

        Parameters
        ----------
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -18,7 +18,7 @@ from lancedb._lancedb import (
    UpdateResult,
 )
 from lancedb.embeddings.base import EmbeddingFunctionConfig
-from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, LabelList
+from lancedb.index import FTS, BTree, Bitmap, HnswPq, HnswSq, IvfFlat, IvfPq, LabelList
 from lancedb.remote.db import LOOP
 import pyarrow as pa

@@ -89,7 +89,7 @@ class RemoteTable(Table):

    def to_pandas(self):
        """to_pandas() is not yet supported on LanceDB cloud."""
-        raise NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
+        return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")

    def checkout(self, version: Union[int, str]):
        return LOOP.run(self._table.checkout(version))
@@ -158,9 +158,6 @@ class RemoteTable(Table):
        stem: bool = True,
        remove_stop_words: bool = True,
        ascii_folding: bool = True,
-        ngram_min_length: int = 3,
-        ngram_max_length: int = 3,
-        prefix_only: bool = False,
    ):
        config = FTS(
            with_position=with_position,
@@ -171,9 +168,6 @@ class RemoteTable(Table):
            stem=stem,
            remove_stop_words=remove_stop_words,
            ascii_folding=ascii_folding,
-            ngram_min_length=ngram_min_length,
-            ngram_max_length=ngram_max_length,
-            prefix_only=prefix_only,
        )
        LOOP.run(
            self._table.create_index(
@@ -192,8 +186,6 @@ class RemoteTable(Table):
        accelerator: Optional[str] = None,
        index_type="vector",
        wait_timeout: Optional[timedelta] = None,
-        *,
-        num_bits: int = 8,
    ):
        """Create an index on the table.
        Currently, the only parameters that matter are
@@ -228,6 +220,11 @@ class RemoteTable(Table):
        >>> table.create_index("l2", "vector") # doctest: +SKIP
        """

+        if num_partitions is not None:
+            logging.warning(
+                "num_partitions is not supported on LanceDB cloud."
+                "This parameter will be tuned automatically."
+            )
        if num_sub_vectors is not None:
            logging.warning(
                "num_sub_vectors is not supported on LanceDB cloud."
@@ -247,21 +244,13 @@ class RemoteTable(Table):

        index_type = index_type.upper()
        if index_type == "VECTOR" or index_type == "IVF_PQ":
-            config = IvfPq(
-                distance_type=metric,
-                num_partitions=num_partitions,
-                num_sub_vectors=num_sub_vectors,
-                num_bits=num_bits,
-            )
+            config = IvfPq(distance_type=metric)
        elif index_type == "IVF_HNSW_PQ":
-            raise ValueError(
-                "IVF_HNSW_PQ is not supported on LanceDB cloud."
-                "Please use IVF_HNSW_SQ instead."
-            )
+            config = HnswPq(distance_type=metric)
        elif index_type == "IVF_HNSW_SQ":
-            config = HnswSq(distance_type=metric, num_partitions=num_partitions)
+            config = HnswSq(distance_type=metric)
        elif index_type == "IVF_FLAT":
-            config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
+            config = IvfFlat(distance_type=metric)
        else:
            raise ValueError(
                f"Unknown vector index type: {index_type}. Valid options are"
--- a/python/python/lancedb/rerankers/answerdotai.py
+++ b/python/python/lancedb/rerankers/answerdotai.py
@@ -74,7 +74,9 @@ class AnswerdotaiRerankers(Reranker):
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
        elif self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
+            raise NotImplementedError(
+                "Answerdotai Reranker does not support score='all' yet"
+            )
        combined_results = combined_results.sort_by(
            [("_relevance_score", "descending")]
        )
--- a/python/python/lancedb/rerankers/base.py
+++ b/python/python/lancedb/rerankers/base.py
@@ -232,39 +232,6 @@ class Reranker(ABC):

        return deduped_table

-    def _merge_and_keep_scores(self, vector_results: pa.Table, fts_results: pa.Table):
-        """
-        Merge the results from the vector and FTS search and keep the scores.
-        This op is slower than just keeping relevance score but can be useful
-        for debugging.
-        """
-        # add nulls to fts results for _distance
-        if "_distance" not in fts_results.column_names:
-            fts_results = fts_results.append_column(
-                "_distance",
-                pa.array([None] * len(fts_results), type=pa.float32()),
-            )
-        # add nulls to vector results for _score
-        if "_score" not in vector_results.column_names:
-            vector_results = vector_results.append_column(
-                "_score",
-                pa.array([None] * len(vector_results), type=pa.float32()),
-            )
-
-        # combine them and fill the scores
-        vector_results_dict = {row["_rowid"]: row for row in vector_results.to_pylist()}
-        fts_results_dict = {row["_rowid"]: row for row in fts_results.to_pylist()}
-
-        # merge them into vector_results
-        for key, value in fts_results_dict.items():
-            if key in vector_results_dict:
-                vector_results_dict[key]["_score"] = value["_score"]
-            else:
-                vector_results_dict[key] = value
-
-        combined = pa.Table.from_pylist(list(vector_results_dict.values()))
-        return combined
-
    def _keep_relevance_score(self, combined_results: pa.Table):
        if self.score == "relevance":
            if "_score" in combined_results.column_names:
--- a/python/python/lancedb/rerankers/cohere.py
+++ b/python/python/lancedb/rerankers/cohere.py
@@ -92,14 +92,14 @@ class CohereReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for cohere reranker"
+            )
        return combined_results

    def rerank_vector(self, query: str, vector_results: pa.Table):
--- a/python/python/lancedb/rerankers/cross_encoder.py
+++ b/python/python/lancedb/rerankers/cross_encoder.py
@@ -81,15 +81,15 @@ class CrossEncoderReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        # sort the results by _score
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for CrossEncoderReranker"
+            )
        combined_results = combined_results.sort_by(
            [("_relevance_score", "descending")]
        )
--- a/python/python/lancedb/rerankers/jinaai.py
+++ b/python/python/lancedb/rerankers/jinaai.py
@@ -97,14 +97,14 @@ class JinaReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for JinaReranker"
+            )
        return combined_results

    def rerank_vector(self, query: str, vector_results: pa.Table):
--- a/python/python/lancedb/rerankers/openai.py
+++ b/python/python/lancedb/rerankers/openai.py
@@ -88,13 +88,14 @@ class OpenaiReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
+        elif self.score == "all":
+            raise NotImplementedError(
+                "OpenAI Reranker does not support score='all' yet"
+            )

        combined_results = combined_results.sort_by(
            [("_relevance_score", "descending")]
--- a/python/python/lancedb/rerankers/voyageai.py
+++ b/python/python/lancedb/rerankers/voyageai.py
@@ -94,14 +94,14 @@ class VoyageAIReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for voyageai reranker"
+            )
        return combined_results

    def rerank_vector(self, query: str, vector_results: pa.Table):
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -827,7 +827,7 @@ class Table(ABC):
        ordering_field_names: Optional[Union[str, List[str]]] = None,
        replace: bool = False,
        writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
-        use_tantivy: bool = False,
+        use_tantivy: bool = True,
        tokenizer_name: Optional[str] = None,
        with_position: bool = False,
        # tokenizer configs:
@@ -838,9 +838,6 @@ class Table(ABC):
        stem: bool = True,
        remove_stop_words: bool = True,
        ascii_folding: bool = True,
-        ngram_min_length: int = 3,
-        ngram_max_length: int = 3,
-        prefix_only: bool = False,
        wait_timeout: Optional[timedelta] = None,
    ):
        """Create a full-text search index on the table.
@@ -867,7 +864,7 @@ class Table(ABC):
            The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
            language code followed by "_stem". So for english it would be "en_stem".
            For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
-        use_tantivy: bool, default False
+        use_tantivy: bool, default True
            If True, use the legacy full-text search implementation based on tantivy.
            If False, use the new full-text search implementation based on lance-index.
        with_position: bool, default False
@@ -880,7 +877,6 @@ class Table(ABC):
            - "simple": Splits text by whitespace and punctuation.
            - "whitespace": Split text by whitespace, but not punctuation.
            - "raw": No tokenization. The entire text is treated as a single token.
-            - "ngram": N-Gram tokenizer.
        language : str, default "English"
            The language to use for tokenization.
        max_token_length : int, default 40
@@ -898,12 +894,6 @@ class Table(ABC):
        ascii_folding : bool, default True
            Whether to fold ASCII characters. This converts accented characters to
            their ASCII equivalent. For example, "café" would be converted to "cafe".
-        ngram_min_length: int, default 3
-            The minimum length of an n-gram.
-        ngram_max_length: int, default 3
-            The maximum length of an n-gram.
-        prefix_only: bool, default False
-            Whether to only index the prefix of the token for ngram tokenizer.
        wait_timeout: timedelta, optional
            The timeout to wait if indexing is asynchronous.
        """
@@ -1980,7 +1970,7 @@ class LanceTable(Table):
        ordering_field_names: Optional[Union[str, List[str]]] = None,
        replace: bool = False,
        writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
-        use_tantivy: bool = False,
+        use_tantivy: bool = True,
        tokenizer_name: Optional[str] = None,
        with_position: bool = False,
        # tokenizer configs:
@@ -1991,9 +1981,6 @@ class LanceTable(Table):
        stem: bool = True,
        remove_stop_words: bool = True,
        ascii_folding: bool = True,
-        ngram_min_length: int = 3,
-        ngram_max_length: int = 3,
-        prefix_only: bool = False,
    ):
        if not use_tantivy:
            if not isinstance(field_names, str):
@@ -2009,9 +1996,6 @@ class LanceTable(Table):
                    "stem": stem,
                    "remove_stop_words": remove_stop_words,
                    "ascii_folding": ascii_folding,
-                    "ngram_min_length": ngram_min_length,
-                    "ngram_max_length": ngram_max_length,
-                    "prefix_only": prefix_only,
                }
            else:
                tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
@@ -2081,9 +2065,6 @@ class LanceTable(Table):
                "stem": False,
                "remove_stop_words": False,
                "ascii_folding": False,
-                "ngram_min_length": 3,
-                "ngram_max_length": 3,
-                "prefix_only": False,
            }
        elif tokenizer_name == "raw":
            return {
@@ -2094,9 +2075,6 @@ class LanceTable(Table):
                "stem": False,
                "remove_stop_words": False,
                "ascii_folding": False,
-                "ngram_min_length": 3,
-                "ngram_max_length": 3,
-                "prefix_only": False,
            }
        elif tokenizer_name == "whitespace":
            return {
@@ -2107,9 +2085,6 @@ class LanceTable(Table):
                "stem": False,
                "remove_stop_words": False,
                "ascii_folding": False,
-                "ngram_min_length": 3,
-                "ngram_max_length": 3,
-                "prefix_only": False,
            }

        # or it's with language stemming with pattern like "en_stem"
@@ -2128,9 +2103,6 @@ class LanceTable(Table):
            "stem": True,
            "remove_stop_words": False,
            "ascii_folding": False,
-            "ngram_min_length": 3,
-            "ngram_max_length": 3,
-            "prefix_only": False,
        }

    def add(
--- a/python/python/lancedb/types.py
+++ b/python/python/lancedb/types.py
@@ -25,4 +25,4 @@ IndexType = Literal[
 ]

 # Tokenizer literals
-BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
+BaseTokenizerType = Literal["simple", "raw", "whitespace"]
--- a/python/python/tests/docs/test_search.py
+++ b/python/python/tests/docs/test_search.py
@@ -6,7 +6,7 @@ import lancedb

 # --8<-- [end:import-lancedb]
 # --8<-- [start:import-numpy]
-from lancedb.query import BooleanQuery, BoostQuery, MatchQuery, Occur
+from lancedb.query import BoostQuery, MatchQuery
 import numpy as np
 import pyarrow as pa

@@ -191,15 +191,6 @@ def test_fts_fuzzy_query():
        "food",  # 1 insertion
    }

-    results = table.search(
-        MatchQuery("foo", "text", fuzziness=1, prefix_length=3)
-    ).to_pandas()
-    assert len(results) == 2
-    assert set(results["text"].to_list()) == {
-        "foo",
-        "food",
-    }
-

@pytest.mark.skipif(
    os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
@@ -249,60 +240,6 @@ def test_fts_boost_query():
    )


-@pytest.mark.skipif(
-    os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
-)
-def test_fts_boolean_query(tmp_path):
-    uri = tmp_path / "boolean-example"
-    db = lancedb.connect(uri)
-    table = db.create_table(
-        "my_table_fts_boolean",
-        data=[
-            {"text": "The cat and dog are playing"},
-            {"text": "The cat is sleeping"},
-            {"text": "The dog is barking"},
-            {"text": "The dog chases the cat"},
-        ],
-        mode="overwrite",
-    )
-    table.create_fts_index("text", use_tantivy=False, replace=True)
-
-    # SHOULD
-    results = table.search(
-        MatchQuery("cat", "text") | MatchQuery("dog", "text")
-    ).to_pandas()
-    assert len(results) == 4
-    assert set(results["text"].to_list()) == {
-        "The cat and dog are playing",
-        "The cat is sleeping",
-        "The dog is barking",
-        "The dog chases the cat",
-    }
-    # MUST
-    results = table.search(
-        MatchQuery("cat", "text") & MatchQuery("dog", "text")
-    ).to_pandas()
-    assert len(results) == 2
-    assert set(results["text"].to_list()) == {
-        "The cat and dog are playing",
-        "The dog chases the cat",
-    }
-
-    # MUST NOT
-    results = table.search(
-        BooleanQuery(
-            [
-                (Occur.MUST, MatchQuery("cat", "text")),
-                (Occur.MUST_NOT, MatchQuery("dog", "text")),
-            ]
-        )
-    ).to_pandas()
-    assert len(results) == 1
-    assert set(results["text"].to_list()) == {
-        "The cat is sleeping",
-    }
-
-
@pytest.mark.skipif(
    os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
 )
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -669,46 +669,3 @@ def test_fts_on_list(mem_db: DBConnection):

    res = table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
    assert len(res) == 2
-
-
-def test_fts_ngram(mem_db: DBConnection):
-    data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
-    table = mem_db.create_table("test", data=data)
-    table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
-
-    results = table.search("lan", query_type="fts").limit(10).to_list()
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
-
-    results = (
-        table.search("nce", query_type="fts").limit(10).to_list()
-    )  # spellchecker:disable-line
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
-
-    # the default min_ngram_length is 3, so "la" should not match
-    results = table.search("la", query_type="fts").limit(10).to_list()
-    assert len(results) == 0
-
-    # test setting min_ngram_length and prefix_only
-    table.create_fts_index(
-        "text",
-        use_tantivy=False,
-        base_tokenizer="ngram",
-        replace=True,
-        ngram_min_length=2,
-        prefix_only=True,
-    )
-
-    results = table.search("lan", query_type="fts").limit(10).to_list()
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
-
-    results = (
-        table.search("nce", query_type="fts").limit(10).to_list()
-    )  # spellchecker:disable-line
-    assert len(results) == 0
-
-    results = table.search("la", query_type="fts").limit(10).to_list()
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -272,9 +272,7 @@ async def test_distance_range_with_new_rows_async():
    # append more rows so that execution plan would be mixed with ANN & Flat KNN
    new_data = pa.table(
        {
-            "vector": pa.FixedShapeTensorArray.from_numpy_ndarray(
-                np.random.rand(4, 2) + 1
-            ),
+            "vector": pa.FixedShapeTensorArray.from_numpy_ndarray(np.random.rand(4, 2)),
        }
    )
    await table.add(new_data)
@@ -777,82 +775,6 @@ async def test_explain_plan_async(table_async: AsyncTable):
    assert "KNN" in plan


-@pytest.mark.asyncio
-async def test_explain_plan_fts(table_async: AsyncTable):
-    """Test explain plan for FTS queries"""
-    # Create FTS index
-    from lancedb.index import FTS
-
-    await table_async.create_index("text", config=FTS())
-
-    # Test pure FTS query
-    query = await table_async.search("dog", query_type="fts", fts_columns="text")
-    plan = await query.explain_plan()
-    # Should show FTS details (issue #2465 is now fixed)
-    assert "MatchQuery: query=dog" in plan
-    assert "GlobalLimitExec" in plan  # Default limit
-
-    # Test FTS query with limit
-    query_with_limit = await table_async.search(
-        "dog", query_type="fts", fts_columns="text"
-    )
-    plan_with_limit = await query_with_limit.limit(1).explain_plan()
-    assert "MatchQuery: query=dog" in plan_with_limit
-    assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
-
-    # Test FTS query with offset and limit
-    query_with_offset = await table_async.search(
-        "dog", query_type="fts", fts_columns="text"
-    )
-    plan_with_offset = await query_with_offset.offset(1).limit(1).explain_plan()
-    assert "MatchQuery: query=dog" in plan_with_offset
-    assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
-
-
-@pytest.mark.asyncio
-async def test_explain_plan_vector_with_limit_offset(table_async: AsyncTable):
-    """Test explain plan for vector queries with limit and offset"""
-    # Test vector query with limit
-    plan_with_limit = await (
-        table_async.query().nearest_to(pa.array([1, 2])).limit(1).explain_plan()
-    )
-    assert "KNN" in plan_with_limit
-    assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
-
-    # Test vector query with offset and limit
-    plan_with_offset = await (
-        table_async.query()
-        .nearest_to(pa.array([1, 2]))
-        .offset(1)
-        .limit(1)
-        .explain_plan()
-    )
-    assert "KNN" in plan_with_offset
-    assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
-
-
-@pytest.mark.asyncio
-async def test_explain_plan_with_filters(table_async: AsyncTable):
-    """Test explain plan for queries with filters"""
-    # Test vector query with filter
-    plan_with_filter = await (
-        table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
-    )
-    assert "KNN" in plan_with_filter
-    assert "FilterExec" in plan_with_filter
-
-    # Test FTS query with filter
-    from lancedb.index import FTS
-
-    await table_async.create_index("text", config=FTS())
-    query_fts_filter = await table_async.search(
-        "dog", query_type="fts", fts_columns="text"
-    )
-    plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
-    assert "MatchQuery: query=dog" in plan_fts_filter
-    assert "FilterExec: id@" in plan_fts_filter  # Should show filter details
-
-
@pytest.mark.asyncio
 async def test_query_camelcase_async(tmp_path):
    db = await lancedb.connect_async(tmp_path)
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -210,25 +210,6 @@ async def test_retry_error():
        assert cause.status_code == 429


-def test_table_unimplemented_functions():
-    def handler(request):
-        if request.path == "/v1/table/test/create/?mode=create":
-            request.send_response(200)
-            request.send_header("Content-Type", "application/json")
-            request.end_headers()
-            request.wfile.write(b"{}")
-        else:
-            request.send_response(404)
-            request.end_headers()
-
-    with mock_lancedb_connection(handler) as db:
-        table = db.create_table("test", [{"id": 1}])
-        with pytest.raises(NotImplementedError):
-            table.to_arrow()
-        with pytest.raises(NotImplementedError):
-            table.to_pandas()
-
-
 def test_table_add_in_threadpool():
    def handler(request):
        if request.path == "/v1/table/test/insert/":
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -499,19 +499,3 @@ def test_empty_result_reranker():
            .rerank(reranker)
            .to_arrow()
        )
-
-
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_cross_encoder_reranker_return_all(tmp_path, use_tantivy):
-    pytest.importorskip("sentence_transformers")
-    reranker = CrossEncoderReranker(return_score="all")
-    table, schema = get_test_table(tmp_path, use_tantivy)
-    query = "single player experience"
-    result = (
-        table.search(query, query_type="hybrid", vector_column_name="vector")
-        .rerank(reranker=reranker)
-        .to_arrow()
-    )
-    assert "_relevance_score" in result.column_names
-    assert "_score" in result.column_names
-    assert "_distance" in result.column_names
--- a/python/python/tests/test_s3.py
+++ b/python/python/tests/test_s3.py
@@ -245,7 +245,7 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch):
        NotImplementedError,
        match="Full-text search is only supported on the local filesystem",
    ):
-        table.create_fts_index("x", use_tantivy=True)
+        table.create_fts_index("x")

    # make sure list tables still works
    assert db.table_names() == ["test_ddb_sync"]
--- a/python/src/index.rs
+++ b/python/src/index.rs
@@ -47,10 +47,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    .max_token_length(params.max_token_length)
                    .remove_stop_words(params.remove_stop_words)
                    .stem(params.stem)
-                    .ascii_folding(params.ascii_folding)
-                    .ngram_min_length(params.ngram_min_length)
-                    .ngram_max_length(params.ngram_max_length)
-                    .ngram_prefix_only(params.prefix_only);
+                    .ascii_folding(params.ascii_folding);
                Ok(LanceDbIndex::FTS(inner_opts))
            },
            "IvfFlat" => {
@@ -133,9 +130,6 @@ struct FtsParams {
    stem: bool,
    remove_stop_words: bool,
    ascii_folding: bool,
-    ngram_min_length: u32,
-    ngram_max_length: u32,
-    prefix_only: bool,
 }

 #[derive(FromPyObject)]
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -50,9 +50,8 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
                let fuzziness = ob.getattr("fuzziness")?.extract()?;
                let max_expansions = ob.getattr("max_expansions")?.extract()?;
                let operator = ob.getattr("operator")?.extract::<String>()?;
-                let prefix_length = ob.getattr("prefix_length")?.extract()?;

-                Ok(Self(
+                Ok(PyLanceDB(
                    MatchQuery::new(query)
                        .with_column(Some(column))
                        .with_boost(boost)
@@ -61,7 +60,6 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
                        .with_operator(Operator::try_from(operator.as_str()).map_err(|e| {
                            PyValueError::new_err(format!("Invalid operator: {}", e))
                        })?)
-                        .with_prefix_length(prefix_length)
                        .into(),
                ))
            }
@@ -70,7 +68,7 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
                let column = ob.getattr("column")?.extract()?;
                let slop = ob.getattr("slop")?.extract()?;

-                Ok(Self(
+                Ok(PyLanceDB(
                    PhraseQuery::new(query)
                        .with_column(Some(column))
                        .with_slop(slop)
@@ -78,10 +76,10 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
                ))
            }
            "BoostQuery" => {
-                let positive: Self = ob.getattr("positive")?.extract()?;
-                let negative: Self = ob.getattr("negative")?.extract()?;
+                let positive: PyLanceDB<FtsQuery> = ob.getattr("positive")?.extract()?;
+                let negative: PyLanceDB<FtsQuery> = ob.getattr("negative")?.extract()?;
                let negative_boost = ob.getattr("negative_boost")?.extract()?;
-                Ok(Self(
+                Ok(PyLanceDB(
                    BoostQuery::new(positive.0, negative.0, negative_boost).into(),
                ))
            }
@@ -103,17 +101,18 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
                let op = Operator::try_from(operator.as_str())
                    .map_err(|e| PyValueError::new_err(format!("Invalid operator: {}", e)))?;

-                Ok(Self(q.with_operator(op).into()))
+                Ok(PyLanceDB(q.with_operator(op).into()))
            }
            "BooleanQuery" => {
-                let queries: Vec<(String, Self)> = ob.getattr("queries")?.extract()?;
+                let queries: Vec<(String, PyLanceDB<FtsQuery>)> =
+                    ob.getattr("queries")?.extract()?;
                let mut sub_queries = Vec::with_capacity(queries.len());
                for (occur, q) in queries {
                    let occur = Occur::try_from(occur.as_str())
                        .map_err(|e| PyValueError::new_err(e.to_string()))?;
                    sub_queries.push((occur, q.0));
                }
-                Ok(Self(BooleanQuery::new(sub_queries).into()))
+                Ok(PyLanceDB(BooleanQuery::new(sub_queries).into()))
            }
            name => Err(PyValueError::new_err(format!(
                "Unsupported FTS query type: {}",
@@ -140,8 +139,7 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
                kwargs.set_item("boost", query.boost)?;
                kwargs.set_item("fuzziness", query.fuzziness)?;
                kwargs.set_item("max_expansions", query.max_expansions)?;
-                kwargs.set_item::<_, &str>("operator", query.operator.into())?;
-                kwargs.set_item("prefix_length", query.prefix_length)?;
+                kwargs.set_item("operator", operator_to_str(query.operator))?;
                namespace
                    .getattr(intern!(py, "MatchQuery"))?
                    .call((query.terms, query.column.unwrap()), Some(&kwargs))
@@ -154,8 +152,8 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
                    .call((query.terms, query.column.unwrap()), Some(&kwargs))
            }
            FtsQuery::Boost(query) => {
-                let positive = Self(query.positive.as_ref().clone()).into_pyobject(py)?;
-                let negative = Self(query.negative.as_ref().clone()).into_pyobject(py)?;
+                let positive = PyLanceDB(query.positive.as_ref().clone()).into_pyobject(py)?;
+                let negative = PyLanceDB(query.negative.as_ref().clone()).into_pyobject(py)?;
                let kwargs = PyDict::new(py);
                kwargs.set_item("negative_boost", query.negative_boost)?;
                namespace
@@ -171,25 +169,19 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
                    .unzip();
                let kwargs = PyDict::new(py);
                kwargs.set_item("boosts", boosts)?;
-                kwargs.set_item::<_, &str>("operator", first.operator.into())?;
+                kwargs.set_item("operator", operator_to_str(first.operator))?;
                namespace
                    .getattr(intern!(py, "MultiMatchQuery"))?
                    .call((first.terms.clone(), columns), Some(&kwargs))
            }
            FtsQuery::Boolean(query) => {
-                let mut queries: Vec<(&str, Bound<'py, PyAny>)> = Vec::with_capacity(
-                    query.should.len() + query.must.len() + query.must_not.len(),
-                );
-                for q in query.should {
-                    queries.push((Occur::Should.into(), Self(q).into_pyobject(py)?));
-                }
+                let mut queries = Vec::with_capacity(query.must.len() + query.should.len());
                for q in query.must {
-                    queries.push((Occur::Must.into(), Self(q).into_pyobject(py)?));
+                    queries.push((occur_to_str(Occur::Must), PyLanceDB(q).into_pyobject(py)?));
                }
-                for q in query.must_not {
-                    queries.push((Occur::MustNot.into(), Self(q).into_pyobject(py)?));
+                for q in query.should {
+                    queries.push((occur_to_str(Occur::Should), PyLanceDB(q).into_pyobject(py)?));
                }
-
                namespace
                    .getattr(intern!(py, "BooleanQuery"))?
                    .call1((queries,))
@@ -198,6 +190,20 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
    }
 }

+fn operator_to_str(op: Operator) -> &'static str {
+    match op {
+        Operator::And => "AND",
+        Operator::Or => "OR",
+    }
+}
+
+fn occur_to_str(occur: Occur) -> &'static str {
+    match occur {
+        Occur::Must => "MUST",
+        Occur::Should => "SHOULD",
+    }
+}
+
 // Python representation of query vector(s)
 #[derive(Clone)]
 pub struct PyQueryVectors(Vec<Arc<dyn Array>>);
@@ -562,10 +568,7 @@ impl FTSQuery {
    }

    pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_
-            .inner
-            .clone()
-            .full_text_search(self_.fts_query.clone());
+        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            inner
                .explain_plan(verbose)
@@ -575,10 +578,7 @@ impl FTSQuery {
    }

    pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_
-            .inner
-            .clone()
-            .full_text_search(self_.fts_query.clone());
+        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            inner
                .analyze_plan()
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.21.1"
+version = "0.20.1-beta.0"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.21.1"
+version = "0.20.1-beta.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
--- a/rust/lancedb/src/catalog/listing.rs
+++ b/rust/lancedb/src/catalog/listing.rs
@@ -105,7 +105,7 @@ impl ListingCatalog {
    }

    async fn open_path(path: &str) -> Result<Self> {
-        let (object_store, base_path) = ObjectStore::from_uri(path).await?;
+        let (object_store, base_path) = ObjectStore::from_uri(path).await.unwrap();
        if object_store.is_local() {
            Self::try_create_dir(path).context(CreateDirSnafu { path })?;
        }
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -8,7 +8,7 @@ use std::path::Path;
 use std::{collections::HashMap, sync::Arc};

 use lance::dataset::{ReadParams, WriteMode};
-use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
+use lance::io::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry, WrappingObjectStore};
 use lance_datafusion::utils::StreamingWriteSource;
 use lance_encoding::version::LanceFileVersion;
 use lance_table::io::commit::commit_handler_from_url;
@@ -217,9 +217,6 @@ pub struct ListingDatabase {

    // Options for tables created by this connection
    new_table_config: NewTableConfig,
-
-    // Session for object stores and caching
-    session: Arc<lance::session::Session>,
 }

 impl std::fmt::Display for ListingDatabase {
@@ -316,17 +313,13 @@ impl ListingDatabase {

                let plain_uri = url.to_string();

-                let session = Arc::new(lance::session::Session::default());
+                let registry = Arc::new(ObjectStoreRegistry::default());
                let os_params = ObjectStoreParams {
                    storage_options: Some(options.storage_options.clone()),
                    ..Default::default()
                };
-                let (object_store, base_path) = ObjectStore::from_uri_and_params(
-                    session.store_registry(),
-                    &plain_uri,
-                    &os_params,
-                )
-                .await?;
+                let (object_store, base_path) =
+                    ObjectStore::from_uri_and_params(registry, &plain_uri, &os_params).await?;
                if object_store.is_local() {
                    Self::try_create_dir(&plain_uri).context(CreateDirSnafu { path: plain_uri })?;
                }
@@ -349,7 +342,6 @@ impl ListingDatabase {
                    read_consistency_interval: request.read_consistency_interval,
                    storage_options: options.storage_options,
                    new_table_config: options.new_table_config,
-                    session,
                })
            }
            Err(_) => {
@@ -368,13 +360,7 @@ impl ListingDatabase {
        read_consistency_interval: Option<std::time::Duration>,
        new_table_config: NewTableConfig,
    ) -> Result<Self> {
-        let session = Arc::new(lance::session::Session::default());
-        let (object_store, base_path) = ObjectStore::from_uri_and_params(
-            session.store_registry(),
-            path,
-            &ObjectStoreParams::default(),
-        )
-        .await?;
+        let (object_store, base_path) = ObjectStore::from_uri(path).await?;
        if object_store.is_local() {
            Self::try_create_dir(path).context(CreateDirSnafu { path })?;
        }
@@ -388,7 +374,6 @@ impl ListingDatabase {
            read_consistency_interval,
            storage_options: HashMap::new(),
            new_table_config,
-            session,
        })
    }

@@ -456,128 +441,6 @@ impl ListingDatabase {
        }
        Ok(())
    }
-
-    /// Inherit storage options from the connection into the target map
-    fn inherit_storage_options(&self, target: &mut HashMap<String, String>) {
-        for (key, value) in self.storage_options.iter() {
-            if !target.contains_key(key) {
-                target.insert(key.clone(), value.clone());
-            }
-        }
-    }
-
-    /// Extract storage option overrides from the request
-    fn extract_storage_overrides(
-        &self,
-        request: &CreateTableRequest,
-    ) -> Result<(Option<LanceFileVersion>, Option<bool>)> {
-        let storage_options = request
-            .write_options
-            .lance_write_params
-            .as_ref()
-            .and_then(|p| p.store_params.as_ref())
-            .and_then(|sp| sp.storage_options.as_ref());
-
-        let storage_version_override = storage_options
-            .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
-            .map(|s| s.parse::<LanceFileVersion>())
-            .transpose()?;
-
-        let v2_manifest_override = storage_options
-            .and_then(|opts| opts.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS))
-            .map(|s| s.parse::<bool>())
-            .transpose()
-            .map_err(|_| Error::InvalidInput {
-                message: "enable_v2_manifest_paths must be a boolean".to_string(),
-            })?;
-
-        Ok((storage_version_override, v2_manifest_override))
-    }
-
-    /// Prepare write parameters for table creation
-    fn prepare_write_params(
-        &self,
-        request: &CreateTableRequest,
-        storage_version_override: Option<LanceFileVersion>,
-        v2_manifest_override: Option<bool>,
-    ) -> lance::dataset::WriteParams {
-        let mut write_params = request
-            .write_options
-            .lance_write_params
-            .clone()
-            .unwrap_or_default();
-
-        // Only modify the storage options if we actually have something to
-        // inherit. There is a difference between storage_options=None and
-        // storage_options=Some({}). Using storage_options=None will cause the
-        // connection's session store registry to be used. Supplying Some({})
-        // will cause a new connection to be created, and that connection will
-        // be dropped from the cache when python GCs the table object, which
-        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = write_params
-                .store_params
-                .get_or_insert_with(Default::default)
-                .storage_options
-                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
-        }
-
-        write_params.data_storage_version = self
-            .new_table_config
-            .data_storage_version
-            .or(storage_version_override);
-
-        if let Some(enable_v2_manifest_paths) = self
-            .new_table_config
-            .enable_v2_manifest_paths
-            .or(v2_manifest_override)
-        {
-            write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
-        }
-
-        if matches!(&request.mode, CreateTableMode::Overwrite) {
-            write_params.mode = WriteMode::Overwrite;
-        }
-
-        write_params.session = Some(self.session.clone());
-
-        write_params
-    }
-
-    /// Handle the case where table already exists based on the create mode
-    async fn handle_table_exists(
-        &self,
-        table_name: &str,
-        mode: CreateTableMode,
-        data_schema: &arrow_schema::Schema,
-    ) -> Result<Arc<dyn BaseTable>> {
-        match mode {
-            CreateTableMode::Create => Err(Error::TableAlreadyExists {
-                name: table_name.to_string(),
-            }),
-            CreateTableMode::ExistOk(callback) => {
-                let req = OpenTableRequest {
-                    name: table_name.to_string(),
-                    index_cache_size: None,
-                    lance_read_params: None,
-                };
-                let req = (callback)(req);
-                let table = self.open_table(req).await?;
-
-                let table_schema = table.schema().await?;
-
-                if table_schema.as_ref() != data_schema {
-                    return Err(Error::Schema {
-                        message: "Provided schema does not match existing table schema".to_string(),
-                    });
-                }
-
-                Ok(table)
-            }
-            CreateTableMode::Overwrite => unreachable!(),
-        }
-    }
 }

 #[async_trait::async_trait]
@@ -612,14 +475,50 @@ impl Database for ListingDatabase {
        Ok(f)
    }

-    async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
+    async fn create_table(&self, mut request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
        let table_uri = self.table_uri(&request.name)?;
+        // Inherit storage options from the connection
+        let storage_options = request
+            .write_options
+            .lance_write_params
+            .get_or_insert_with(Default::default)
+            .store_params
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        for (key, value) in self.storage_options.iter() {
+            if !storage_options.contains_key(key) {
+                storage_options.insert(key.clone(), value.clone());
+            }
+        }

-        let (storage_version_override, v2_manifest_override) =
-            self.extract_storage_overrides(&request)?;
+        let storage_options = storage_options.clone();

-        let write_params =
-            self.prepare_write_params(&request, storage_version_override, v2_manifest_override);
+        let mut write_params = request.write_options.lance_write_params.unwrap_or_default();
+
+        if let Some(storage_version) = &self.new_table_config.data_storage_version {
+            write_params.data_storage_version = Some(*storage_version);
+        } else {
+            // Allow the user to override the storage version via storage options (backwards compatibility)
+            if let Some(data_storage_version) = storage_options.get(OPT_NEW_TABLE_STORAGE_VERSION) {
+                write_params.data_storage_version = Some(data_storage_version.parse()?);
+            }
+        }
+        if let Some(enable_v2_manifest_paths) = self.new_table_config.enable_v2_manifest_paths {
+            write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
+        } else {
+            // Allow the user to override the storage version via storage options (backwards compatibility)
+            if let Some(enable_v2_manifest_paths) = storage_options
+                .get(OPT_NEW_TABLE_V2_MANIFEST_PATHS)
+                .map(|s| s.parse::<bool>().unwrap())
+            {
+                write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
+            }
+        }
+
+        if matches!(&request.mode, CreateTableMode::Overwrite) {
+            write_params.mode = WriteMode::Overwrite;
+        }

        let data_schema = request.data.arrow_schema();

@@ -634,10 +533,30 @@ impl Database for ListingDatabase {
        .await
        {
            Ok(table) => Ok(Arc::new(table)),
-            Err(Error::TableAlreadyExists { .. }) => {
-                self.handle_table_exists(&request.name, request.mode, &data_schema)
-                    .await
-            }
+            Err(Error::TableAlreadyExists { name }) => match request.mode {
+                CreateTableMode::Create => Err(Error::TableAlreadyExists { name }),
+                CreateTableMode::ExistOk(callback) => {
+                    let req = OpenTableRequest {
+                        name: request.name.clone(),
+                        index_cache_size: None,
+                        lance_read_params: None,
+                    };
+                    let req = (callback)(req);
+                    let table = self.open_table(req).await?;
+
+                    let table_schema = table.schema().await?;
+
+                    if table_schema != data_schema {
+                        return Err(Error::Schema {
+                            message: "Provided schema does not match existing table schema"
+                                .to_string(),
+                        });
+                    }
+
+                    Ok(table)
+                }
+                CreateTableMode::Overwrite => unreachable!(),
+            },
            Err(err) => Err(err),
        }
    }
@@ -645,22 +564,18 @@ impl Database for ListingDatabase {
    async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
        let table_uri = self.table_uri(&request.name)?;

-        // Only modify the storage options if we actually have something to
-        // inherit. There is a difference between storage_options=None and
-        // storage_options=Some({}). Using storage_options=None will cause the
-        // connection's session store registry to be used. Supplying Some({})
-        // will cause a new connection to be created, and that connection will
-        // be dropped from the cache when python GCs the table object, which
-        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = request
-                .lance_read_params
-                .get_or_insert_with(Default::default)
-                .store_options
-                .get_or_insert_with(Default::default)
-                .storage_options
-                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
+        // Inherit storage options from the connection
+        let storage_options = request
+            .lance_read_params
+            .get_or_insert_with(Default::default)
+            .store_options
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        for (key, value) in self.storage_options.iter() {
+            if !storage_options.contains_key(key) {
+                storage_options.insert(key.clone(), value.clone());
+            }
        }

        // Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -669,14 +584,13 @@ impl Database for ListingDatabase {
        // If we have a user provided ReadParams use that
        // If we don't then start with the default ReadParams and customize it with
        // the options from the OpenTableBuilder
-        let mut read_params = request.lance_read_params.unwrap_or_else(|| {
+        let read_params = request.lance_read_params.unwrap_or_else(|| {
            let mut default_params = ReadParams::default();
            if let Some(index_cache_size) = request.index_cache_size {
                default_params.index_cache_size = index_cache_size as usize;
            }
            default_params
        });
-        read_params.session(self.session.clone());

        let native_table = Arc::new(
            NativeTable::open_with_params(
--- a/rust/lancedb/src/io/object_store.rs
+++ b/rust/lancedb/src/io/object_store.rs
@@ -107,7 +107,7 @@ impl ObjectStore for MirroringObjectStore {
        self.primary.delete(location).await
    }

-    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result<ObjectMeta>> {
        self.primary.list(prefix)
    }

--- a/rust/lancedb/src/io/object_store/io_tracking.rs
+++ b/rust/lancedb/src/io/object_store/io_tracking.rs
@@ -119,7 +119,7 @@ impl ObjectStore for IoTrackingStore {
        let result = self.target.get(location).await;
        if let Ok(result) = &result {
            let num_bytes = result.range.end - result.range.start;
-            self.record_read(num_bytes);
+            self.record_read(num_bytes as u64);
        }
        result
    }
@@ -128,12 +128,12 @@ impl ObjectStore for IoTrackingStore {
        let result = self.target.get_opts(location, options).await;
        if let Ok(result) = &result {
            let num_bytes = result.range.end - result.range.start;
-            self.record_read(num_bytes);
+            self.record_read(num_bytes as u64);
        }
        result
    }

-    async fn get_range(&self, location: &Path, range: std::ops::Range<u64>) -> OSResult<Bytes> {
+    async fn get_range(&self, location: &Path, range: std::ops::Range<usize>) -> OSResult<Bytes> {
        let result = self.target.get_range(location, range).await;
        if let Ok(result) = &result {
            self.record_read(result.len() as u64);
@@ -144,7 +144,7 @@ impl ObjectStore for IoTrackingStore {
    async fn get_ranges(
        &self,
        location: &Path,
-        ranges: &[std::ops::Range<u64>],
+        ranges: &[std::ops::Range<usize>],
    ) -> OSResult<Vec<Bytes>> {
        let result = self.target.get_ranges(location, ranges).await;
        if let Ok(result) = &result {
@@ -170,7 +170,7 @@ impl ObjectStore for IoTrackingStore {
        self.target.delete_stream(locations)
    }

-    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, OSResult<ObjectMeta>> {
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, OSResult<ObjectMeta>> {
        self.record_read(0);
        self.target.list(prefix)
    }
@@ -179,7 +179,7 @@ impl ObjectStore for IoTrackingStore {
        &self,
        prefix: Option<&Path>,
        offset: &Path,
-    ) -> BoxStream<'static, OSResult<ObjectMeta>> {
+    ) -> BoxStream<'_, OSResult<ObjectMeta>> {
        self.record_read(0);
        self.target.list_with_offset(prefix, offset)
    }
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -57,8 +57,6 @@ use crate::{
 };

 const REQUEST_TIMEOUT_HEADER: HeaderName = HeaderName::from_static("x-request-timeout-ms");
-const METRIC_TYPE_KEY: &str = "metric_type";
-const INDEX_TYPE_KEY: &str = "index_type";

 pub struct RemoteTags<'a, S: HttpSend = Sender> {
    inner: &'a RemoteTable<S>,
@@ -999,53 +997,23 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            "column": column
        });

-        match index.index {
+        let (index_type, distance_type) = match index.index {
            // TODO: Should we pass the actual index parameters? SaaS does not
            // yet support them.
-            Index::IvfFlat(index) => {
-                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_FLAT".to_string());
-                body[METRIC_TYPE_KEY] =
-                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
-                if let Some(num_partitions) = index.num_partitions {
-                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
-                }
-            }
-            Index::IvfPq(index) => {
-                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
-                body[METRIC_TYPE_KEY] =
-                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
-                if let Some(num_partitions) = index.num_partitions {
-                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
-                }
-                if let Some(num_bits) = index.num_bits {
-                    body["num_bits"] = serde_json::Value::Number(num_bits.into());
-                }
-            }
-            Index::IvfHnswSq(index) => {
-                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
-                body[METRIC_TYPE_KEY] =
-                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
-                if let Some(num_partitions) = index.num_partitions {
-                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
-                }
-            }
-            Index::BTree(_) => {
-                body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
-            }
-            Index::Bitmap(_) => {
-                body[INDEX_TYPE_KEY] = serde_json::Value::String("BITMAP".to_string());
-            }
-            Index::LabelList(_) => {
-                body[INDEX_TYPE_KEY] = serde_json::Value::String("LABEL_LIST".to_string());
-            }
+            Index::IvfFlat(index) => ("IVF_FLAT", Some(index.distance_type)),
+            Index::IvfPq(index) => ("IVF_PQ", Some(index.distance_type)),
+            Index::IvfHnswSq(index) => ("IVF_HNSW_SQ", Some(index.distance_type)),
+            Index::BTree(_) => ("BTREE", None),
+            Index::Bitmap(_) => ("BITMAP", None),
+            Index::LabelList(_) => ("LABEL_LIST", None),
            Index::FTS(fts) => {
-                body[INDEX_TYPE_KEY] = serde_json::Value::String("FTS".to_string());
                let params = serde_json::to_value(&fts).map_err(|e| Error::InvalidInput {
                    message: format!("failed to serialize FTS index params {:?}", e),
                })?;
                for (key, value) in params.as_object().unwrap() {
                    body[key] = value.clone();
                }
+                ("FTS", None)
            }
            Index::Auto => {
                let schema = self.schema().await?;
@@ -1055,11 +1023,9 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
                        message: format!("Column {} not found in schema", column),
                    })?;
                if supported_vector_data_type(field.data_type()) {
-                    body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
-                    body[METRIC_TYPE_KEY] =
-                        serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
+                    ("IVF_PQ", Some(DistanceType::L2))
                } else if supported_btree_data_type(field.data_type()) {
-                    body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
+                    ("BTREE", None)
                } else {
                    return Err(Error::NotSupported {
                        message: format!(
@@ -1076,6 +1042,12 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
                })
            }
        };
+        body["index_type"] = serde_json::Value::String(index_type.into());
+        if let Some(distance_type) = distance_type {
+            // Phalanx expects this to be lowercase right now.
+            body["metric_type"] =
+                serde_json::Value::String(distance_type.to_string().to_lowercase());
+        }

        let request = request.json(&body);

@@ -1457,12 +1429,11 @@ mod tests {
    use chrono::{DateTime, Utc};
    use futures::{future::BoxFuture, StreamExt, TryFutureExt};
    use lance_index::scalar::inverted::query::MatchQuery;
-    use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
+    use lance_index::scalar::FullTextSearchQuery;
    use reqwest::Body;
    use rstest::rstest;
-    use serde_json::json;

-    use crate::index::vector::{IvfFlatIndexBuilder, IvfHnswSqIndexBuilder};
+    use crate::index::vector::IvfFlatIndexBuilder;
    use crate::remote::db::DEFAULT_SERVER_VERSION;
    use crate::remote::JSON_CONTENT_TYPE;
    use crate::{
@@ -2347,7 +2318,6 @@ mod tests {
                                "fuzziness": 0,
                                "max_expansions": 50,
                                "operator": "Or",
-                                "prefix_length": 0,
                            },
                        }
                    },
@@ -2462,79 +2432,29 @@ mod tests {
        let cases = [
            (
                "IVF_FLAT",
-                json!({
-                    "metric_type": "hamming",
-                }),
+                Some("hamming"),
                Index::IvfFlat(IvfFlatIndexBuilder::default().distance_type(DistanceType::Hamming)),
            ),
-            (
-                "IVF_FLAT",
-                json!({
-                    "metric_type": "hamming",
-                    "num_partitions": 128,
-                }),
-                Index::IvfFlat(
-                    IvfFlatIndexBuilder::default()
-                        .distance_type(DistanceType::Hamming)
-                        .num_partitions(128),
-                ),
-            ),
+            ("IVF_PQ", Some("l2"), Index::IvfPq(Default::default())),
            (
                "IVF_PQ",
-                json!({
-                    "metric_type": "l2",
-                }),
-                Index::IvfPq(Default::default()),
-            ),
-            (
-                "IVF_PQ",
-                json!({
-                    "metric_type": "cosine",
-                    "num_partitions": 128,
-                    "num_bits": 4,
-                }),
-                Index::IvfPq(
-                    IvfPqIndexBuilder::default()
-                        .distance_type(DistanceType::Cosine)
-                        .num_partitions(128)
-                        .num_bits(4),
-                ),
+                Some("cosine"),
+                Index::IvfPq(IvfPqIndexBuilder::default().distance_type(DistanceType::Cosine)),
            ),
            (
                "IVF_HNSW_SQ",
-                json!({
-                    "metric_type": "l2",
-                }),
+                Some("l2"),
                Index::IvfHnswSq(Default::default()),
            ),
-            (
-                "IVF_HNSW_SQ",
-                json!({
-                    "metric_type": "l2",
-                    "num_partitions": 128,
-                }),
-                Index::IvfHnswSq(
-                    IvfHnswSqIndexBuilder::default()
-                        .distance_type(DistanceType::L2)
-                        .num_partitions(128),
-                ),
-            ),
            // HNSW_PQ isn't yet supported on SaaS
-            ("BTREE", json!({}), Index::BTree(Default::default())),
-            ("BITMAP", json!({}), Index::Bitmap(Default::default())),
-            (
-                "LABEL_LIST",
-                json!({}),
-                Index::LabelList(Default::default()),
-            ),
-            (
-                "FTS",
-                serde_json::to_value(InvertedIndexParams::default()).unwrap(),
-                Index::FTS(Default::default()),
-            ),
+            ("BTREE", None, Index::BTree(Default::default())),
+            ("BITMAP", None, Index::Bitmap(Default::default())),
+            ("LABEL_LIST", None, Index::LabelList(Default::default())),
+            ("FTS", None, Index::FTS(Default::default())),
        ];

-        for (index_type, expected_body, index) in cases {
+        for (index_type, distance_type, index) in cases {
+            let params = index.clone();
            let table = Table::new_with_handler("my_table", move |request| {
                assert_eq!(request.method(), "POST");
                assert_eq!(request.url().path(), "/v1/table/my_table/create_index/");
@@ -2544,9 +2464,19 @@ mod tests {
                );
                let body = request.body().unwrap().as_bytes().unwrap();
                let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-                let mut expected_body = expected_body.clone();
-                expected_body["column"] = "a".into();
-                expected_body[INDEX_TYPE_KEY] = index_type.into();
+                let mut expected_body = serde_json::json!({
+                    "column": "a",
+                    "index_type": index_type,
+                });
+                if let Some(distance_type) = distance_type {
+                    expected_body["metric_type"] = distance_type.to_lowercase().into();
+                }
+                if let Index::FTS(fts) = &params {
+                    let params = serde_json::to_value(fts).unwrap();
+                    for (key, value) in params.as_object().unwrap() {
+                        expected_body[key] = value.clone();
+                    }
+                }

                assert_eq!(body, expected_body);

--- a/rust/lancedb/src/table/datafusion.rs
+++ b/rust/lancedb/src/table/datafusion.rs
@@ -392,18 +392,9 @@ pub mod tests {
                } else {
                    expected_line.trim()
                };
-                assert_eq!(
-                    &actual_trimmed[..expected_trimmed.len()],
-                    expected_trimmed,
-                    "\nactual:\n{physical_plan}\nexpected:\n{expected}"
-                );
+                assert_eq!(&actual_trimmed[..expected_trimmed.len()], expected_trimmed);
            }
-            assert_eq!(
-                lines_checked,
-                expected.lines().count(),
-                "\nlines_checked:\n{lines_checked}\nexpected:\n{}",
-                expected.lines().count()
-            );
+            assert_eq!(lines_checked, expected.lines().count());
        }
    }

@@ -486,9 +477,9 @@ pub mod tests {
        TestFixture::check_plan(
            plan,
            "MetadataEraserExec
+             RepartitionExec:...
             CoalesceBatchesExec:...
             FilterExec: i@0 >= 5
-             RepartitionExec:...
             ProjectionExec:...
             LanceScan:...",
        )
--- a/rust/lancedb/src/table/dataset.rs
+++ b/rust/lancedb/src/table/dataset.rs
@@ -129,9 +129,7 @@ impl DatasetRef {
                dataset: ref mut ds,
                ..
            } => {
-                if dataset.manifest().version > ds.manifest().version {
-                    *ds = dataset;
-                }
+                *ds = dataset;
            }
            _ => unreachable!("Dataset should be in latest mode at this point"),
        }
--- a/rust/lancedb/tests/object_store_test.rs
+++ b/rust/lancedb/tests/object_store_test.rs
@@ -281,46 +281,6 @@ async fn test_encryption() -> Result<()> {
    Ok(())
 }

-#[tokio::test]
-async fn test_table_storage_options_override() -> Result<()> {
-    // Test that table-level storage options override connection-level options
-    let bucket = S3Bucket::new("test-override").await;
-    let key1 = KMSKey::new().await;
-    let key2 = KMSKey::new().await;
-
-    let uri = format!("s3://{}", bucket.0);
-
-    // Create connection with key1 encryption
-    let db = lancedb::connect(&uri)
-        .storage_options(CONFIG.iter().cloned())
-        .storage_option("aws_server_side_encryption", "aws:kms")
-        .storage_option("aws_sse_kms_key_id", &key1.0)
-        .execute()
-        .await?;
-
-    // Create table overriding with key2 encryption
-    let data = test_data();
-    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
-    let _table = db
-        .create_table("test_override", data)
-        .storage_option("aws_sse_kms_key_id", &key2.0)
-        .execute()
-        .await?;
-
-    // Verify objects are encrypted with key2, not key1
-    validate_objects_encrypted(&bucket.0, "test_override", &key2.0).await;
-
-    // Also test that a table created without override uses connection settings
-    let data = test_data();
-    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
-    let _table2 = db.create_table("test_inherit", data).execute().await?;
-
-    // Verify this table uses key1 from connection
-    validate_objects_encrypted(&bucket.0, "test_inherit", &key1.0).await;
-
-    Ok(())
-}
-
 struct DynamoDBCommitTable(String);

 impl DynamoDBCommitTable {