Mirror of https://github.com/lancedb/lancedb.git (synced 2025-12-23 13:29:57 +00:00)
Compare commits: python-v0. ... python-v0. (23 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | d4bb59b542 |  |
|  | 6b2dd6de51 |  |
|  | dbccd9e4f1 |  |
|  | b12ebfed4c |  |
|  | 1dadb2aefa |  |
|  | eb9784d7f2 |  |
|  | ba755626cc |  |
|  | 7760799cb8 |  |
|  | 4beb2d2877 |  |
|  | a00b8595d1 |  |
|  | 9c8314b4fd |  |
|  | c625b6f2b2 |  |
|  | bec8fe6547 |  |
|  | dc1150c011 |  |
|  | afaefc6264 |  |
|  | cb70ff8cee |  |
|  | cbb5a841b1 |  |
|  | c72f6770fd |  |
|  | e5a80a5e86 |  |
|  | 8d0a7fad1f |  |
|  | b80d4d0134 |  |
|  | 9645fe52c2 |  |
|  | b77314168d |  |
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.20.1-beta.0"
+current_version = "0.21.0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
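The `parse` value above is a verbose-mode (`(?x)`) regular expression that bump-my-version uses to split a release string into named components. A minimal sketch of how such a pattern matches; only the major/minor groups appear in the diff, so the `patch` group below is an assumed continuation, not the repo's actual pattern:

```python
import re

# Hypothetical reconstruction: the patch group is assumed for illustration;
# the diff truncates the real pattern after the minor group.
PARSE = re.compile(
    r"""(?x)
    (?P<major>0|[1-9]\d*)\.
    (?P<minor>0|[1-9]\d*)\.
    (?P<patch>0|[1-9]\d*)
    """
)

m = PARSE.match("0.21.0")
assert m is not None
assert m.group("major", "minor", "patch") == ("0", "21", "0")
```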
5  .github/workflows/npm-publish.yml (vendored)
@@ -541,6 +541,8 @@ jobs:
      run: npm deprecate vectordb "Use @lancedb/lancedb instead."
    - name: Checkout
      uses: actions/checkout@v4
      with:
        ref: main
    - name: Update package-lock.json
      run: |
        git config user.name 'Lance Release'
@@ -548,6 +550,9 @@ jobs:
        bash ci/update_lockfiles.sh
    - name: Push new commit
      uses: ad-m/github-push-action@master
      with:
        github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
        branch: main
    - name: Notify Slack Action
      uses: ravsamhq/notify-slack-action@2.3.0
      if: ${{ always() }}
888  Cargo.lock (generated): diff suppressed because it is too large
32  Cargo.toml
@@ -21,14 +21,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.29.1", "features" = ["dynamodb"], tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-io = { version = "=0.29.1", tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-index = { version = "=0.29.1", tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-linalg = { version = "=0.29.1", tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-table = { version = "=0.29.1", tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-testing = { version = "=0.29.1", tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-datafusion = { version = "=0.29.1", tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
-lance-encoding = { version = "=0.29.1", tag = "v0.29.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git", features = ["dynamodb"] }
+lance-io = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
@@ -39,20 +39,20 @@ arrow-schema = "55.1"
 arrow-arith = "55.1"
 arrow-cast = "55.1"
 async-trait = "0"
-datafusion = { version = "47.0", default-features = false }
-datafusion-catalog = "47.0"
-datafusion-common = { version = "47.0", default-features = false }
-datafusion-execution = "47.0"
-datafusion-expr = "47.0"
-datafusion-physical-plan = "47.0"
+datafusion = { version = "48.0", default-features = false }
+datafusion-catalog = "48.0"
+datafusion-common = { version = "48.0", default-features = false }
+datafusion-execution = "48.0"
+datafusion-expr = "48.0"
+datafusion-physical-plan = "48.0"
 env_logger = "0.11"
-half = { "version" = "=2.5.0", default-features = false, features = [
+half = { "version" = "=2.6.0", default-features = false, features = [
     "num-traits",
 ] }
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.11.0"
+object_store = "0.12.0"
 pin-project = "1.0.7"
 snafu = "0.8"
 url = "2"
@@ -428,7 +428,7 @@
     "\n",
     "**Why?** \n",
     "Embedding the UFO dataset and ingesting it into LanceDB takes **~2 hours on a T4 GPU**. To save time: \n",
-    "- **Use the pre-prepared table with index created ** (provided below) to proceed directly to step7: search. \n",
+    "- **Use the pre-prepared table with index created** (provided below) to proceed directly to **Step 7**: search. \n",
     "- **Step 5a** contains the full ingestion code for reference (run it only if necessary). \n",
     "- **Step 6** contains the details on creating the index on the multivector column"
 ]
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.20.1-beta.0</version>
+    <version>0.21.0-final.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
@@ -6,7 +6,7 @@
   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.20.1-beta.0</version>
+  <version>0.21.0-final.0</version>
   <packaging>pom</packaging>

   <name>LanceDB Parent</name>
44  node/package-lock.json (generated)
@@ -1,12 +1,12 @@
 {
   "name": "vectordb",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "vectordb",
-      "version": "0.20.1-beta.0",
+      "version": "0.21.0",
       "cpu": [
         "x64",
         "arm64"
@@ -52,11 +52,11 @@
         "uuid": "^9.0.0"
       },
       "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.20.1-beta.0",
-        "@lancedb/vectordb-darwin-x64": "0.20.1-beta.0",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.0",
-        "@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.0",
-        "@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.0"
+        "@lancedb/vectordb-darwin-arm64": "0.21.0",
+        "@lancedb/vectordb-darwin-x64": "0.21.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.21.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.21.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.21.0"
       },
       "peerDependencies": {
         "@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
       }
     },
     "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.20.1-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.20.1-beta.0.tgz",
-      "integrity": "sha512-EZl1nvF/2MbLkB8DkNPg+9SpYWpqnNR9kY5a1JWtNWQWw735oT2VPnH3B2htDKU42gJ/9DJGBdEvIJwzeHT85w==",
+      "version": "0.21.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.0.tgz",
+      "integrity": "sha512-FTKbdYG36mvQ75tId+esyRfRjIBzryRhAp/6h51tiXy8gsq/TButuiPdqIXeonNModEjhu8wkzsGFwgjCcePow==",
      "cpu": [
        "arm64"
      ],
@@ -339,9 +339,9 @@
       ]
     },
     "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.20.1-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.20.1-beta.0.tgz",
-      "integrity": "sha512-1ZkMcsXsysLRohAeHGpbytVHUp4yEU89A34rrh48vcQUNvYtqxbAw+TLjAbN0vvNvOZOI4DRllxSL1O+Dbybbg==",
+      "version": "0.21.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.0.tgz",
+      "integrity": "sha512-vGaFBr2sQZWE0mudg3LGTHiRE7p2Qce2ogiE2VAf1DLAJ4MrIhgVmEttf966ausIwNCgml+5AzUntw6zC0Oyuw==",
      "cpu": [
        "x64"
      ],
@@ -351,9 +351,9 @@
       ]
     },
     "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.20.1-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.20.1-beta.0.tgz",
-      "integrity": "sha512-CxjSGaLJNRYxljdrC8MSirnHu73jctv3S3Q90CbsWMsij9za87zvnrjoiRIn7kv7UNS4ArwS9yyH6gNorCBf6Q==",
+      "version": "0.21.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.0.tgz",
+      "integrity": "sha512-KlxqhnX4eBN6rDqrPgf/x/vLpnHK2UcIzNLpiOZzSAhooCmKmnNpfs/EXt+KRFloEQMy25AHpMpqkSPv1Q2oDA==",
      "cpu": [
        "arm64"
      ],
@@ -363,9 +363,9 @@
       ]
     },
     "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.20.1-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.20.1-beta.0.tgz",
-      "integrity": "sha512-WI2XWYYO5ygL0Az7SlX98VpNqrz8hKuTK/xC/PoM99s1xnfcCukM28DaDGZJpXOGnLbVnexcO2RW4daJ2xDPaQ==",
+      "version": "0.21.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.0.tgz",
+      "integrity": "sha512-t7dkFV6kga3rqXR1rH460GdpSVuY0tw7CIc0KqsIIkBcXzUPA1n0QDoazdwPQ1MXzG/+F5WWCTp3dYWx2vP0Lw==",
      "cpu": [
        "x64"
      ],
@@ -375,9 +375,9 @@
       ]
     },
     "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.20.1-beta.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.20.1-beta.0.tgz",
-      "integrity": "sha512-Mxd7V3Y8whEBoQFQZhZGFQi0avq8ujHRI2c0LhjhYTdwGylrBS3bfGD+/nbDGhAjp7dp5U8P4kiBi30QNwoedA==",
+      "version": "0.21.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.0.tgz",
+      "integrity": "sha512-yovkW61RECBTsu0S527BX1uW0jCAZK9MAsJTknXmDjp78figx4/AyI5ajT63u/Uo4EKoheeNiiLdyU4v+A9YVw==",
      "cpu": [
        "x64"
      ],
@@ -1,6 +1,6 @@
 {
   "name": "vectordb",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "description": " Serverless, low-latency vector database for AI applications",
   "private": false,
   "main": "dist/index.js",
@@ -89,10 +89,10 @@
     }
   },
   "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.20.1-beta.0",
-    "@lancedb/vectordb-darwin-arm64": "0.20.1-beta.0",
-    "@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.0",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.0",
-    "@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.0"
+    "@lancedb/vectordb-darwin-x64": "0.21.0",
+    "@lancedb/vectordb-darwin-arm64": "0.21.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.21.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.21.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.21.0"
   }
 }
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.20.1-beta.0"
+version = "0.21.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -592,14 +592,14 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
     ).rejects.toThrow("column vector was missing");
   });

-  it("will provide a nice error if run twice", async function () {
+  it("will skip embedding application if already applied", async function () {
     const records = sampleRecords();
     const table = await convertToTable(records, dummyEmbeddingConfig);

     // fromTableToBuffer will try and apply the embeddings again
-    await expect(
-      fromTableToBuffer(table, dummyEmbeddingConfig),
-    ).rejects.toThrow("already existed");
+    // but should skip since the column already has non-null values
+    const result = await fromTableToBuffer(table, dummyEmbeddingConfig);
+    expect(result.byteLength).toBeGreaterThan(0);
   });
 });
@@ -368,9 +368,9 @@ describe("merge insert", () => {
       { a: 4, b: "z" },
     ];

-    expect(
-      JSON.parse(JSON.stringify((await table.toArrow()).toArray())),
-    ).toEqual(expected);
+    const result = (await table.toArrow()).toArray().sort((a, b) => a.a - b.a);
+
+    expect(result.map((row) => ({ ...row }))).toEqual(expected);
   });
   test("conditional update", async () => {
     const newData = [
@@ -1650,13 +1650,25 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
     expect(resultSet.has("fob")).toBe(true);
     expect(resultSet.has("fo")).toBe(true);
     expect(resultSet.has("food")).toBe(true);
+
+    const prefixResults = await table
+      .search(
+        new MatchQuery("foo", "text", { fuzziness: 3, prefixLength: 3 }),
+      )
+      .toArray();
+    expect(prefixResults.length).toBe(2);
+    const resultSet2 = new Set(prefixResults.map((r) => r.text));
+    expect(resultSet2.has("foo")).toBe(true);
+    expect(resultSet2.has("food")).toBe(true);
   });

   test("full text search boolean query", async () => {
     const db = await connect(tmpDir.name);
     const data = [
       { text: "hello world", vector: [0.1, 0.2, 0.3] },
       { text: "goodbye world", vector: [0.4, 0.5, 0.6] },
       { text: "The cat and dog are playing" },
       { text: "The cat is sleeping" },
       { text: "The dog is barking" },
       { text: "The dog chases the cat" },
     ];
     const table = await db.createTable("test", data);
     await table.createIndex("text", {
@@ -1666,22 +1678,32 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
     const shouldResults = await table
       .search(
         new BooleanQuery([
-          [Occur.Should, new MatchQuery("hello", "text")],
-          [Occur.Should, new MatchQuery("goodbye", "text")],
+          [Occur.Should, new MatchQuery("cat", "text")],
+          [Occur.Should, new MatchQuery("dog", "text")],
         ]),
       )
       .toArray();
-    expect(shouldResults.length).toBe(2);
+    expect(shouldResults.length).toBe(4);

     const mustResults = await table
       .search(
         new BooleanQuery([
-          [Occur.Must, new MatchQuery("hello", "text")],
-          [Occur.Must, new MatchQuery("world", "text")],
+          [Occur.Must, new MatchQuery("cat", "text")],
+          [Occur.Must, new MatchQuery("dog", "text")],
         ]),
       )
       .toArray();
-    expect(mustResults.length).toBe(1);
+    expect(mustResults.length).toBe(2);
+
+    const mustNotResults = await table
+      .search(
+        new BooleanQuery([
+          [Occur.Must, new MatchQuery("cat", "text")],
+          [Occur.MustNot, new MatchQuery("dog", "text")],
+        ]),
+      )
+      .toArray();
+    expect(mustNotResults.length).toBe(1);
   });

   test.each([
@@ -417,7 +417,9 @@ function inferSchema(
   } else {
     const inferredType = inferType(value, path, opts);
     if (inferredType === undefined) {
-      throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
+      throw new Error(`Failed to infer data type for field ${path.join(
+        ".",
+      )} at row ${rowI}. \
 Consider providing an explicit schema.`);
     }
     pathTree.set(path, inferredType);
@@ -799,11 +801,17 @@ async function applyEmbeddingsFromMetadata(
       `Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
     );
   }

+  // Check if destination column exists and handle accordingly
   if (columns[destColumn] !== undefined) {
-    throw new Error(
-      `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
-    );
+    const existingColumn = columns[destColumn];
+    // If the column exists but is all null, we can fill it with embeddings
+    if (existingColumn.nullCount !== existingColumn.length) {
+      // Column has non-null values, skip embedding application
+      continue;
+    }
   }

   if (table.batches.length > 1) {
     throw new Error(
       "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
@@ -903,11 +911,23 @@ async function applyEmbeddings<T>(
       );
     }
   } else {
+    // Check if destination column exists and handle accordingly
     if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
-      throw new Error(
-        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
-      );
+      const existingColumn = newColumns[destColumn];
+      // If the column exists but is all null, we can fill it with embeddings
+      if (existingColumn.nullCount !== existingColumn.length) {
+        // Column has non-null values, skip embedding application and return table as-is
+        let newTable = new ArrowTable(newColumns);
+        if (schema != null) {
+          newTable = alignTable(newTable, schema as Schema);
+        }
+        return new ArrowTable(
+          new Schema(newTable.schema.fields, schemaMetadata),
+          newTable.batches,
+        );
+      }
     }

     if (table.batches.length > 1) {
       throw new Error(
         "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
@@ -812,10 +812,12 @@ export enum Operator {
  *
  * - `Must`: The term must be present in the document.
  * - `Should`: The term should contribute to the document score, but is not required.
+ * - `MustNot`: The term must not be present in the document.
  */
 export enum Occur {
-  Must = "MUST",
   Should = "SHOULD",
+  Must = "MUST",
+  MustNot = "MUST_NOT",
 }

 /**
@@ -856,6 +858,7 @@ export class MatchQuery implements FullTextQuery {
  * - `fuzziness`: The fuzziness level for the query (default is 0).
  * - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
  * - `operator`: The logical operator to use for combining terms in the query (default is "OR").
+ * - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
  */
 constructor(
   query: string,
@@ -865,6 +868,7 @@ export class MatchQuery implements FullTextQuery {
     fuzziness?: number;
     maxExpansions?: number;
     operator?: Operator;
+    prefixLength?: number;
   },
 ) {
   let fuzziness = options?.fuzziness;
@@ -878,6 +882,7 @@ export class MatchQuery implements FullTextQuery {
       fuzziness,
       options?.maxExpansions ?? 50,
       options?.operator ?? Operator.Or,
+      options?.prefixLength ?? 0,
     );
   }
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": [
     "win32"
   ],
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",
4  nodejs/package-lock.json (generated)
@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.20.1-beta.0",
+      "version": "0.21.0",
       "cpu": [
         "x64",
         "arm64"
@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.20.1-beta.0",
+  "version": "0.21.0",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",
@@ -335,6 +335,7 @@ impl JsFullTextQuery {
         fuzziness: Option<u32>,
         max_expansions: u32,
         operator: String,
+        prefix_length: u32,
     ) -> napi::Result<Self> {
         Ok(Self {
             inner: MatchQuery::new(query)
@@ -347,6 +348,7 @@ impl JsFullTextQuery {
                     napi::Error::from_reason(format!("Invalid operator: {}", e))
                 })?,
             )
+            .with_prefix_length(prefix_length)
             .into(),
         })
     }
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.23.1-beta.1"
+current_version = "0.24.1-beta.0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.23.1-beta.1"
+version = "0.24.1-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -85,7 +85,7 @@ embeddings = [
     "boto3>=1.28.57",
     "awscli>=1.29.57",
     "botocore>=1.31.57",
-    "ollama",
+    "ollama>=0.3.0",
     "ibm-watsonx-ai>=1.1.2",
 ]
 azure = ["adlfs>=2024.2.0"]
@@ -2,14 +2,15 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 from functools import cached_property
-from typing import TYPE_CHECKING, List, Optional, Union
+from typing import TYPE_CHECKING, List, Optional, Sequence, Union
+
+import numpy as np

 from ..util import attempt_import_or_raise
 from .base import TextEmbeddingFunction
 from .registry import register

 if TYPE_CHECKING:
-    import numpy as np
     import ollama

@@ -28,23 +29,21 @@ class OllamaEmbeddings(TextEmbeddingFunction):
     keep_alive: Optional[Union[float, str]] = None
     ollama_client_kwargs: Optional[dict] = {}

-    def ndims(self):
+    def ndims(self) -> int:
         return len(self.generate_embeddings(["foo"])[0])

-    def _compute_embedding(self, text) -> Union["np.array", None]:
-        return (
-            self._ollama_client.embeddings(
-                model=self.name,
-                prompt=text,
-                options=self.options,
-                keep_alive=self.keep_alive,
-            )["embedding"]
-            or None
-        )
+    def _compute_embedding(self, text: Sequence[str]) -> Sequence[Sequence[float]]:
+        response = self._ollama_client.embed(
+            model=self.name,
+            input=text,
+            options=self.options,
+            keep_alive=self.keep_alive,
+        )
+        return response.embeddings

     def generate_embeddings(
-        self, texts: Union[List[str], "np.ndarray"]
-    ) -> list[Union["np.array", None]]:
+        self, texts: Union[List[str], np.ndarray]
+    ) -> list[Union[np.array, None]]:
         """
         Get the embeddings for the given texts

@@ -54,8 +53,8 @@ class OllamaEmbeddings(TextEmbeddingFunction):
             The texts to embed
         """
         # TODO retry, rate limit, token limit
-        embeddings = [self._compute_embedding(text) for text in texts]
-        return embeddings
+        embeddings = self._compute_embedding(texts)
+        return list(embeddings)

     @cached_property
     def _ollama_client(self) -> "ollama.Client":
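The hunk above moves the Ollama integration from one `embeddings()` call per input text to a single batched `embed()` call (hence the new `ollama>=0.3.0` floor in pyproject.toml). A minimal usage sketch; the registry key `"ollama"` and the model name are assumptions for illustration, not shown in this diff:

```python
import lancedb
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector

# Assumes a local Ollama server with the model already pulled.
func = get_registry().get("ollama").create(name="nomic-embed-text")

class Doc(LanceModel):
    text: str = func.SourceField()
    vector: Vector(func.ndims()) = func.VectorField()

db = lancedb.connect("/tmp/lancedb-ollama-demo")
table = db.create_table("docs", schema=Doc)
# generate_embeddings() now issues one batched embed() call for all rows.
table.add([{"text": "hello world"}, {"text": "goodbye world"}])
```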
@@ -101,8 +101,9 @@ class FullTextOperator(str, Enum):


 class Occur(str, Enum):
-    MUST = "MUST"
     SHOULD = "SHOULD"
+    MUST = "MUST"
+    MUST_NOT = "MUST_NOT"


 @pydantic.dataclasses.dataclass
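With `MUST_NOT` added, the three occurrence modes combine in a `BooleanQuery`; a short sketch against an FTS-indexed `table` (assumed to exist), matching the test added further down:

```python
from lancedb.query import BooleanQuery, MatchQuery, Occur

# Documents that mention "cat" but must not mention "dog".
query = BooleanQuery(
    [
        (Occur.MUST, MatchQuery("cat", "text")),
        (Occur.MUST_NOT, MatchQuery("dog", "text")),
    ]
)
results = table.search(query).to_pandas()
```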
@@ -181,6 +182,9 @@ class MatchQuery(FullTextQuery):
         Can be either `AND` or `OR`.
         If `AND`, all terms in the query must match.
         If `OR`, at least one term in the query must match.
+    prefix_length : int, optional
+        The number of beginning characters being unchanged for fuzzy matching.
+        This is useful to achieve prefix matching.
     """

     query: str
@@ -189,6 +193,7 @@ class MatchQuery(FullTextQuery):
     fuzziness: int = pydantic.Field(0, kw_only=True)
     max_expansions: int = pydantic.Field(50, kw_only=True)
     operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True)
+    prefix_length: int = pydantic.Field(0, kw_only=True)

     def query_type(self) -> FullTextQueryType:
         return FullTextQueryType.MATCH
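A small usage sketch of the new field, mirroring the test added below: with `fuzziness=1`, setting `prefix_length=3` pins the first three characters, so "foo" and "food" match while "fob" and "fo" do not (assumes an FTS-indexed `table`):

```python
from lancedb.query import MatchQuery

results = table.search(
    MatchQuery("foo", "text", fuzziness=1, prefix_length=3)
).to_pandas()
```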
@@ -1446,10 +1451,13 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):

         query = self._query
         if self._phrase_query:
-            raise NotImplementedError(
-                "Phrase query is not yet supported in Lance FTS. "
-                "Use tantivy-based index instead for now."
-            )
+            if isinstance(query, str):
+                if not query.startswith('"') or not query.endswith('"'):
+                    query = f'"{query}"'
+            elif isinstance(query, FullTextQuery) and not isinstance(
+                query, PhraseQuery
+            ):
+                raise TypeError("Please use PhraseQuery for phrase queries.")
         query = self.to_query_object()
         results = self._table._execute_query(query, timeout=timeout)
         results = results.read_all()
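Under the new behavior, a plain string in phrase mode is auto-quoted, while structured queries must use `PhraseQuery` explicitly; a hedged sketch (the exact `PhraseQuery` call signature is an assumption, and the table is assumed to have an FTS index with positions recorded):

```python
from lancedb.query import PhraseQuery

# Explicit phrase query object; passing a non-phrase FullTextQuery while
# phrase mode is enabled now raises TypeError instead of NotImplementedError.
results = table.search(PhraseQuery("hello world", "text")).to_pandas()
```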
@@ -3034,15 +3042,21 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
     >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
     Vector Search Plan:
-    ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
-      Take: columns="vector, _rowid, _distance, (text)"
-        CoalesceBatchesExec: target_batch_size=1024
-          GlobalLimitExec: skip=0, fetch=10
-            FilterExec: _distance@2 IS NOT NULL
-              SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
-                KNNVectorDistance: metric=l2
-                  LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+    Take: columns="vector, _rowid, _distance, (text)"
+      CoalesceBatchesExec: target_batch_size=1024
+        GlobalLimitExec: skip=0, fetch=10
+          FilterExec: _distance@2 IS NOT NULL
+            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
+              KNNVectorDistance: metric=l2
+                LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
     <BLANKLINE>
     FTS Search Plan:
-    LanceScan: uri=..., projection=[vector, text], row_id=false, row_addr=false, ordered=true
+    ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
+      Take: columns="_rowid, _score, (vector), (text)"
+        CoalesceBatchesExec: target_batch_size=1024
+          GlobalLimitExec: skip=0, fetch=10
+            MatchQuery: query=hello
     <BLANKLINE>

     Parameters
     ----------
@@ -827,7 +827,7 @@ class Table(ABC):
         ordering_field_names: Optional[Union[str, List[str]]] = None,
         replace: bool = False,
         writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
-        use_tantivy: bool = True,
+        use_tantivy: bool = False,
         tokenizer_name: Optional[str] = None,
         with_position: bool = False,
         # tokenizer configs:
@@ -864,7 +864,7 @@ class Table(ABC):
         The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
         language code followed by "_stem". So for english it would be "en_stem".
         For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
-    use_tantivy: bool, default True
+    use_tantivy: bool, default False
         If True, use the legacy full-text search implementation based on tantivy.
         If False, use the new full-text search implementation based on lance-index.
     with_position: bool, default False
@@ -1970,7 +1970,7 @@ class LanceTable(Table):
         ordering_field_names: Optional[Union[str, List[str]]] = None,
         replace: bool = False,
         writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
-        use_tantivy: bool = True,
+        use_tantivy: bool = False,
         tokenizer_name: Optional[str] = None,
         with_position: bool = False,
         # tokenizer configs:
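Because the default flipped, a bare `create_fts_index` call now builds the native lance-index implementation; a short sketch (assumes an existing `table`):

```python
# New default: native (lance-index) full-text search index.
table.create_fts_index("text")

# The legacy tantivy-based index must now be requested explicitly.
table.create_fts_index("text", use_tantivy=True, replace=True)
```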
@@ -6,7 +6,7 @@ import lancedb

 # --8<-- [end:import-lancedb]
 # --8<-- [start:import-numpy]
-from lancedb.query import BoostQuery, MatchQuery
+from lancedb.query import BooleanQuery, BoostQuery, MatchQuery, Occur
 import numpy as np
 import pyarrow as pa
@@ -191,6 +191,15 @@ def test_fts_fuzzy_query():
         "food", # 1 insertion
     }

+    results = table.search(
+        MatchQuery("foo", "text", fuzziness=1, prefix_length=3)
+    ).to_pandas()
+    assert len(results) == 2
+    assert set(results["text"].to_list()) == {
+        "foo",
+        "food",
+    }
+

 @pytest.mark.skipif(
     os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
@@ -240,6 +249,60 @@ def test_fts_boost_query():
     )


+@pytest.mark.skipif(
+    os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
+)
+def test_fts_boolean_query(tmp_path):
+    uri = tmp_path / "boolean-example"
+    db = lancedb.connect(uri)
+    table = db.create_table(
+        "my_table_fts_boolean",
+        data=[
+            {"text": "The cat and dog are playing"},
+            {"text": "The cat is sleeping"},
+            {"text": "The dog is barking"},
+            {"text": "The dog chases the cat"},
+        ],
+        mode="overwrite",
+    )
+    table.create_fts_index("text", use_tantivy=False, replace=True)
+
+    # SHOULD
+    results = table.search(
+        MatchQuery("cat", "text") | MatchQuery("dog", "text")
+    ).to_pandas()
+    assert len(results) == 4
+    assert set(results["text"].to_list()) == {
+        "The cat and dog are playing",
+        "The cat is sleeping",
+        "The dog is barking",
+        "The dog chases the cat",
+    }
+    # MUST
+    results = table.search(
+        MatchQuery("cat", "text") & MatchQuery("dog", "text")
+    ).to_pandas()
+    assert len(results) == 2
+    assert set(results["text"].to_list()) == {
+        "The cat and dog are playing",
+        "The dog chases the cat",
+    }
+
+    # MUST NOT
+    results = table.search(
+        BooleanQuery(
+            [
+                (Occur.MUST, MatchQuery("cat", "text")),
+                (Occur.MUST_NOT, MatchQuery("dog", "text")),
+            ]
+        )
+    ).to_pandas()
+    assert len(results) == 1
+    assert set(results["text"].to_list()) == {
+        "The cat is sleeping",
+    }
+
+
 @pytest.mark.skipif(
     os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
 )
@@ -775,6 +775,82 @@ async def test_explain_plan_async(table_async: AsyncTable):
     assert "KNN" in plan


+@pytest.mark.asyncio
+async def test_explain_plan_fts(table_async: AsyncTable):
+    """Test explain plan for FTS queries"""
+    # Create FTS index
+    from lancedb.index import FTS
+
+    await table_async.create_index("text", config=FTS())
+
+    # Test pure FTS query
+    query = await table_async.search("dog", query_type="fts", fts_columns="text")
+    plan = await query.explain_plan()
+    # Should show FTS details (issue #2465 is now fixed)
+    assert "MatchQuery: query=dog" in plan
+    assert "GlobalLimitExec" in plan  # Default limit
+
+    # Test FTS query with limit
+    query_with_limit = await table_async.search(
+        "dog", query_type="fts", fts_columns="text"
+    )
+    plan_with_limit = await query_with_limit.limit(1).explain_plan()
+    assert "MatchQuery: query=dog" in plan_with_limit
+    assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
+
+    # Test FTS query with offset and limit
+    query_with_offset = await table_async.search(
+        "dog", query_type="fts", fts_columns="text"
+    )
+    plan_with_offset = await query_with_offset.offset(1).limit(1).explain_plan()
+    assert "MatchQuery: query=dog" in plan_with_offset
+    assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
+
+
+@pytest.mark.asyncio
+async def test_explain_plan_vector_with_limit_offset(table_async: AsyncTable):
+    """Test explain plan for vector queries with limit and offset"""
+    # Test vector query with limit
+    plan_with_limit = await (
+        table_async.query().nearest_to(pa.array([1, 2])).limit(1).explain_plan()
+    )
+    assert "KNN" in plan_with_limit
+    assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
+
+    # Test vector query with offset and limit
+    plan_with_offset = await (
+        table_async.query()
+        .nearest_to(pa.array([1, 2]))
+        .offset(1)
+        .limit(1)
+        .explain_plan()
+    )
+    assert "KNN" in plan_with_offset
+    assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
+
+
+@pytest.mark.asyncio
+async def test_explain_plan_with_filters(table_async: AsyncTable):
+    """Test explain plan for queries with filters"""
+    # Test vector query with filter
+    plan_with_filter = await (
+        table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
+    )
+    assert "KNN" in plan_with_filter
+    assert "FilterExec" in plan_with_filter
+
+    # Test FTS query with filter
+    from lancedb.index import FTS
+
+    await table_async.create_index("text", config=FTS())
+    query_fts_filter = await table_async.search(
+        "dog", query_type="fts", fts_columns="text"
+    )
+    plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
+    assert "MatchQuery: query=dog" in plan_fts_filter
+    assert "FilterExec: id@" in plan_fts_filter  # Should show filter details
+
+
 @pytest.mark.asyncio
 async def test_query_camelcase_async(tmp_path):
     db = await lancedb.connect_async(tmp_path)
@@ -245,7 +245,7 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch):
         NotImplementedError,
         match="Full-text search is only supported on the local filesystem",
     ):
-        table.create_fts_index("x")
+        table.create_fts_index("x", use_tantivy=True)

     # make sure list tables still works
     assert db.table_names() == ["test_ddb_sync"]
@@ -50,8 +50,9 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
         let fuzziness = ob.getattr("fuzziness")?.extract()?;
         let max_expansions = ob.getattr("max_expansions")?.extract()?;
         let operator = ob.getattr("operator")?.extract::<String>()?;
+        let prefix_length = ob.getattr("prefix_length")?.extract()?;

-        Ok(PyLanceDB(
+        Ok(Self(
             MatchQuery::new(query)
                 .with_column(Some(column))
                 .with_boost(boost)
@@ -60,6 +61,7 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
                 .with_operator(Operator::try_from(operator.as_str()).map_err(|e| {
                     PyValueError::new_err(format!("Invalid operator: {}", e))
                 })?)
+                .with_prefix_length(prefix_length)
                 .into(),
         ))
     }
@@ -68,7 +70,7 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
         let column = ob.getattr("column")?.extract()?;
         let slop = ob.getattr("slop")?.extract()?;

-        Ok(PyLanceDB(
+        Ok(Self(
             PhraseQuery::new(query)
                 .with_column(Some(column))
                 .with_slop(slop)
@@ -76,10 +78,10 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
         ))
     }
     "BoostQuery" => {
-        let positive: PyLanceDB<FtsQuery> = ob.getattr("positive")?.extract()?;
-        let negative: PyLanceDB<FtsQuery> = ob.getattr("negative")?.extract()?;
+        let positive: Self = ob.getattr("positive")?.extract()?;
+        let negative: Self = ob.getattr("negative")?.extract()?;
         let negative_boost = ob.getattr("negative_boost")?.extract()?;
-        Ok(PyLanceDB(
+        Ok(Self(
             BoostQuery::new(positive.0, negative.0, negative_boost).into(),
         ))
     }
@@ -101,18 +103,17 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
         let op = Operator::try_from(operator.as_str())
             .map_err(|e| PyValueError::new_err(format!("Invalid operator: {}", e)))?;

-        Ok(PyLanceDB(q.with_operator(op).into()))
+        Ok(Self(q.with_operator(op).into()))
     }
     "BooleanQuery" => {
-        let queries: Vec<(String, PyLanceDB<FtsQuery>)> =
-            ob.getattr("queries")?.extract()?;
+        let queries: Vec<(String, Self)> = ob.getattr("queries")?.extract()?;
         let mut sub_queries = Vec::with_capacity(queries.len());
         for (occur, q) in queries {
             let occur = Occur::try_from(occur.as_str())
                 .map_err(|e| PyValueError::new_err(e.to_string()))?;
             sub_queries.push((occur, q.0));
         }
-        Ok(PyLanceDB(BooleanQuery::new(sub_queries).into()))
+        Ok(Self(BooleanQuery::new(sub_queries).into()))
     }
     name => Err(PyValueError::new_err(format!(
         "Unsupported FTS query type: {}",
@@ -139,7 +140,8 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
             kwargs.set_item("boost", query.boost)?;
             kwargs.set_item("fuzziness", query.fuzziness)?;
             kwargs.set_item("max_expansions", query.max_expansions)?;
-            kwargs.set_item("operator", operator_to_str(query.operator))?;
+            kwargs.set_item::<_, &str>("operator", query.operator.into())?;
+            kwargs.set_item("prefix_length", query.prefix_length)?;
             namespace
                 .getattr(intern!(py, "MatchQuery"))?
                 .call((query.terms, query.column.unwrap()), Some(&kwargs))
@@ -152,8 +154,8 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
                 .call((query.terms, query.column.unwrap()), Some(&kwargs))
         }
         FtsQuery::Boost(query) => {
-            let positive = PyLanceDB(query.positive.as_ref().clone()).into_pyobject(py)?;
-            let negative = PyLanceDB(query.negative.as_ref().clone()).into_pyobject(py)?;
+            let positive = Self(query.positive.as_ref().clone()).into_pyobject(py)?;
+            let negative = Self(query.negative.as_ref().clone()).into_pyobject(py)?;
             let kwargs = PyDict::new(py);
             kwargs.set_item("negative_boost", query.negative_boost)?;
             namespace
@@ -169,19 +171,25 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
                 .unzip();
             let kwargs = PyDict::new(py);
             kwargs.set_item("boosts", boosts)?;
-            kwargs.set_item("operator", operator_to_str(first.operator))?;
+            kwargs.set_item::<_, &str>("operator", first.operator.into())?;
             namespace
                 .getattr(intern!(py, "MultiMatchQuery"))?
                 .call((first.terms.clone(), columns), Some(&kwargs))
         }
         FtsQuery::Boolean(query) => {
-            let mut queries = Vec::with_capacity(query.must.len() + query.should.len());
-            for q in query.must {
-                queries.push((occur_to_str(Occur::Must), PyLanceDB(q).into_pyobject(py)?));
-            }
+            let mut queries: Vec<(&str, Bound<'py, PyAny>)> = Vec::with_capacity(
+                query.should.len() + query.must.len() + query.must_not.len(),
+            );
             for q in query.should {
-                queries.push((occur_to_str(Occur::Should), PyLanceDB(q).into_pyobject(py)?));
+                queries.push((Occur::Should.into(), Self(q).into_pyobject(py)?));
             }
+            for q in query.must {
+                queries.push((Occur::Must.into(), Self(q).into_pyobject(py)?));
+            }
+            for q in query.must_not {
+                queries.push((Occur::MustNot.into(), Self(q).into_pyobject(py)?));
+            }

             namespace
                 .getattr(intern!(py, "BooleanQuery"))?
                 .call1((queries,))
@@ -190,21 +198,6 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
         }
     }

-fn operator_to_str(op: Operator) -> &'static str {
-    match op {
-        Operator::And => "AND",
-        Operator::Or => "OR",
-    }
-}
-
-fn occur_to_str(occur: Occur) -> &'static str {
-    match occur {
-        Occur::Must => "MUST",
-        Occur::Should => "SHOULD",
-        Occur::MustNot => "MUST NOT",
-    }
-}

 // Python representation of query vector(s)
 #[derive(Clone)]
 pub struct PyQueryVectors(Vec<Arc<dyn Array>>);
@@ -569,7 +562,10 @@ impl FTSQuery {
     }

     pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
+        let inner = self_
+            .inner
+            .clone()
+            .full_text_search(self_.fts_query.clone());
         future_into_py(self_.py(), async move {
             inner
                 .explain_plan(verbose)
@@ -579,7 +575,10 @@ impl FTSQuery {
     }

     pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
+        let inner = self_
+            .inner
+            .clone()
+            .full_text_search(self_.fts_query.clone());
         future_into_py(self_.py(), async move {
             inner
                 .analyze_plan()
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.20.1-beta.0"
+version = "0.21.0"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.20.1-beta.0"
+version = "0.21.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -105,7 +105,7 @@ impl ListingCatalog {
     }

     async fn open_path(path: &str) -> Result<Self> {
-        let (object_store, base_path) = ObjectStore::from_uri(path).await.unwrap();
+        let (object_store, base_path) = ObjectStore::from_uri(path).await?;
         if object_store.is_local() {
             Self::try_create_dir(path).context(CreateDirSnafu { path })?;
         }
@@ -107,7 +107,7 @@ impl ObjectStore for MirroringObjectStore {
         self.primary.delete(location).await
     }

-    fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result<ObjectMeta>> {
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
         self.primary.list(prefix)
     }
@@ -119,7 +119,7 @@ impl ObjectStore for IoTrackingStore {
         let result = self.target.get(location).await;
         if let Ok(result) = &result {
             let num_bytes = result.range.end - result.range.start;
-            self.record_read(num_bytes as u64);
+            self.record_read(num_bytes);
         }
         result
     }
@@ -128,12 +128,12 @@ impl ObjectStore for IoTrackingStore {
         let result = self.target.get_opts(location, options).await;
         if let Ok(result) = &result {
             let num_bytes = result.range.end - result.range.start;
-            self.record_read(num_bytes as u64);
+            self.record_read(num_bytes);
         }
         result
     }

-    async fn get_range(&self, location: &Path, range: std::ops::Range<usize>) -> OSResult<Bytes> {
+    async fn get_range(&self, location: &Path, range: std::ops::Range<u64>) -> OSResult<Bytes> {
         let result = self.target.get_range(location, range).await;
         if let Ok(result) = &result {
             self.record_read(result.len() as u64);
@@ -144,7 +144,7 @@ impl ObjectStore for IoTrackingStore {
     async fn get_ranges(
         &self,
         location: &Path,
-        ranges: &[std::ops::Range<usize>],
+        ranges: &[std::ops::Range<u64>],
     ) -> OSResult<Vec<Bytes>> {
         let result = self.target.get_ranges(location, ranges).await;
         if let Ok(result) = &result {
@@ -170,7 +170,7 @@ impl ObjectStore for IoTrackingStore {
         self.target.delete_stream(locations)
     }

-    fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, OSResult<ObjectMeta>> {
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, OSResult<ObjectMeta>> {
         self.record_read(0);
         self.target.list(prefix)
     }
@@ -179,7 +179,7 @@ impl ObjectStore for IoTrackingStore {
         &self,
         prefix: Option<&Path>,
         offset: &Path,
-    ) -> BoxStream<'_, OSResult<ObjectMeta>> {
+    ) -> BoxStream<'static, OSResult<ObjectMeta>> {
         self.record_read(0);
         self.target.list_with_offset(prefix, offset)
     }
@@ -392,9 +392,18 @@ pub mod tests {
         } else {
             expected_line.trim()
         };
-        assert_eq!(&actual_trimmed[..expected_trimmed.len()], expected_trimmed);
+        assert_eq!(
+            &actual_trimmed[..expected_trimmed.len()],
+            expected_trimmed,
+            "\nactual:\n{physical_plan}\nexpected:\n{expected}"
+        );
     }
-    assert_eq!(lines_checked, expected.lines().count());
+    assert_eq!(
+        lines_checked,
+        expected.lines().count(),
+        "\nlines_checked:\n{lines_checked}\nexpected:\n{}",
+        expected.lines().count()
+    );
 }
 }
@@ -477,9 +486,9 @@ pub mod tests {
     TestFixture::check_plan(
         plan,
         "MetadataEraserExec
   RepartitionExec:...
     CoalesceBatchesExec:...
       FilterExec: i@0 >= 5
         RepartitionExec:...
           ProjectionExec:...
             LanceScan:...",
     )
@@ -129,7 +129,9 @@ impl DatasetRef {
             dataset: ref mut ds,
             ..
         } => {
-            *ds = dataset;
+            if dataset.manifest().version > ds.manifest().version {
+                *ds = dataset;
+            }
         }
         _ => unreachable!("Dataset should be in latest mode at this point"),
     }