feat: add prewarm_index function (#2342)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Added the ability to prewarm (load into memory) table indexes via new
    methods in the Python, Node.js, and Rust APIs, potentially reducing
    cold-start query latency (see the usage sketch after this list).
- **Bug Fixes**
  - Ensured that prewarming an index does not interfere with subsequent search
    operations.
- **Tests**
  - Introduced new test cases to verify full-text search index creation,
    prewarming, and search functionality in both Python and Node.js.
- **Chores**
  - Updated dependencies for improved compatibility and performance.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
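
A minimal usage sketch of the new Node.js method, pieced together from the test and API changes in this diff. The database path, table name, and sample rows are illustrative; the `@lancedb/lancedb` import path is assumed, and `"text_idx"` follows the default `<column>_idx` naming used in the test below.

```ts
import { connect, Index } from "@lancedb/lancedb";

async function main() {
  // The database path and table contents here are placeholders.
  const db = await connect("/tmp/prewarm-demo");
  const table = await db.createTable("docs", [
    { text: "lance database", vector: [0.1, 0.2, 0.3] },
    { text: "lance search", vector: [0.4, 0.5, 0.6] },
  ]);

  // Build a full-text search index on the "text" column.
  await table.createIndex("text", { config: Index.fts() });

  // Load the index into memory up front; "text_idx" matches the default
  // index name used in the test added by this PR.
  await table.prewarmIndex("text_idx");

  // Later queries should skip the cold-start cost of loading the index.
  const results = await table.search("lance").toArray();
  console.log(results.length);
}

main().catch(console.error);
```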

---------

Co-authored-by: Lu Qiu <luqiujob@gmail.com>
Author: Weston Pace
Date: 2025-04-17 17:14:36 -05:00
Committed by: GitHub
Parent: ef3a2b5357
Commit: 26080ee4c1
14 changed files with 215 additions and 54 deletions

View File

@@ -1312,6 +1312,28 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(results2[0].text).toBe(data[1].text);
    });
    test("prewarm full text search index", async () => {
      const db = await connect(tmpDir.name);
      const data = [
        { text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
        { text: ["lance database"], vector: [0.4, 0.5, 0.6] },
        { text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
        { text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
        { text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
      ];
      const table = await db.createTable("test", data);
      await table.createIndex("text", {
        config: Index.fts(),
      });
      // For the moment, we just confirm we can call prewarmIndex without error
      // and still search it afterwards
      await table.prewarmIndex("text_idx");
      const results = await table.search("lance").toArray();
      expect(results.length).toBe(3);
    });
    test("full text index on list", async () => {
      const db = await connect(tmpDir.name);
      const data = [

View File

@@ -235,6 +235,17 @@ export abstract class Table {
   */
  abstract dropIndex(name: string): Promise<void>;
  /**
   * Prewarm an index in the table.
   *
   * @param name The name of the index.
   *
   * This will load the index into memory. This may reduce the cold-start time for
   * future queries. If the index does not fit in the cache then this call may be
   * wasteful.
   */
  abstract prewarmIndex(name: string): Promise<void>;
  /**
   * Create a {@link Query} Builder.
   *
@@ -565,6 +576,10 @@ export class LocalTable extends Table {
    await this.inner.dropIndex(name);
  }
  async prewarmIndex(name: string): Promise<void> {
    await this.inner.prewarmIndex(name);
  }
  query(): Query {
    return new Query(this.inner);
  }

View File

@@ -327,6 +327,7 @@ impl JsFullTextQuery {
    }
    #[napi(factory)]
    #[allow(clippy::use_self)] // NAPI doesn't allow Self here but clippy reports it
    pub fn boost_query(
        positive: &JsFullTextQuery,
        negative: &JsFullTextQuery,
@@ -349,11 +350,8 @@ impl JsFullTextQuery {
         boosts: Option<Vec<f64>>,
     ) -> napi::Result<Self> {
         let q = match boosts {
-            Some(boosts) => MultiMatchQuery::try_new_with_boosts(
-                query,
-                columns,
-                boosts.into_iter().map(|v| v as f32).collect(),
-            ),
+            Some(boosts) => MultiMatchQuery::try_new(query, columns)
+                .and_then(|q| q.try_with_boosts(boosts.into_iter().map(|v| v as f32).collect())),
             None => MultiMatchQuery::try_new(query, columns),
         }
         .map_err(|e| {

View File

@@ -132,6 +132,14 @@ impl Table {
            .default_error()
    }
    #[napi(catch_unwind)]
    pub async fn prewarm_index(&self, index_name: String) -> napi::Result<()> {
        self.inner_ref()?
            .prewarm_index(&index_name)
            .await
            .default_error()
    }
    #[napi(catch_unwind)]
    pub async fn update(
        &self,