feat: add prewarm_index function (#2342)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Added the ability to prewarm (load into memory) table indexes via new
    methods in Python, Node.js, and Rust APIs, potentially reducing
    cold-start query latency.
- **Bug Fixes**
  - Ensured prewarming an index does not interfere with subsequent search
    operations.
- **Tests**
  - Introduced new test cases to verify full-text search index creation,
    prewarming, and search functionalities in both Python and Node.js.
- **Chores**
  - Updated dependencies for improved compatibility and performance.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Lu Qiu <luqiujob@gmail.com>
Author: Weston Pace
Date: 2025-04-17 17:14:36 -05:00
Committed by: GitHub
Parent: ef3a2b5357
Commit: 26080ee4c1
14 changed files with 215 additions and 54 deletions
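
For orientation, here is a minimal sketch of how the new synchronous API is intended to be used, assuming a local database path and the default `<column>_idx` index-naming convention seen in the tests; the database path, table name, and sample data are made up for illustration.

```python
import lancedb

# Illustrative database path, table name, and data.
db = lancedb.connect("/tmp/lancedb-demo")
table = db.create_table(
    "docs",
    data=[{"text": f"tag{i} some sample text"} for i in range(100)],
    mode="overwrite",
)

# Build a full-text search index, then load it into memory up front so the
# first query does not pay the cold-start cost of reading it from storage.
table.create_fts_index("text", use_tantivy=False)
table.prewarm_index("text_idx")  # assumes the default "<column>_idx" name

# Queries now hit a warm index cache.
hits = table.search("tag0", query_type="fts").to_list()
print(len(hits))
```

As the new docstrings warn, prewarming only pays off when the whole index fits in the available cache; otherwise the load is wasted work.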

View File

@@ -44,7 +44,7 @@ repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
pylance = [
"pylance>=0.23.2",
"pylance>=0.25",
]
tests = [
"aiohttp",
@@ -58,7 +58,7 @@ tests = [
"polars>=0.19, <=1.3.0",
"tantivy",
"pyarrow-stubs",
"pylance>=0.23.2",
"pylance>=0.25",
"requests",
]
dev = [

View File

@@ -1745,8 +1745,32 @@ class LanceTable(Table):
        )

    def drop_index(self, name: str) -> None:
        """
        Drops an index from the table

        Parameters
        ----------
        name: str
            The name of the index to drop
        """
        return LOOP.run(self._table.drop_index(name))

    def prewarm_index(self, name: str) -> None:
        """
        Prewarms an index in the table

        This loads the entire index into memory

        If the index does not fit into the available cache this call
        may be wasteful

        Parameters
        ----------
        name: str
            The name of the index to prewarm
        """
        return LOOP.run(self._table.prewarm_index(name))

    def create_scalar_index(
        self,
        column: str,
@@ -3002,6 +3026,23 @@ class AsyncTable:
"""
await self._inner.drop_index(name)
async def prewarm_index(self, name: str) -> None:
"""
Prewarm an index in the table.
Parameters
----------
name: str
The name of the index to prewarm
Notes
-----
This will load the index into memory. This may reduce the cold-start time for
future queries. If the index does not fit in the cache then this call may be
wasteful.
"""
await self._inner.prewarm_index(name)
async def add(
self,
data: DATA,
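
For the async client the call is symmetric. A minimal sketch, assuming an existing table named `my_table` with an index named `tags_idx` (both placeholders), could warm the cache at service start-up:

```python
import asyncio

import lancedb


async def warm_up() -> None:
    # Placeholder URI, table name, and index name.
    db = await lancedb.connect_async("/tmp/lancedb-demo")
    table = await db.open_table("my_table")

    # New in this PR: load the whole index into memory so the first query
    # after start-up does not have to page it in from storage.
    await table.prewarm_index("tags_idx")


asyncio.run(warm_up())
```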

View File

@@ -8,7 +8,7 @@ import pyarrow as pa
import pytest
import pytest_asyncio
from lancedb import AsyncConnection, AsyncTable, connect_async
-from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq
+from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
@pytest_asyncio.fixture
@@ -119,6 +119,18 @@ async def test_create_label_list_index(some_table: AsyncTable):
    assert str(indices) == '[Index(LabelList, columns=["tags"], name="tags_idx")]'


@pytest.mark.asyncio
async def test_full_text_search_index(some_table: AsyncTable):
    await some_table.create_index("tags", config=FTS(with_position=False))
    indices = await some_table.list_indices()
    assert str(indices) == '[Index(FTS, columns=["tags"], name="tags_idx")]'
    await some_table.prewarm_index("tags_idx")
    res = await (await some_table.search("tag0")).to_arrow()
    assert res.num_rows > 0


@pytest.mark.asyncio
async def test_create_vector_index(some_table: AsyncTable):
    # Can create
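
The test above only checks that search still works after prewarming. To gauge whether prewarming actually reduces cold-start latency, a rough timing harness along these lines can be used; it reuses the async calls from the test, while the database path, table name, and index name are placeholders. It also assumes each fresh connection starts with a cold index cache, which may not hold in every deployment.

```python
import asyncio
import time

import lancedb


async def first_query_seconds(prewarm: bool) -> float:
    # Fresh connection so the index cache starts cold (an assumption).
    db = await lancedb.connect_async("/tmp/lancedb-demo")
    table = await db.open_table("my_table")
    if prewarm:
        await table.prewarm_index("tags_idx")
    start = time.perf_counter()
    await (await table.search("tag0")).to_arrow()
    return time.perf_counter() - start


async def main() -> None:
    cold = await first_query_seconds(prewarm=False)
    warm = await first_query_seconds(prewarm=True)
    print(f"first query: {cold:.3f}s cold vs {warm:.3f}s after prewarm")


asyncio.run(main())
```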

View File

@@ -204,6 +204,14 @@ impl Table {
        })
    }

    pub fn prewarm_index(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            inner.prewarm_index(&index_name).await.infer_error()?;
            Ok(())
        })
    }

    pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {

View File

@@ -163,8 +163,9 @@ pub fn parse_fts_query(query: &Bound<'_, PyDict>) -> PyResult<FtsQuery> {
.ok_or(PyValueError::new_err("boost not found"))?
.extract::<Vec<f32>>()?;
-let query =
-    MultiMatchQuery::try_new_with_boosts(query, columns, boost).map_err(|e| {
+let query = MultiMatchQuery::try_new(query, columns)
+    .and_then(|q| q.try_with_boosts(boost))
+    .map_err(|e| {
        PyValueError::new_err(format!("Error creating MultiMatchQuery: {}", e))
    })?;
Ok(query.into())
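
The refactor above only changes how per-column boosts are attached to a multi-match query on the Rust side; the parser still reads the same query, columns, and boost fields, so the Python-facing contract is unchanged. A hedged sketch of that usage, assuming the Python query builder exposes `MultiMatchQuery` under `lancedb.query` with a `boosts` keyword (the import path and parameter name are assumptions, not confirmed by this diff):

```python
import lancedb
from lancedb.query import MultiMatchQuery  # assumed import path

db = lancedb.connect("/tmp/lancedb-demo")  # placeholder path
table = db.open_table("docs")              # placeholder table

# Search two columns at once, weighting "title" twice as heavily as "body".
# The boosts list lines up with the columns list, mirroring the Vec<f32>
# that the Rust parser extracts from the "boost" field.
query = MultiMatchQuery("lance database", ["title", "body"], boosts=[2.0, 1.0])
results = table.search(query, query_type="fts").to_list()
```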