mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 12:22:59 +00:00
feat: add prewarm_index function (#2342)
<!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Added the ability to prewarm (load into memory) table indexes via new methods in Python, Node.js, and Rust APIs, potentially reducing cold-start query latency. - **Bug Fixes** - Ensured prewarming an index does not interfere with subsequent search operations. - **Tests** - Introduced new test cases to verify full-text search index creation, prewarming, and search functionalities in both Python and Node.js. - **Chores** - Updated dependencies for improved compatibility and performance. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Lu Qiu <luqiujob@gmail.com>
This commit is contained in:
@@ -44,7 +44,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
pylance = [
|
||||
"pylance>=0.23.2",
|
||||
"pylance>=0.25",
|
||||
]
|
||||
tests = [
|
||||
"aiohttp",
|
||||
@@ -58,7 +58,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"pylance>=0.23.2",
|
||||
"pylance>=0.25",
|
||||
"requests",
|
||||
]
|
||||
dev = [
|
||||
|
||||
@@ -1745,8 +1745,32 @@ class LanceTable(Table):
|
||||
)
|
||||
|
||||
def drop_index(self, name: str) -> None:
|
||||
"""
|
||||
Drops an index from the table
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to drop
|
||||
"""
|
||||
return LOOP.run(self._table.drop_index(name))
|
||||
|
||||
def prewarm_index(self, name: str) -> None:
|
||||
"""
|
||||
Prewarms an index in the table
|
||||
|
||||
This loads the entire index into memory
|
||||
|
||||
If the index does not fit into the available cache this call
|
||||
may be wasteful
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to prewarm
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_index(name))
|
||||
|
||||
def create_scalar_index(
|
||||
self,
|
||||
column: str,
|
||||
@@ -3002,6 +3026,23 @@ class AsyncTable:
|
||||
"""
|
||||
await self._inner.drop_index(name)
|
||||
|
||||
async def prewarm_index(self, name: str) -> None:
|
||||
"""
|
||||
Prewarm an index in the table.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to prewarm
|
||||
|
||||
Notes
|
||||
-----
|
||||
This will load the index into memory. This may reduce the cold-start time for
|
||||
future queries. If the index does not fit in the cache then this call may be
|
||||
wasteful.
|
||||
"""
|
||||
await self._inner.prewarm_index(name)
|
||||
|
||||
async def add(
|
||||
self,
|
||||
data: DATA,
|
||||
|
||||
@@ -8,7 +8,7 @@ import pyarrow as pa
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from lancedb import AsyncConnection, AsyncTable, connect_async
|
||||
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq
|
||||
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
@@ -119,6 +119,18 @@ async def test_create_label_list_index(some_table: AsyncTable):
|
||||
assert str(indices) == '[Index(LabelList, columns=["tags"], name="tags_idx")]'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_full_text_search_index(some_table: AsyncTable):
|
||||
await some_table.create_index("tags", config=FTS(with_position=False))
|
||||
indices = await some_table.list_indices()
|
||||
assert str(indices) == '[Index(FTS, columns=["tags"], name="tags_idx")]'
|
||||
|
||||
await some_table.prewarm_index("tags_idx")
|
||||
|
||||
res = await (await some_table.search("tag0")).to_arrow()
|
||||
assert res.num_rows > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_vector_index(some_table: AsyncTable):
|
||||
# Can create
|
||||
|
||||
@@ -204,6 +204,14 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn prewarm_index(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner.prewarm_index(&index_name).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
|
||||
@@ -163,8 +163,9 @@ pub fn parse_fts_query(query: &Bound<'_, PyDict>) -> PyResult<FtsQuery> {
|
||||
.ok_or(PyValueError::new_err("boost not found"))?
|
||||
.extract::<Vec<f32>>()?;
|
||||
|
||||
let query =
|
||||
MultiMatchQuery::try_new_with_boosts(query, columns, boost).map_err(|e| {
|
||||
let query = MultiMatchQuery::try_new(query, columns)
|
||||
.and_then(|q| q.try_with_boosts(boost))
|
||||
.map_err(|e| {
|
||||
PyValueError::new_err(format!("Error creating MultiMatchQuery: {}", e))
|
||||
})?;
|
||||
Ok(query.into())
|
||||
|
||||
Reference in New Issue
Block a user