feat: support prewarm_index and prewarm_data on remote tables (#3110)

## Summary

- Implement `RemoteTable.prewarm_data(columns)` calling `POST
/v1/table/{id}/page_cache/prewarm/`
- Implement `RemoteTable.prewarm_index(name)` calling `POST
/v1/table/{id}/index/{name}/prewarm/` (previously returned
`NotSupported`)
- Add `BaseTable::prewarm_data(columns)` trait method and `Table` public
API in Rust core
- Add PyO3 bindings and Python API (`AsyncTable`, `LanceTable`,
`RemoteTable`) for `prewarm_data`
- Add type stubs for `prewarm_index` and `prewarm_data` in
`_lancedb.pyi`
- Upgrade Lance to 3.0.0-rc.3 with breaking change fixes

Co-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Esteban Gutierrez
2026-03-10 15:39:39 -05:00
committed by GitHub
parent 6530d82690
commit f951da2b00
8 changed files with 261 additions and 28 deletions

View File

@@ -166,6 +166,8 @@ class Table:
async def checkout(self, version: Union[int, str]): ...
async def checkout_latest(self): ...
async def restore(self, version: Optional[Union[int, str]] = None): ...
async def prewarm_index(self, index_name: str) -> None: ...
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
async def list_indices(self) -> list[IndexConfig]: ...
async def delete(self, filter: str) -> DeleteResult: ...
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...

View File

@@ -640,6 +640,45 @@ class RemoteTable(Table):
def drop_index(self, index_name: str):
return LOOP.run(self._table.drop_index(index_name))
def prewarm_index(self, name: str) -> None:
"""Prewarm an index in the table.
This is a hint to the database that the index will be accessed in the
future and should be loaded into memory if possible. This can reduce
cold-start latency for subsequent queries.
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
Parameters
----------
name: str
The name of the index to prewarm
"""
return LOOP.run(self._table.prewarm_index(name))
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
"""Prewarm data for the table.
This is a hint to the database that the given columns will be accessed
in the future and the database should prefetch the data if possible.
Currently only supported on remote tables.
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
This operation has a large upfront cost but can speed up future queries
that need to fetch the given columns. Large columns such as embeddings
or binary data may not be practical to prewarm. This feature is intended
for workloads that issue many queries against the same columns.
Parameters
----------
columns: list of str, optional
The columns to prewarm. If None, all columns are prewarmed.
"""
return LOOP.run(self._table.prewarm_data(columns))
def wait_for_index(
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
):

View File

@@ -2219,12 +2219,18 @@ class LanceTable(Table):
def prewarm_index(self, name: str) -> None:
"""
Prewarms an index in the table
Prewarm an index in the table.
This loads the entire index into memory
This is a hint to the database that the index will be accessed in the
future and should be loaded into memory if possible. This can reduce
cold-start latency for subsequent queries.
If the index does not fit into the available cache this call
may be wasteful
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
It is generally wasteful to call this if the index does not fit into the
available cache. Not all index types support prewarming; unsupported
indices will silently ignore the request.
Parameters
----------
@@ -2233,6 +2239,29 @@ class LanceTable(Table):
"""
return LOOP.run(self._table.prewarm_index(name))
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
"""
Prewarm data for the table.
This is a hint to the database that the given columns will be accessed
in the future and the database should prefetch the data if possible.
Currently only supported on remote tables.
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
This operation has a large upfront cost but can speed up future queries
that need to fetch the given columns. Large columns such as embeddings
or binary data may not be practical to prewarm. This feature is intended
for workloads that issue many queries against the same columns.
Parameters
----------
columns: list of str, optional
The columns to prewarm. If None, all columns are prewarmed.
"""
return LOOP.run(self._table.prewarm_data(columns))
def wait_for_index(
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
) -> None:
@@ -3634,19 +3663,47 @@ class AsyncTable:
"""
Prewarm an index in the table.
This is a hint to the database that the index will be accessed in the
future and should be loaded into memory if possible. This can reduce
cold-start latency for subsequent queries.
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
It is generally wasteful to call this if the index does not fit into the
available cache. Not all index types support prewarming; unsupported
indices will silently ignore the request.
Parameters
----------
name: str
The name of the index to prewarm
Notes
-----
This will load the index into memory. This may reduce the cold-start time for
future queries. If the index does not fit in the cache then this call may be
wasteful.
"""
await self._inner.prewarm_index(name)
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
"""
Prewarm data for the table.
This is a hint to the database that the given columns will be accessed
in the future and the database should prefetch the data if possible.
Currently only supported on remote tables.
This call initiates prewarming and returns once the request is accepted.
It is idempotent and safe to call from multiple clients concurrently.
This operation has a large upfront cost but can speed up future queries
that need to fetch the given columns. Large columns such as embeddings
or binary data may not be practical to prewarm. This feature is intended
for workloads that issue many queries against the same columns.
Parameters
----------
columns: list of str, optional
The columns to prewarm. If None, all columns are prewarmed.
"""
await self._inner.prewarm_data(columns)
async def wait_for_index(
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
) -> None:

View File

@@ -426,6 +426,17 @@ impl Table {
})
}
pub fn prewarm_data(
self_: PyRef<'_, Self>,
columns: Option<Vec<String>>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
inner.prewarm_data(columns).await.infer_error()?;
Ok(())
})
}
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {