mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-26 02:20:40 +00:00
feat: support prewarm_index and prewarm_data on remote tables (#3110)
## Summary
- Implement `RemoteTable.prewarm_data(columns)` calling `POST
/v1/table/{id}/page_cache/prewarm/`
- Implement `RemoteTable.prewarm_index(name)` calling `POST
/v1/table/{id}/index/{name}/prewarm/` (previously returned
`NotSupported`)
- Add `BaseTable::prewarm_data(columns)` trait method and `Table` public
API in Rust core
- Add PyO3 bindings and Python API (`AsyncTable`, `LanceTable`,
`RemoteTable`) for `prewarm_data`
- Add type stubs for `prewarm_index` and `prewarm_data` in
`_lancedb.pyi`
- Upgrade Lance to 3.0.0-rc.3 with breaking change fixes
Co-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
6530d82690
commit
f951da2b00
@@ -166,6 +166,8 @@ class Table:
|
||||
async def checkout(self, version: Union[int, str]): ...
|
||||
async def checkout_latest(self): ...
|
||||
async def restore(self, version: Optional[Union[int, str]] = None): ...
|
||||
async def prewarm_index(self, index_name: str) -> None: ...
|
||||
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
|
||||
async def list_indices(self) -> list[IndexConfig]: ...
|
||||
async def delete(self, filter: str) -> DeleteResult: ...
|
||||
async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
|
||||
|
||||
@@ -640,6 +640,45 @@ class RemoteTable(Table):
|
||||
def drop_index(self, index_name: str):
|
||||
return LOOP.run(self._table.drop_index(index_name))
|
||||
|
||||
def prewarm_index(self, name: str) -> None:
|
||||
"""Prewarm an index in the table.
|
||||
|
||||
This is a hint to the database that the index will be accessed in the
|
||||
future and should be loaded into memory if possible. This can reduce
|
||||
cold-start latency for subsequent queries.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to prewarm
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_index(name))
|
||||
|
||||
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||
"""Prewarm data for the table.
|
||||
|
||||
This is a hint to the database that the given columns will be accessed
|
||||
in the future and the database should prefetch the data if possible.
|
||||
Currently only supported on remote tables.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
This operation has a large upfront cost but can speed up future queries
|
||||
that need to fetch the given columns. Large columns such as embeddings
|
||||
or binary data may not be practical to prewarm. This feature is intended
|
||||
for workloads that issue many queries against the same columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, optional
|
||||
The columns to prewarm. If None, all columns are prewarmed.
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_data(columns))
|
||||
|
||||
def wait_for_index(
|
||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||
):
|
||||
|
||||
@@ -2219,12 +2219,18 @@ class LanceTable(Table):
|
||||
|
||||
def prewarm_index(self, name: str) -> None:
|
||||
"""
|
||||
Prewarms an index in the table
|
||||
Prewarm an index in the table.
|
||||
|
||||
This loads the entire index into memory
|
||||
This is a hint to the database that the index will be accessed in the
|
||||
future and should be loaded into memory if possible. This can reduce
|
||||
cold-start latency for subsequent queries.
|
||||
|
||||
If the index does not fit into the available cache this call
|
||||
may be wasteful
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
It is generally wasteful to call this if the index does not fit into the
|
||||
available cache. Not all index types support prewarming; unsupported
|
||||
indices will silently ignore the request.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -2233,6 +2239,29 @@ class LanceTable(Table):
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_index(name))
|
||||
|
||||
def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||
"""
|
||||
Prewarm data for the table.
|
||||
|
||||
This is a hint to the database that the given columns will be accessed
|
||||
in the future and the database should prefetch the data if possible.
|
||||
Currently only supported on remote tables.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
This operation has a large upfront cost but can speed up future queries
|
||||
that need to fetch the given columns. Large columns such as embeddings
|
||||
or binary data may not be practical to prewarm. This feature is intended
|
||||
for workloads that issue many queries against the same columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, optional
|
||||
The columns to prewarm. If None, all columns are prewarmed.
|
||||
"""
|
||||
return LOOP.run(self._table.prewarm_data(columns))
|
||||
|
||||
def wait_for_index(
|
||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||
) -> None:
|
||||
@@ -3634,19 +3663,47 @@ class AsyncTable:
|
||||
"""
|
||||
Prewarm an index in the table.
|
||||
|
||||
This is a hint to the database that the index will be accessed in the
|
||||
future and should be loaded into memory if possible. This can reduce
|
||||
cold-start latency for subsequent queries.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
It is generally wasteful to call this if the index does not fit into the
|
||||
available cache. Not all index types support prewarming; unsupported
|
||||
indices will silently ignore the request.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the index to prewarm
|
||||
|
||||
Notes
|
||||
-----
|
||||
This will load the index into memory. This may reduce the cold-start time for
|
||||
future queries. If the index does not fit in the cache then this call may be
|
||||
wasteful.
|
||||
"""
|
||||
await self._inner.prewarm_index(name)
|
||||
|
||||
async def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
|
||||
"""
|
||||
Prewarm data for the table.
|
||||
|
||||
This is a hint to the database that the given columns will be accessed
|
||||
in the future and the database should prefetch the data if possible.
|
||||
Currently only supported on remote tables.
|
||||
|
||||
This call initiates prewarming and returns once the request is accepted.
|
||||
It is idempotent and safe to call from multiple clients concurrently.
|
||||
|
||||
This operation has a large upfront cost but can speed up future queries
|
||||
that need to fetch the given columns. Large columns such as embeddings
|
||||
or binary data may not be practical to prewarm. This feature is intended
|
||||
for workloads that issue many queries against the same columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns: list of str, optional
|
||||
The columns to prewarm. If None, all columns are prewarmed.
|
||||
"""
|
||||
await self._inner.prewarm_data(columns)
|
||||
|
||||
async def wait_for_index(
|
||||
self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
|
||||
) -> None:
|
||||
|
||||
@@ -426,6 +426,17 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn prewarm_data(
|
||||
self_: PyRef<'_, Self>,
|
||||
columns: Option<Vec<String>>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner.prewarm_data(columns).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
|
||||
Reference in New Issue
Block a user