feat: support prewarm_index and prewarm_data on remote tables (#3110)

## Summary

- Implement `RemoteTable.prewarm_data(columns)` calling `POST
/v1/table/{id}/page_cache/prewarm/`
- Implement `RemoteTable.prewarm_index(name)` calling `POST
/v1/table/{id}/index/{name}/prewarm/` (previously returned
`NotSupported`)
- Add `BaseTable::prewarm_data(columns)` trait method and `Table` public
API in Rust core
- Add PyO3 bindings and Python API (`AsyncTable`, `LanceTable`,
`RemoteTable`) for `prewarm_data`
- Add type stubs for `prewarm_index` and `prewarm_data` in
`_lancedb.pyi`
- Upgrade Lance to 3.0.0-rc.3 and adapt to its breaking changes

Co-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Esteban Gutierrez
2026-03-10 15:39:39 -05:00
committed by GitHub
parent 6530d82690
commit f951da2b00
8 changed files with 261 additions and 28 deletions

View File

@@ -426,14 +426,11 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
})?,
);
}
if db_prefix.is_some() {
if let Some(prefix) = db_prefix {
headers.insert(
HeaderName::from_static("x-lancedb-database-prefix"),
HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
message: format!(
"non-ascii database prefix '{}' provided",
db_prefix.unwrap()
),
HeaderValue::from_str(prefix).map_err(|_| Error::InvalidInput {
message: format!("non-ascii database prefix '{}' provided", prefix),
})?,
);
}

View File

@@ -1645,10 +1645,33 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
Ok(())
}
async fn prewarm_index(&self, _index_name: &str) -> Result<()> {
Err(Error::NotSupported {
message: "prewarm_index is not yet supported on LanceDB cloud.".into(),
})
async fn prewarm_index(&self, index_name: &str) -> Result<()> {
let request = self.client.post(&format!(
"/v1/table/{}/index/{}/prewarm/",
self.identifier, index_name
));
let (request_id, response) = self.send(request, true).await?;
if response.status() == StatusCode::NOT_FOUND {
return Err(Error::IndexNotFound {
name: index_name.to_string(),
});
}
self.check_table_response(&request_id, response).await?;
Ok(())
}
/// Ask the server to prewarm table data for the given columns.
///
/// Sends `POST /v1/table/{identifier}/page_cache/prewarm/` with a JSON body of the
/// form `{"columns": [...]}`. When `columns` is `None`, an empty list is sent.
/// The reply is validated by `check_table_response`.
async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
    let endpoint = format!("/v1/table/{}/page_cache/prewarm/", self.identifier);
    let payload = serde_json::json!({
        "columns": columns.unwrap_or_default(),
    });
    let request = self.client.post(&endpoint).json(&payload);

    let (request_id, response) = self.send(request, true).await?;
    self.check_table_response(&request_id, response).await?;
    Ok(())
}
async fn table_definition(&self) -> Result<TableDefinition> {
@@ -3529,6 +3552,64 @@ mod tests {
assert_eq!(result.version, if old_server { 0 } else { 43 });
}
#[tokio::test]
async fn test_prewarm_index() {
    // Mock server: expect a POST to the index prewarm endpoint and reply 200.
    let table = Table::new_with_handler("my_table", |req| {
        assert_eq!(req.method(), "POST");
        let path = req.url().path();
        assert_eq!(path, "/v1/table/my_table/index/my_index/prewarm/");
        http::Response::builder().status(200).body("{}").unwrap()
    });

    let outcome = table.prewarm_index("my_index").await;
    outcome.unwrap();
}
#[tokio::test]
async fn test_prewarm_index_not_found() {
let table = Table::new_with_handler("my_table", |request| {
assert_eq!(
request.url().path(),
"/v1/table/my_table/index/my_index/prewarm/"
);
http::Response::builder().status(404).body("{}").unwrap()
});
let e = table.prewarm_index("my_index").await.unwrap_err();
assert!(matches!(e, Error::IndexNotFound { .. }));
}
#[tokio::test]
async fn test_prewarm_data() {
    // Mock server: expect a POST to the page-cache prewarm endpoint and reply 200.
    let table = Table::new_with_handler("my_table", |req| {
        assert_eq!(req.method(), "POST");
        let path = req.url().path();
        assert_eq!(path, "/v1/table/my_table/page_cache/prewarm/");
        http::Response::builder().status(200).body("{}").unwrap()
    });

    // No explicit column selection.
    let no_columns: Option<Vec<String>> = None;
    table.prewarm_data(no_columns).await.unwrap();
}
#[tokio::test]
async fn test_prewarm_data_with_columns() {
    // Mock server: besides verb and path, verify the JSON body carries the columns.
    let table = Table::new_with_handler("my_table", |req| {
        assert_eq!(req.method(), "POST");
        assert_eq!(req.url().path(), "/v1/table/my_table/page_cache/prewarm/");

        let raw = req.body().unwrap().as_bytes().unwrap();
        let payload: serde_json::Value = serde_json::from_slice(raw).unwrap();
        assert_eq!(payload["columns"], serde_json::json!(["col_a", "col_b"]));

        http::Response::builder().status(200).body("{}").unwrap()
    });

    let columns = vec![String::from("col_a"), String::from("col_b")];
    table.prewarm_data(Some(columns)).await.unwrap();
}
#[tokio::test]
async fn test_drop_index() {
let table = Table::new_with_handler("my_table", |request| {

View File

@@ -277,8 +277,13 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
/// Drop an index from the table.
async fn drop_index(&self, name: &str) -> Result<()>;
/// Prewarm an index in the table
/// Prewarm an index in the table.
async fn prewarm_index(&self, name: &str) -> Result<()>;
/// Prewarm data for the table.
///
/// Currently only supported on remote tables; the native table implementation
/// returns `Error::NotSupported`.
///
/// `columns` names the columns to prewarm. If `columns` is `None`, all columns
/// are prewarmed.
async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()>;
/// Get statistics about the index.
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
/// Merge insert new records into the table.
@@ -1123,22 +1128,45 @@ impl Table {
self.inner.drop_index(name).await
}
/// Prewarm an index in the table
/// Prewarm an index in the table.
///
/// This is a hint to fully load the index into memory. It can be used to
/// avoid cold starts
/// This is a hint to the database that the index will be accessed in the
/// future and should be loaded into memory if possible. This can reduce
/// cold-start latency for subsequent queries.
///
/// This call initiates prewarming and returns once the request is accepted.
/// It is idempotent and safe to call from multiple clients concurrently.
///
/// It is generally wasteful to call this if the index does not fit into the
/// available cache.
///
/// Note: This function is not yet supported on all indices, in which case it
/// may do nothing.
/// available cache. Not all index types support prewarming; unsupported
/// indices will silently ignore the request.
///
/// Use [`Self::list_indices()`] to find the names of the indices.
pub async fn prewarm_index(&self, name: &str) -> Result<()> {
self.inner.prewarm_index(name).await
}
/// Prewarm data for the table.
///
/// This is a hint to the database that the given columns will be accessed in
/// the future and the database should prefetch the data if possible. This
/// can reduce cold-start latency for subsequent queries. Currently only
/// supported on remote tables.
///
/// This call initiates prewarming and returns once the request is accepted.
/// It is idempotent and safe to call from multiple clients concurrently —
/// calling it on already-prewarmed columns is a no-op on the server.
///
/// This operation has a large upfront cost but can speed up future queries
/// that need to fetch the given columns. Large columns such as embeddings
/// or binary data may not be practical to prewarm. This feature is intended
/// for workloads that issue many queries against the same columns.
///
/// If `columns` is `None`, all columns are prewarmed.
///
/// # Errors
///
/// Returns `Error::NotSupported` when the underlying table is a native table.
/// For remote tables, errors from sending the request or from the server's
/// response are returned to the caller.
pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
// Thin wrapper: the work is done by the `BaseTable` implementation behind `inner`.
self.inner.prewarm_data(columns).await
}
/// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
/// are not fully indexed within the timeout.
pub async fn wait_for_index(
@@ -2290,6 +2318,12 @@ impl BaseTable for NativeTable {
Ok(dataset.prewarm_index(index_name).await?)
}
async fn prewarm_data(&self, _columns: Option<Vec<String>>) -> Result<()> {
Err(Error::NotSupported {
message: "prewarm_data is currently only supported on remote tables.".into(),
})
}
async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
// Delegate to the submodule implementation
update::execute_update(self, update).await