mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-24 07:20:40 +00:00
feat: support prewarm_index and prewarm_data on remote tables (#3110)
## Summary
- Implement `RemoteTable.prewarm_data(columns)`, which calls `POST /v1/table/{id}/page_cache/prewarm/`
- Implement `RemoteTable.prewarm_index(name)`, which calls `POST /v1/table/{id}/index/{name}/prewarm/` (previously it returned `NotSupported`)
- Add `BaseTable::prewarm_data(columns)` trait method and `Table` public
API in Rust core
- Add PyO3 bindings and Python API (`AsyncTable`, `LanceTable`,
`RemoteTable`) for `prewarm_data`
- Add type stubs for `prewarm_index` and `prewarm_data` in
`_lancedb.pyi`
- Upgrade Lance to 3.0.0-rc.3 with breaking change fixes
Co-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
6530d82690
commit
f951da2b00
@@ -426,14 +426,11 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
})?,
|
||||
);
|
||||
}
|
||||
if db_prefix.is_some() {
|
||||
if let Some(prefix) = db_prefix {
|
||||
headers.insert(
|
||||
HeaderName::from_static("x-lancedb-database-prefix"),
|
||||
HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
|
||||
message: format!(
|
||||
"non-ascii database prefix '{}' provided",
|
||||
db_prefix.unwrap()
|
||||
),
|
||||
HeaderValue::from_str(prefix).map_err(|_| Error::InvalidInput {
|
||||
message: format!("non-ascii database prefix '{}' provided", prefix),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1645,10 +1645,33 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn prewarm_index(&self, _index_name: &str) -> Result<()> {
|
||||
Err(Error::NotSupported {
|
||||
message: "prewarm_index is not yet supported on LanceDB cloud.".into(),
|
||||
})
|
||||
async fn prewarm_index(&self, index_name: &str) -> Result<()> {
|
||||
let request = self.client.post(&format!(
|
||||
"/v1/table/{}/index/{}/prewarm/",
|
||||
self.identifier, index_name
|
||||
));
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
if response.status() == StatusCode::NOT_FOUND {
|
||||
return Err(Error::IndexNotFound {
|
||||
name: index_name.to_string(),
|
||||
});
|
||||
}
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
|
||||
let mut request = self.client.post(&format!(
|
||||
"/v1/table/{}/page_cache/prewarm/",
|
||||
self.identifier
|
||||
));
|
||||
let body = serde_json::json!({
|
||||
"columns": columns.unwrap_or_default(),
|
||||
});
|
||||
request = request.json(&body);
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn table_definition(&self) -> Result<TableDefinition> {
|
||||
@@ -3529,6 +3552,64 @@ mod tests {
|
||||
assert_eq!(result.version, if old_server { 0 } else { 43 });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prewarm_index() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/index/my_index/prewarm/"
|
||||
);
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
table.prewarm_index("my_index").await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prewarm_index_not_found() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/index/my_index/prewarm/"
|
||||
);
|
||||
http::Response::builder().status(404).body("{}").unwrap()
|
||||
});
|
||||
let e = table.prewarm_index("my_index").await.unwrap_err();
|
||||
assert!(matches!(e, Error::IndexNotFound { .. }));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prewarm_data() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/page_cache/prewarm/"
|
||||
);
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
table.prewarm_data(None).await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prewarm_data_with_columns() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/page_cache/prewarm/"
|
||||
);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
assert_eq!(body["columns"], serde_json::json!(["col_a", "col_b"]));
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
table
|
||||
.prewarm_data(Some(vec!["col_a".into(), "col_b".into()]))
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_drop_index() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
|
||||
@@ -277,8 +277,13 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
|
||||
/// Drop an index from the table.
|
||||
async fn drop_index(&self, name: &str) -> Result<()>;
|
||||
/// Prewarm an index in the table
|
||||
/// Prewarm an index in the table.
|
||||
async fn prewarm_index(&self, name: &str) -> Result<()>;
|
||||
/// Prewarm data for the table.
|
||||
///
|
||||
/// Currently only supported on remote tables.
|
||||
/// If `columns` is `None`, all columns are prewarmed.
|
||||
async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()>;
|
||||
/// Get statistics about the index.
|
||||
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
|
||||
/// Merge insert new records into the table.
|
||||
@@ -1123,22 +1128,45 @@ impl Table {
|
||||
self.inner.drop_index(name).await
|
||||
}
|
||||
|
||||
/// Prewarm an index in the table
|
||||
/// Prewarm an index in the table.
|
||||
///
|
||||
/// This is a hint to fully load the index into memory. It can be used to
|
||||
/// avoid cold starts
|
||||
/// This is a hint to the database that the index will be accessed in the
|
||||
/// future and should be loaded into memory if possible. This can reduce
|
||||
/// cold-start latency for subsequent queries.
|
||||
///
|
||||
/// This call initiates prewarming and returns once the request is accepted.
|
||||
/// It is idempotent and safe to call from multiple clients concurrently.
|
||||
///
|
||||
/// It is generally wasteful to call this if the index does not fit into the
|
||||
/// available cache.
|
||||
///
|
||||
/// Note: This function is not yet supported on all indices, in which case it
|
||||
/// may do nothing.
|
||||
/// available cache. Not all index types support prewarming; unsupported
|
||||
/// indices will silently ignore the request.
|
||||
///
|
||||
/// Use [`Self::list_indices()`] to find the names of the indices.
|
||||
pub async fn prewarm_index(&self, name: &str) -> Result<()> {
|
||||
self.inner.prewarm_index(name).await
|
||||
}
|
||||
|
||||
/// Prewarm data for the table.
|
||||
///
|
||||
/// This is a hint to the database that the given columns will be accessed in
|
||||
/// the future and the database should prefetch the data if possible. This
|
||||
/// can reduce cold-start latency for subsequent queries. Currently only
|
||||
/// supported on remote tables.
|
||||
///
|
||||
/// This call initiates prewarming and returns once the request is accepted.
|
||||
/// It is idempotent and safe to call from multiple clients concurrently —
|
||||
/// calling it on already-prewarmed columns is a no-op on the server.
|
||||
///
|
||||
/// This operation has a large upfront cost but can speed up future queries
|
||||
/// that need to fetch the given columns. Large columns such as embeddings
|
||||
/// or binary data may not be practical to prewarm. This feature is intended
|
||||
/// for workloads that issue many queries against the same columns.
|
||||
///
|
||||
/// If `columns` is `None`, all columns are prewarmed.
|
||||
pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
|
||||
self.inner.prewarm_data(columns).await
|
||||
}
|
||||
|
||||
/// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
|
||||
/// are not fully indexed within the timeout.
|
||||
pub async fn wait_for_index(
|
||||
@@ -2290,6 +2318,12 @@ impl BaseTable for NativeTable {
|
||||
Ok(dataset.prewarm_index(index_name).await?)
|
||||
}
|
||||
|
||||
async fn prewarm_data(&self, _columns: Option<Vec<String>>) -> Result<()> {
|
||||
Err(Error::NotSupported {
|
||||
message: "prewarm_data is currently only supported on remote tables.".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
|
||||
// Delegate to the submodule implementation
|
||||
update::execute_update(self, update).await
|
||||
|
||||
Reference in New Issue
Block a user