mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-04 11:30:46 +00:00
feat(refresh): batch_size is a per-refresh knob (refresh_column), not a function-only option
batch_size / num_workers / max_workers are invocation concerns (how to schedule THIS refresh), so expose batch_size on refresh_column through every layer (Python sync+async -> pyo3 -> Rust client -> the REST RefreshColumnRequest.batch_size, which the handler already forwards into the backfill). num_workers/max_workers were already invocation-placed; batch_size was the gap. The function may still carry a default; the refresh override wins (extends the batch_size_override model). Both crates cargo-check clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2338,6 +2338,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
where_clause: Option<String>,
|
||||
num_workers: Option<u32>,
|
||||
max_workers: Option<u32>,
|
||||
batch_size: Option<u32>,
|
||||
) -> Result<String> {
|
||||
let mut body = serde_json::json!({ "columns": columns });
|
||||
if let Some(w) = where_clause {
|
||||
@@ -2349,6 +2350,9 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
if let Some(n) = max_workers {
|
||||
body["max_workers"] = n.into();
|
||||
}
|
||||
if let Some(n) = batch_size {
|
||||
body["batch_size"] = n.into();
|
||||
}
|
||||
let request = self
|
||||
.client
|
||||
.post(&format!("/v1/table/{}/refresh_column", self.identifier))
|
||||
@@ -2873,7 +2877,7 @@ mod tests {
|
||||
});
|
||||
|
||||
let job_id = table
|
||||
.refresh_column(&["vec".to_string()], None, Some(2), None)
|
||||
.refresh_column(&["vec".to_string()], None, Some(2), None, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(job_id, "j-9");
|
||||
|
||||
@@ -646,6 +646,7 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
_where_clause: Option<String>,
|
||||
_num_workers: Option<u32>,
|
||||
_max_workers: Option<u32>,
|
||||
_batch_size: Option<u32>,
|
||||
) -> Result<String> {
|
||||
Err(Error::NotSupported {
|
||||
message: "refresh_column is not supported by this table".into(),
|
||||
@@ -1513,9 +1514,10 @@ impl Table {
|
||||
where_clause: Option<String>,
|
||||
num_workers: Option<u32>,
|
||||
max_workers: Option<u32>,
|
||||
batch_size: Option<u32>,
|
||||
) -> Result<String> {
|
||||
self.inner
|
||||
.refresh_column(columns, where_clause, num_workers, max_workers)
|
||||
.refresh_column(columns, where_clause, num_workers, max_workers, batch_size)
|
||||
.await
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user