mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-01 10:00:42 +00:00
feat(refresh): batch_size is a per-refresh knob (refresh_column), not a function-only option
batch_size / num_workers / max_workers are invocation concerns (how to schedule THIS refresh), so expose batch_size on refresh_column through every layer (Python sync+async -> pyo3 -> Rust client -> the REST RefreshColumnRequest.batch_size, which the handler already forwards into the backfill). num_workers/max_workers were already invocation-placed; batch_size was the gap. The function may still carry a default; the refresh override wins (extends the batch_size_override model). Both crates cargo-check clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3808,6 +3808,7 @@ class LanceTable(Table):
|
||||
where: Optional[str] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Trigger recompute of computed columns (REFRESH COLUMN).
|
||||
|
||||
@@ -3815,6 +3816,10 @@ class LanceTable(Table):
|
||||
binding; columns bound to the same struct-returning function
|
||||
refresh together. Returns the refresh job id. Server-backed
|
||||
feature (LanceDB Enterprise / Cloud).
|
||||
|
||||
num_workers / max_workers / batch_size are per-refresh scheduling
|
||||
knobs (how to run THIS refresh) and override any default the
|
||||
function carries.
|
||||
"""
|
||||
if isinstance(columns, str):
|
||||
columns = [columns]
|
||||
@@ -3824,6 +3829,7 @@ class LanceTable(Table):
|
||||
where=where,
|
||||
num_workers=num_workers,
|
||||
max_workers=max_workers,
|
||||
batch_size=batch_size,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -5511,9 +5517,14 @@ class AsyncTable:
|
||||
where: Optional[str] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
batch_size: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Trigger recompute of computed columns (REFRESH COLUMN).
|
||||
Returns the refresh job id. Server-backed feature."""
|
||||
Returns the refresh job id. Server-backed feature.
|
||||
|
||||
num_workers / max_workers / batch_size are per-refresh scheduling
|
||||
knobs (how to run THIS refresh); they override any default the
|
||||
function carries."""
|
||||
if isinstance(columns, str):
|
||||
columns = [columns]
|
||||
return await self._inner.refresh_column(
|
||||
@@ -5521,6 +5532,7 @@ class AsyncTable:
|
||||
where_clause=where,
|
||||
num_workers=num_workers,
|
||||
max_workers=max_workers,
|
||||
batch_size=batch_size,
|
||||
)
|
||||
|
||||
async def add_columns(
|
||||
|
||||
@@ -1074,18 +1074,19 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (columns, where_clause=None, num_workers=None, max_workers=None))]
|
||||
#[pyo3(signature = (columns, where_clause=None, num_workers=None, max_workers=None, batch_size=None))]
|
||||
pub fn refresh_column(
|
||||
self_: PyRef<'_, Self>,
|
||||
columns: Vec<String>,
|
||||
where_clause: Option<String>,
|
||||
num_workers: Option<u32>,
|
||||
max_workers: Option<u32>,
|
||||
batch_size: Option<u32>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner
|
||||
.refresh_column(&columns, where_clause, num_workers, max_workers)
|
||||
.refresh_column(&columns, where_clause, num_workers, max_workers, batch_size)
|
||||
.await
|
||||
.infer_error()
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user