feat(refresh): batch_size is a per-refresh knob (refresh_column), not a function-only option

batch_size / num_workers / max_workers are invocation concerns (how to schedule THIS
refresh), so expose batch_size on refresh_column through every layer: Python sync+async
-> pyo3 -> Rust client -> the REST RefreshColumnRequest.batch_size, which the handler
already forwards into the backfill. num_workers/max_workers were already exposed at
invocation time; batch_size was the gap. The function may still carry a default
batch_size; a value passed to refresh_column overrides it (this extends the
batch_size_override model). Both crates pass `cargo check` cleanly.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Wyatt Alt
2026-06-14 06:30:08 -07:00
committed by Jack Ye
parent fbe6a5a3fd
commit d4f4fef3ba
4 changed files with 24 additions and 5 deletions

View File

@@ -3808,6 +3808,7 @@ class LanceTable(Table):
where: Optional[str] = None,
num_workers: Optional[int] = None,
max_workers: Optional[int] = None,
batch_size: Optional[int] = None,
) -> str:
"""Trigger recompute of computed columns (REFRESH COLUMN).
@@ -3815,6 +3816,10 @@ class LanceTable(Table):
binding; columns bound to the same struct-returning function
refresh together. Returns the refresh job id. Server-backed
feature (LanceDB Enterprise / Cloud).
num_workers / max_workers / batch_size are per-refresh scheduling
knobs (how to run THIS refresh) and override any default the
function carries.
"""
if isinstance(columns, str):
columns = [columns]
@@ -3824,6 +3829,7 @@ class LanceTable(Table):
where=where,
num_workers=num_workers,
max_workers=max_workers,
batch_size=batch_size,
)
)
@@ -5511,9 +5517,14 @@ class AsyncTable:
where: Optional[str] = None,
num_workers: Optional[int] = None,
max_workers: Optional[int] = None,
batch_size: Optional[int] = None,
) -> str:
"""Trigger recompute of computed columns (REFRESH COLUMN).
Returns the refresh job id. Server-backed feature."""
Returns the refresh job id. Server-backed feature.
num_workers / max_workers / batch_size are per-refresh scheduling
knobs (how to run THIS refresh); they override any default the
function carries."""
if isinstance(columns, str):
columns = [columns]
return await self._inner.refresh_column(
@@ -5521,6 +5532,7 @@ class AsyncTable:
where_clause=where,
num_workers=num_workers,
max_workers=max_workers,
batch_size=batch_size,
)
async def add_columns(