From 8b38500b07aebda69fded4620626ec076ecf4bc1 Mon Sep 17 00:00:00 2001 From: Wyatt Alt Date: Sun, 14 Jun 2026 13:56:08 -0700 Subject: [PATCH] feat(view): full=True force-rebuild on refresh_materialized_view View.refresh(full=True) (sync + async) now works -- it previously raised NotImplementedError. Thread the flag through the client: RefreshMaterializedViewRequest.full -> the REST body (RemoteRefreshMaterializedViewRequest.full); pyo3 refresh_materialized_view(full=...); Connection.refresh_materialized_view(name, full=...) sync + async. A full refresh forces a recompute-and-replace and preserves the view's indexes (reindexed by the distributed indexer). Co-Authored-By: Claude Opus 4.8 (1M context) --- python/python/lancedb/db.py | 18 +++++++++++++++--- python/python/lancedb/udf.py | 27 +++++++++++++-------------- python/src/connection.rs | 4 +++- rust/lancedb/src/database.rs | 4 ++++ rust/lancedb/src/remote/db.rs | 3 +++ 5 files changed, 38 insertions(+), 18 deletions(-) diff --git a/python/python/lancedb/db.py b/python/python/lancedb/db.py index 080eb8aed..070842670 100644 --- a/python/python/lancedb/db.py +++ b/python/python/lancedb/db.py @@ -562,7 +562,6 @@ class DBConnection(EnforceOverrides): """ raise NotImplementedError("serialize is not supported for this connection type") - # -- Derived compute: functions, materialized views, jobs ------------- # Server-backed features (LanceDB Enterprise / Cloud); local # connections raise NotImplementedError for now. @@ -711,14 +710,20 @@ class DBConnection(EnforceOverrides): self, name: str, *, + full: bool = False, src_version: Optional[int] = None, num_workers: Optional[int] = None, max_workers: Optional[int] = None, ) -> str: - """Refresh a materialized view; returns the refresh job id.""" + """Refresh a materialized view; returns the refresh job id. + + ``full=True`` forces a full rebuild (recompute and replace every row) + instead of the default incremental refresh. 
+ """ return LOOP.run( self._conn.refresh_materialized_view( name, + full=full, src_version=src_version, num_workers=num_workers, max_workers=max_workers, @@ -767,6 +772,7 @@ class DBConnection(EnforceOverrides): """ return LOOP.run(self._conn.cancel_job(job_id)) + class LanceDBConnection(DBConnection): """ A connection to a LanceDB database. @@ -2093,13 +2099,19 @@ class AsyncConnection(object): self, name: str, *, + full: bool = False, src_version: Optional[int] = None, num_workers: Optional[int] = None, max_workers: Optional[int] = None, ) -> str: - """Refresh a materialized view; returns the refresh job id.""" + """Refresh a materialized view; returns the refresh job id. + + ``full=True`` forces a full rebuild (recompute and replace every row) + instead of the default incremental refresh. + """ return await self._inner.refresh_materialized_view( name, + full=full, src_version=src_version, num_workers=num_workers, max_workers=max_workers, diff --git a/python/python/lancedb/udf.py b/python/python/lancedb/udf.py index 44417fc57..f392df293 100644 --- a/python/python/lancedb/udf.py +++ b/python/python/lancedb/udf.py @@ -499,14 +499,13 @@ class View: self.name = name def refresh(self, full: bool = False): - if full: - # full/force-rebuild is not honored on any surface yet (the - # refresh event carries no `full` flag) -- do not pretend. - raise NotImplementedError( - "full=True refresh is not supported yet (engine gap: the " - "refresh event has no full-rebuild flag)" - ) - return self.conn.refresh_materialized_view(self.name) + """Refresh the materialized view; returns the refresh job id. + + ``full=True`` forces a full rebuild (recompute and replace every row) + instead of the default incremental refresh. A full rebuild preserves + the view's indexes -- they are reindexed by the distributed indexer. 
+ """ + return self.conn.refresh_materialized_view(self.name, full=full) def explain_refresh(self, full: bool = False): """Plan a refresh without running it (EXPLAIN REFRESH).""" @@ -616,12 +615,12 @@ class AsyncView: self.name = name async def refresh(self, full: bool = False): - if full: - raise NotImplementedError( - "full=True refresh is not supported yet (engine gap: the " - "refresh event has no full-rebuild flag)" - ) - return await self.conn.refresh_materialized_view(self.name) + """Refresh the materialized view; returns the refresh job id. + + ``full=True`` forces a full rebuild instead of an incremental refresh + (indexes are preserved and reindexed by the distributed indexer). + """ + return await self.conn.refresh_materialized_view(self.name, full=full) async def explain_refresh(self, full: bool = False): return await self.conn.explain_refresh_materialized_view(self.name, full=full) diff --git a/python/src/connection.rs b/python/src/connection.rs index 3b34060f4..191fee077 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -437,10 +437,11 @@ impl Connection { }) } - #[pyo3(signature = (name, src_version=None, num_workers=None, max_workers=None))] + #[pyo3(signature = (name, full=false, src_version=None, num_workers=None, max_workers=None))] pub fn refresh_materialized_view( self_: PyRef<'_, Self>, name: String, + full: bool, src_version: Option, num_workers: Option, max_workers: Option, @@ -450,6 +451,7 @@ impl Connection { inner .refresh_materialized_view(RefreshMaterializedViewRequest { name, + full, src_version, num_workers, max_workers, diff --git a/rust/lancedb/src/database.rs b/rust/lancedb/src/database.rs index ccf756532..64215f982 100644 --- a/rust/lancedb/src/database.rs +++ b/rust/lancedb/src/database.rs @@ -269,6 +269,9 @@ impl CreateMaterializedViewRequest { pub struct RefreshMaterializedViewRequest { /// View name. 
pub name: String, + /// Force a full rebuild (recompute and replace every row) instead of the + /// default incremental refresh. + pub full: bool, /// Pin the refresh to a source-table version; latest when absent. pub src_version: Option, /// Initial worker count. @@ -281,6 +284,7 @@ impl RefreshMaterializedViewRequest { pub fn new(name: impl Into) -> Self { Self { name: name.into(), + full: false, src_version: None, num_workers: None, max_workers: None, diff --git a/rust/lancedb/src/remote/db.rs b/rust/lancedb/src/remote/db.rs index 19b686915..46cfd3089 100644 --- a/rust/lancedb/src/remote/db.rs +++ b/rust/lancedb/src/remote/db.rs @@ -76,6 +76,8 @@ struct RemoteCreateMaterializedViewResponse { #[derive(serde::Serialize)] struct RemoteRefreshMaterializedViewRequest { + #[serde(skip_serializing_if = "std::ops::Not::not")] + full: bool, #[serde(skip_serializing_if = "Option::is_none")] src_version: Option, #[serde(skip_serializing_if = "Option::is_none")] @@ -840,6 +842,7 @@ impl Database for RemoteDatabase { request: RefreshMaterializedViewRequest, ) -> Result { let body = RemoteRefreshMaterializedViewRequest { + full: request.full, src_version: request.src_version, num_workers: request.num_workers, max_workers: request.max_workers,