diff --git a/python/python/lancedb/db.py b/python/python/lancedb/db.py index c3c683319..8884ffc24 100644 --- a/python/python/lancedb/db.py +++ b/python/python/lancedb/db.py @@ -642,6 +642,23 @@ class DBConnection(EnforceOverrides): ) ) + def explain_refresh_materialized_view( + self, + name: str, + *, + full: bool = False, + src_version: Optional[int] = None, + ): + """Plan a refresh without running it (EXPLAIN REFRESH). Returns a + plan with .has_work / .source_version / .last_refreshed_version / + .full_refresh / .rebuild / .units_total. `full=True` plans a full + rebuild (incremental planning needs stable row IDs on the source).""" + return LOOP.run( + self._conn.explain_refresh_materialized_view( + name, full=full, src_version=src_version + ) + ) + def alter_materialized_view(self, name: str, *, auto_refresh: bool): """Update a materialized view's options (ALTER MATERIALIZED VIEW).""" LOOP.run(self._conn.alter_materialized_view(name, auto_refresh=auto_refresh)) @@ -1944,6 +1961,18 @@ class AsyncConnection(object): max_workers=max_workers, ) + async def explain_refresh_materialized_view( + self, + name: str, + *, + full: bool = False, + src_version: Optional[int] = None, + ): + """Plan a refresh without running it (EXPLAIN REFRESH).""" + return await self._inner.explain_refresh_materialized_view( + name, full=full, src_version=src_version + ) + async def alter_materialized_view(self, name: str, *, auto_refresh: bool): """Update a materialized view's options.""" await self._inner.alter_materialized_view(name, auto_refresh) diff --git a/python/src/connection.rs b/python/src/connection.rs index 42bbdfd7e..01a1b741d 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -70,6 +70,19 @@ pub struct JobInfo { pub error: Option, } +/// The plan a REFRESH MATERIALIZED VIEW would execute (EXPLAIN REFRESH). +#[pyclass(get_all)] +#[derive(Clone)] +pub struct MvRefreshPlan { + pub table_name: String, + pub has_work: bool, + pub source_version: u64, + pub last_refreshed_version: Option, + pub full_refresh: bool, + pub rebuild: bool, + pub units_total: u64, +} + #[pyclass] pub struct Connection { inner: Option, @@ -444,6 +457,31 @@ impl Connection { }) } + #[pyo3(signature = (name, full=false, src_version=None))] + pub fn explain_refresh_materialized_view( + self_: PyRef<'_, Self>, + name: String, + full: bool, + src_version: Option, + ) -> PyResult> { + let inner = self_.get_inner()?.clone(); + future_into_py(self_.py(), async move { + let p = inner + .explain_refresh_materialized_view(&name, full, src_version) + .await + .infer_error()?; + Ok(MvRefreshPlan { + table_name: p.table_name, + has_work: p.has_work, + source_version: p.source_version, + last_refreshed_version: p.last_refreshed_version, + full_refresh: p.full_refresh, + rebuild: p.rebuild, + units_total: p.units_total, + }) + }) + } + pub fn alter_materialized_view( self_: PyRef<'_, Self>, name: String, diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 7af407d74..43bd1a417 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -24,7 +24,7 @@ use crate::data::scannable::Scannable; use crate::database::listing::ListingDatabase; use crate::database::{ CloneTableRequest, CreateFunctionRequest, CreateMaterializedViewRequest, Database, - DatabaseOptions, FunctionInfo, JobInfo, MaterializedViewInfo, OpenTableRequest, + DatabaseOptions, FunctionInfo, JobInfo, MaterializedViewInfo, MvRefreshPlan, OpenTableRequest, ReadConsistency, RefreshMaterializedViewRequest, TableNamesRequest, }; use crate::embeddings::{EmbeddingRegistry, MemoryRegistry}; @@ -525,6 +525,19 @@ impl Connection { self.internal.refresh_materialized_view(request).await } + /// Plan a materialized-view refresh without submitting work + /// (EXPLAIN REFRESH). + pub async fn explain_refresh_materialized_view( + &self, + name: &str, + full: bool, + src_version: Option, + ) -> Result { + self.internal + .explain_refresh_materialized_view(name, full, src_version) + .await + } + /// Update a materialized view's options (ALTER MATERIALIZED VIEW). pub async fn alter_materialized_view(&self, name: &str, auto_refresh: bool) -> Result<()> { self.internal diff --git a/rust/lancedb/src/database.rs b/rust/lancedb/src/database.rs index 2d20bb283..843c8eb43 100644 --- a/rust/lancedb/src/database.rs +++ b/rust/lancedb/src/database.rs @@ -317,6 +317,22 @@ pub struct JobInfo { pub error: Option, } +/// The plan a `REFRESH MATERIALIZED VIEW` would execute, as returned by +/// `explain_refresh_materialized_view` (EXPLAIN REFRESH). No work is run. +#[derive(Debug, Clone)] +pub struct MvRefreshPlan { + pub table_name: String, + /// Whether a refresh would do anything (rebuild or non-empty units). + pub has_work: bool, + pub source_version: u64, + pub last_refreshed_version: Option, + pub full_refresh: bool, + /// Source changed non-append-only since the last refresh -> rebuild. + pub rebuild: bool, + /// Number of row-range work units the refresh would process. + pub units_total: u64, +} + fn not_supported(what: &str) -> Result { Err(Error::NotSupported { message: format!("{} is not supported by this database", what), @@ -402,6 +418,17 @@ pub trait Database: ) -> Result { not_supported("refresh_materialized_view") } + /// Plan a materialized-view refresh without submitting work + /// (EXPLAIN REFRESH). `full` plans a full rebuild (incremental + /// planning requires stable row IDs on the source). + async fn explain_refresh_materialized_view( + &self, + _name: &str, + _full: bool, + _src_version: Option, + ) -> Result { + not_supported("explain_refresh_materialized_view") + } /// Update a materialized view's options (ALTER MATERIALIZED VIEW). async fn alter_materialized_view(&self, _name: &str, _auto_refresh: bool) -> Result<()> { not_supported("alter_materialized_view") diff --git a/rust/lancedb/src/remote/db.rs b/rust/lancedb/src/remote/db.rs index c2234b663..8abdab8f0 100644 --- a/rust/lancedb/src/remote/db.rs +++ b/rust/lancedb/src/remote/db.rs @@ -21,7 +21,8 @@ use crate::Error; use crate::database::{ CloneTableRequest, CreateFunctionRequest, CreateMaterializedViewRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions, FunctionInfo, JobInfo, MaterializedViewInfo, - OpenTableRequest, ReadConsistency, RefreshMaterializedViewRequest, TableNamesRequest, + MvRefreshPlan, OpenTableRequest, ReadConsistency, RefreshMaterializedViewRequest, + TableNamesRequest, }; use crate::error::Result; use crate::remote::util::stream_as_body; @@ -86,6 +87,25 @@ struct RemoteRefreshMaterializedViewResponse { job_id: String, } +#[derive(serde::Serialize)] +struct RemoteExplainRefreshRequest { + #[serde(skip_serializing_if = "Option::is_none")] + full: Option, + #[serde(skip_serializing_if = "Option::is_none")] + src_version: Option, +} + +#[derive(serde::Deserialize)] +struct RemoteExplainRefreshResponse { + table_name: String, + has_work: bool, + source_version: u64, + last_refreshed_version: Option, + full_refresh: bool, + rebuild: bool, + units_total: u64, +} + #[derive(serde::Serialize)] struct RemoteAlterMaterializedViewRequest { auto_refresh: bool, @@ -832,6 +852,34 @@ impl Database for RemoteDatabase { Ok(body.job_id) } + async fn explain_refresh_materialized_view( + &self, + name: &str, + full: bool, + src_version: Option, + ) -> Result { + let body = RemoteExplainRefreshRequest { + full: Some(full), + src_version, + }; + let req = self + .client + .post(&format!("/v1/materialized_view/{}/explain_refresh", name)) + .json(&body); + let (request_id, rsp) = self.client.send(req).await?; + let rsp = self.client.check_response(&request_id, rsp).await?; + let body: RemoteExplainRefreshResponse = rsp.json().await.err_to_http(request_id)?; + Ok(MvRefreshPlan { + table_name: body.table_name, + has_work: body.has_work, + source_version: body.source_version, + last_refreshed_version: body.last_refreshed_version, + full_refresh: body.full_refresh, + rebuild: body.rebuild, + units_total: body.units_total, + }) + } + async fn alter_materialized_view(&self, name: &str, auto_refresh: bool) -> Result<()> { let req = self .client