From 78aa00509344d55cd806e091ba8b2a04fb26d743 Mon Sep 17 00:00:00 2001 From: Wyatt Alt Date: Tue, 16 Jun 2026 08:27:23 -0700 Subject: [PATCH] client: slice 3 -- thread table_lineage through the remote client + pyo3 A new Database::table_lineage(TableLineageRequest) -> Result threaded end to end: default not_supported in the trait; the remote impl issues GET /v1/table/{name}/lineage with column/direction/depth query params and returns the body verbatim; connection.rs exposes a pub wrapper; the pyo3 binding hands the JSON string to Python. The lineage payload is carried as opaque JSON on purpose: the open-source lancedb client must not depend on the sophon-internal derived_jobs crate that defines the lineage schema, so the wire format is the contract and the Python layer deserializes it. Co-Authored-By: Claude Opus 4.8 (1M context) --- python/src/connection.rs | 25 ++++++++++++++++++++++++- rust/lancedb/src/connection.rs | 8 +++++++- rust/lancedb/src/database.rs | 20 ++++++++++++++++++++ rust/lancedb/src/remote/db.rs | 22 +++++++++++++++++++++- 4 files changed, 72 insertions(+), 3 deletions(-) diff --git a/python/src/connection.rs b/python/src/connection.rs index 191fee077..2335c4f6e 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -20,7 +20,7 @@ use lancedb::{ database::namespace::LanceNamespaceDatabase, database::{ CreateFunctionRequest, CreateMaterializedViewRequest, CreateTableMode, Database, - ReadConsistency, RefreshMaterializedViewRequest, + ReadConsistency, RefreshMaterializedViewRequest, TableLineageRequest, }, }; use pyo3::{ @@ -461,6 +461,29 @@ impl Connection { }) } + /// Derived-compute lineage of a table/view (or column), returned as the + /// server's lineage JSON string (the Python layer parses it). + pub fn table_lineage( + self_: PyRef<'_, Self>, + name: String, + column: Option, + direction: Option, + depth: Option, + ) -> PyResult> { + let inner = self_.get_inner()?.clone(); + future_into_py(self_.py(), async move { + inner + .table_lineage(TableLineageRequest { + name, + column, + direction, + depth, + }) + .await + .infer_error() + }) + } + #[pyo3(signature = (name, full=false, src_version=None))] pub fn explain_refresh_materialized_view( self_: PyRef<'_, Self>, diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 43bd1a417..58128c023 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -25,7 +25,7 @@ use crate::database::listing::ListingDatabase; use crate::database::{ CloneTableRequest, CreateFunctionRequest, CreateMaterializedViewRequest, Database, DatabaseOptions, FunctionInfo, JobInfo, MaterializedViewInfo, MvRefreshPlan, OpenTableRequest, - ReadConsistency, RefreshMaterializedViewRequest, TableNamesRequest, + ReadConsistency, RefreshMaterializedViewRequest, TableLineageRequest, TableNamesRequest, }; use crate::embeddings::{EmbeddingRegistry, MemoryRegistry}; use crate::error::{Error, Result}; @@ -525,6 +525,12 @@ impl Connection { self.internal.refresh_materialized_view(request).await } + /// Derived-compute lineage of a table/view (or column), as server-defined + /// JSON. Read-only. + pub async fn table_lineage(&self, request: TableLineageRequest) -> Result { + self.internal.table_lineage(request).await + } + /// Plan a materialized-view refresh without submitting work /// (EXPLAIN REFRESH). pub async fn explain_refresh_materialized_view( diff --git a/rust/lancedb/src/database.rs b/rust/lancedb/src/database.rs index 64215f982..6907aefd3 100644 --- a/rust/lancedb/src/database.rs +++ b/rust/lancedb/src/database.rs @@ -280,6 +280,21 @@ pub struct RefreshMaterializedViewRequest { pub max_workers: Option, } +/// A request for the derived-compute lineage of a table/view (or one of its +/// columns). The response is server-defined lineage JSON, returned opaque so +/// this client need not model the server's lineage schema. +#[derive(Debug, Clone, Default)] +pub struct TableLineageRequest { + /// Table or view name. + pub name: String, + /// Column for column-level lineage; whole table/view when absent. + pub column: Option, + /// "upstream" | "downstream" | "both" (server default when absent). + pub direction: Option, + /// Column-hops to walk; transitive when absent. + pub depth: Option, +} + impl RefreshMaterializedViewRequest { pub fn new(name: impl Into) -> Self { Self { @@ -427,6 +442,11 @@ pub trait Database: ) -> Result { not_supported("refresh_materialized_view") } + /// Derived-compute lineage of a table/view (or column), as server-defined + /// JSON. Read-only. + async fn table_lineage(&self, _request: TableLineageRequest) -> Result { + not_supported("table_lineage") + } /// Plan a materialized-view refresh without submitting work /// (EXPLAIN REFRESH). `full` plans a full rebuild (incremental /// planning requires stable row IDs on the source). diff --git a/rust/lancedb/src/remote/db.rs b/rust/lancedb/src/remote/db.rs index 46cfd3089..ce9760391 100644 --- a/rust/lancedb/src/remote/db.rs +++ b/rust/lancedb/src/remote/db.rs @@ -22,7 +22,7 @@ use crate::database::{ CloneTableRequest, CreateFunctionRequest, CreateMaterializedViewRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions, FunctionInfo, JobInfo, MaterializedViewInfo, MvRefreshPlan, OpenTableRequest, ReadConsistency, RefreshMaterializedViewRequest, - TableNamesRequest, + TableLineageRequest, TableNamesRequest, }; use crate::error::Result; use crate::remote::util::stream_as_body; @@ -858,6 +858,26 @@ impl Database for RemoteDatabase { Ok(body.job_id) } + async fn table_lineage(&self, request: TableLineageRequest) -> Result { + let mut req = self + .client + .get(&format!("/v1/table/{}/lineage", request.name)); + if let Some(column) = &request.column { + req = req.query(&[("column", column)]); + } + if let Some(direction) = &request.direction { + req = req.query(&[("direction", direction)]); + } + if let Some(depth) = request.depth { + req = req.query(&[("depth", depth.to_string())]); + } + let (request_id, rsp) = self.client.send(req).await?; + let rsp = self.client.check_response(&request_id, rsp).await?; + // Server-defined lineage JSON, returned opaque (the client does not + // model the lineage schema; the Python layer deserializes it). + rsp.text().await.err_to_http(request_id) + } + async fn explain_refresh_materialized_view( &self, name: &str,