feat: SDK surface for functions, materialized views, jobs, refresh_column

Adds the derived-compute interface to the SDK:

- Database trait: create/list/drop_function, create/refresh/alter/
  drop/list_materialized_view, list_jobs -- default implementations
  return Error::NotSupported (NotImplementedError in Python), so
  existing Database impls are unaffected; local single-node
  implementations are planned. BaseTable gains refresh_column with
  the same default.
- RemoteDatabase/RemoteTable implement them against the server REST
  routes (/v1/function/*, /v1/materialized_view/*, /v1/job/list,
  /v1/table/{id}/refresh_column), with mock-HTTP unit tests.
- Connection/Table public methods, pyo3 bindings (FunctionInfo,
  MaterializedViewInfo, JobInfo pyclasses), and Python wrappers:
  sync on the DBConnection base (shared by local and remote
  connections), async on AsyncConnection; refresh_column on
  LanceTable, RemoteTable, and AsyncTable.
This commit is contained in:
Wyatt Alt
2026-06-12 10:00:07 -07:00
committed by Jack Ye
parent 10fecdf051
commit ff3c7111b9
11 changed files with 1182 additions and 6 deletions

View File

@@ -563,6 +563,101 @@ class DBConnection(EnforceOverrides):
raise NotImplementedError("serialize is not supported for this connection type")
# -- Derived compute: functions, materialized views, jobs -------------
# Server-backed features (LanceDB Enterprise / Cloud); local
# connections raise NotImplementedError for now.
def create_function(
    self,
    name: str,
    language: str,
    return_type: str,
    body: str,
    options: Optional[Dict[str, str]] = None,
):
    """Register a user-defined function (CREATE FUNCTION).

    Synchronous wrapper: the request is built on ``self._conn`` and
    handed to ``LOOP.run``. Nothing is returned.

    Parameters
    ----------
    name: str
        Name to register the function under.
    language: str
        Implementation language (currently "python").
    return_type: str
        SQL return type, e.g. "FLOAT", "FLOAT[1536]",
        "STRUCT(a FLOAT, b VARCHAR)", "TABLE(chunk VARCHAR, idx INT)".
    body: str
        Function body: source text, or base64 cloudpickle bytes when
        options["body_format"] == "cloudpickle".
    options: dict, optional
        Extra settings such as input_columns, pip, num_gpus,
        batch_size, timeout, error_policy, docker_image, body_format, ...
    """
    registration = self._conn.create_function(
        name, language, return_type, body, options
    )
    LOOP.run(registration)
def list_functions(self):
    """Return the registered functions (SHOW FUNCTIONS).

    Synchronous wrapper around ``self._conn.list_functions()``.
    """
    listing = self._conn.list_functions()
    return LOOP.run(listing)
def drop_function(self, name: str):
    """Remove the registered function called ``name`` (DROP FUNCTION).

    Synchronous wrapper around ``self._conn.drop_function``; returns nothing.
    """
    removal = self._conn.drop_function(name)
    LOOP.run(removal)
def create_materialized_view(
    self,
    name: str,
    query: str,
    *,
    auto_refresh: bool = False,
    with_no_data: bool = False,
) -> Optional[str]:
    """Create a materialized view (CREATE MATERIALIZED VIEW).

    Parameters
    ----------
    name: str
        Name of the view.
    query: str
        The view's SELECT statement, e.g.
        "SELECT id, embed(body) AS vec FROM articles WHERE id > 1".
    auto_refresh: bool, default False
        Forwarded unchanged to the underlying connection.
    with_no_data: bool, default False
        Forwarded unchanged to the underlying connection.

    Returns
    -------
    Optional[str]
        The initial-population job id, or None when with_no_data=True.
    """
    view_options = {"auto_refresh": auto_refresh, "with_no_data": with_no_data}
    creation = self._conn.create_materialized_view(name, query, **view_options)
    return LOOP.run(creation)
def refresh_materialized_view(
    self,
    name: str,
    *,
    src_version: Optional[int] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Refresh the materialized view ``name`` and return the refresh job id.

    ``src_version``, ``num_workers`` and ``max_workers`` are optional and
    are forwarded unchanged (``None`` included) to the underlying
    connection.
    """
    refresh_options = {
        "src_version": src_version,
        "num_workers": num_workers,
        "max_workers": max_workers,
    }
    refresh = self._conn.refresh_materialized_view(name, **refresh_options)
    return LOOP.run(refresh)
def alter_materialized_view(self, name: str, *, auto_refresh: bool):
    """Change a materialized view's options (ALTER MATERIALIZED VIEW).

    ``auto_refresh`` is keyword-only and required; it is the only option
    this wrapper forwards. Returns nothing.
    """
    alteration = self._conn.alter_materialized_view(
        name, auto_refresh=auto_refresh
    )
    LOOP.run(alteration)
def drop_materialized_view(self, name: str):
    """Remove the materialized view definition ``name``
    (DROP MATERIALIZED VIEW). Returns nothing.
    """
    removal = self._conn.drop_materialized_view(name)
    LOOP.run(removal)
def list_materialized_views(self):
    """Return the registered materialized view definitions.

    Synchronous wrapper around ``self._conn.list_materialized_views()``.
    """
    listing = self._conn.list_materialized_views()
    return LOOP.run(listing)
def list_jobs(self):
    """Return the in-flight server-side jobs across the database's tables.

    Synchronous wrapper around ``self._conn.list_jobs()``.
    """
    listing = self._conn.list_jobs()
    return LOOP.run(listing)
class LanceDBConnection(DBConnection):
"""
A connection to a LanceDB database.
@@ -1787,6 +1882,75 @@ class AsyncConnection(object):
)
return AsyncTable(table)
# -- Derived compute: functions, materialized views, jobs -------------
# Server-backed features (LanceDB Enterprise / Cloud); local
# connections raise NotImplementedError for now.
async def create_function(
    self,
    name: str,
    language: str,
    return_type: str,
    body: str,
    options: Optional[Dict[str, str]] = None,
):
    """Register a user-defined function (CREATE FUNCTION).

    All five arguments are passed positionally, in order, to
    ``self._inner.create_function``; ``options`` may be ``None``.
    Returns nothing.
    """
    pending = self._inner.create_function(
        name, language, return_type, body, options
    )
    await pending
async def list_functions(self):
    """Return the registered functions (SHOW FUNCTIONS)."""
    functions = await self._inner.list_functions()
    return functions
async def drop_function(self, name: str):
    """Remove the registered function called ``name`` (DROP FUNCTION).

    Returns nothing; the result of the inner call is discarded.
    """
    pending = self._inner.drop_function(name)
    await pending
async def create_materialized_view(
    self,
    name: str,
    query: str,
    *,
    auto_refresh: bool = False,
    with_no_data: bool = False,
) -> Optional[str]:
    """Create a materialized view from the SELECT statement ``query``.

    ``auto_refresh`` and ``with_no_data`` are forwarded as keyword
    arguments. Returns the initial-population job id, or None when
    with_no_data=True.
    """
    view_options = {"auto_refresh": auto_refresh, "with_no_data": with_no_data}
    job_id = await self._inner.create_materialized_view(
        name, query, **view_options
    )
    return job_id
async def refresh_materialized_view(
    self,
    name: str,
    *,
    src_version: Optional[int] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Refresh the materialized view ``name`` and return the refresh job id.

    The three optional settings are forwarded as keyword arguments,
    ``None`` values included.
    """
    refresh_options = {
        "src_version": src_version,
        "num_workers": num_workers,
        "max_workers": max_workers,
    }
    job_id = await self._inner.refresh_materialized_view(name, **refresh_options)
    return job_id
async def alter_materialized_view(self, name: str, *, auto_refresh: bool):
    """Change a materialized view's options.

    ``auto_refresh`` is keyword-only for callers but is passed
    positionally to the inner connection. Returns nothing.
    """
    pending = self._inner.alter_materialized_view(name, auto_refresh)
    await pending
async def drop_materialized_view(self, name: str):
    """Remove the materialized view definition ``name``. Returns nothing."""
    pending = self._inner.drop_materialized_view(name)
    await pending
async def list_materialized_views(self):
    """Return the registered materialized view definitions."""
    views = await self._inner.list_materialized_views()
    return views
async def list_jobs(self):
    """Return the in-flight server-side jobs across the database's tables."""
    jobs = await self._inner.list_jobs()
    return jobs
async def rename_table(
self,
cur_name: str,

View File

@@ -887,6 +887,33 @@ class RemoteTable(Table):
def add_columns(self, transforms: Dict[str, str]) -> AddColumnsResult:
    """Forward ``transforms`` to the wrapped table's ``add_columns`` and
    return its result, running the call through ``LOOP.run``.
    """
    addition = self._table.add_columns(transforms)
    return LOOP.run(addition)
def refresh_column(
    self,
    columns,
    *,
    where: Optional[str] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Trigger recompute of computed columns (REFRESH COLUMN).

    The expression is resolved server-side from each column's stored
    binding; columns bound to the same struct-returning function
    refresh together. Server-backed feature (LanceDB Enterprise / Cloud).

    Parameters
    ----------
    columns: str or iterable of str
        One column name, or several. A single string is treated as one
        name, not as an iterable of characters.
    where: str, optional
        Forwarded unchanged to the wrapped table.
    num_workers, max_workers: int, optional
        Forwarded unchanged to the wrapped table.

    Returns
    -------
    str
        The refresh job id.
    """
    # Wrap a bare string so list() below does not split it into characters.
    targets = [columns] if isinstance(columns, str) else list(columns)
    refresh = self._table.refresh_column(
        targets,
        where=where,
        num_workers=num_workers,
        max_workers=max_workers,
    )
    return LOOP.run(refresh)
def alter_columns(
self, *alterations: Iterable[Dict[str, str]]
) -> AlterColumnsResult:

View File

@@ -3714,6 +3714,33 @@ class LanceTable(Table):
) -> AddColumnsResult:
return LOOP.run(self._table.add_columns(transforms))
def refresh_column(
    self,
    columns,
    *,
    where: Optional[str] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Trigger recompute of computed columns (REFRESH COLUMN).

    The expression is resolved server-side from each column's stored
    binding; columns bound to the same struct-returning function
    refresh together. Server-backed feature (LanceDB Enterprise / Cloud).

    Parameters
    ----------
    columns: str or iterable of str
        One column name, or several. A single string is treated as one
        name, not as an iterable of characters.
    where: str, optional
        Forwarded unchanged to the wrapped table.
    num_workers, max_workers: int, optional
        Forwarded unchanged to the wrapped table.

    Returns
    -------
    str
        The refresh job id.
    """
    # Wrap a bare string so list() below does not split it into characters.
    targets = [columns] if isinstance(columns, str) else list(columns)
    refresh = self._table.refresh_column(
        targets,
        where=where,
        num_workers=num_workers,
        max_workers=max_workers,
    )
    return LOOP.run(refresh)
def alter_columns(
self, *alterations: Iterable[Dict[str, str]]
) -> AlterColumnsResult:
@@ -5390,6 +5417,25 @@ class AsyncTable:
return await self._inner.update(updates_sql, where)
async def refresh_column(
    self,
    columns,
    *,
    where: Optional[str] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Trigger recompute of computed columns (REFRESH COLUMN).

    ``columns`` is one column name or an iterable of names; a single
    string is treated as one name. ``where`` is handed to the inner
    table under the keyword ``where_clause``. Returns the refresh job
    id. Server-backed feature.
    """
    # Wrap a bare string so list() below does not split it into characters.
    targets = [columns] if isinstance(columns, str) else list(columns)
    job_id = await self._inner.refresh_column(
        targets,
        where_clause=where,
        num_workers=num_workers,
        max_workers=max_workers,
    )
    return job_id
async def add_columns(
self, transforms: dict[str, str] | pa.field | List[pa.field] | pa.Schema
) -> AddColumnsResult:

View File

@@ -18,7 +18,10 @@ use lancedb::{
connection::Connection as LanceConnection,
connection::NamespaceClientPushdownOperation,
database::namespace::LanceNamespaceDatabase,
database::{CreateTableMode, Database, ReadConsistency},
database::{
CreateFunctionRequest, CreateMaterializedViewRequest, CreateTableMode, Database,
ReadConsistency, RefreshMaterializedViewRequest,
},
};
use pyo3::{
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
@@ -27,6 +30,46 @@ use pyo3::{
types::{PyDict, PyDictMethods},
};
/// A registered function, as returned by `list_functions`.
///
/// Exposed to Python as a class; `get_all` generates a getter for every
/// field. Instances are built in `Connection::list_functions` by copying
/// the same-named fields of each entry the underlying connection returns.
#[pyclass(get_all)]
#[derive(Clone)]
pub struct FunctionInfo {
/// Function name.
pub name: String,
/// Implementation language.
pub language: String,
/// Declared return type of the function.
pub return_type: String,
/// Description reported for the function.
/// NOTE(review): content/format is not visible here -- confirm against the server response.
pub description: String,
}
/// A registered materialized view definition.
///
/// Exposed to Python as a class; `get_all` generates a getter for every
/// field. Instances are built in `Connection::list_materialized_views` by
/// copying the same-named fields of each entry the underlying connection
/// returns.
///
/// NOTE(review): the field meanings marked "presumably" below are inferred
/// from their names only -- confirm against the server API.
#[pyclass(get_all)]
#[derive(Clone)]
pub struct MaterializedViewInfo {
/// View name.
pub name: String,
/// Presumably the table the view's query reads from.
pub source_table: String,
/// Presumably the columns/expressions the view projects.
pub projection: Vec<String>,
/// Presumably the projected columns that are computed by registered functions.
pub udf_columns: Vec<String>,
/// Presumably the view's filter predicate; `None` when there is none.
pub filter: Option<String>,
/// The view's auto-refresh setting.
pub auto_refresh: bool,
}
/// One inflight server-side job.
///
/// Exposed to Python as a class; `get_all` generates a getter for every
/// field. Instances are built in `Connection::list_jobs` by copying the
/// same-named fields of each entry the underlying connection returns.
///
/// NOTE(review): the field meanings marked "presumably" below are inferred
/// from their names only -- confirm against the server API.
#[pyclass(get_all)]
#[derive(Clone)]
pub struct JobInfo {
/// Presumably the name of the table the job operates on.
pub table: String,
/// Job identifier.
pub job_id: String,
/// Presumably the kind of job (e.g. a refresh); the set of values is not visible here.
pub job_type: String,
/// Presumably the job's lifecycle state; the set of values is not visible here.
pub state: String,
/// Presumably the column being computed, when the job targets one.
pub column: Option<String>,
/// Presumably the job's age in seconds, when reported.
pub age_seconds: Option<i64>,
/// Presumably the command/statement that started the job, when reported.
pub command: Option<String>,
/// Presumably the number of work units finished so far, when reported.
pub units_done: Option<i64>,
/// Presumably the total number of work units, when known.
pub units_total: Option<i64>,
/// Presumably whether the job's output has been committed.
pub committed: bool,
/// Presumably the number of rows the job skipped.
pub rows_skipped: u64,
/// Presumably the failure message, when the job reported an error.
pub error: Option<String>,
}
#[pyclass]
pub struct Connection {
inner: Option<LanceConnection>,
@@ -310,6 +353,163 @@ impl Connection {
})
}
/// Register a function on the underlying connection.
///
/// The arguments are packed into a `CreateFunctionRequest`; a missing
/// `options` argument becomes an empty map. Returns the Python object
/// produced by `future_into_py`; errors from the connection are mapped
/// through `infer_error`. Fails immediately if `get_inner` fails.
#[pyo3(signature = (name, language, return_type, body, options=None))]
pub fn create_function(
    self_: PyRef<'_, Self>,
    name: String,
    language: String,
    return_type: String,
    body: String,
    options: Option<HashMap<String, String>>,
) -> PyResult<Bound<'_, PyAny>> {
    let conn = self_.get_inner()?.clone();
    let request = CreateFunctionRequest {
        name,
        language,
        return_type,
        body,
        options: options.unwrap_or_default(),
    };
    future_into_py(self_.py(), async move {
        let outcome = conn.create_function(request).await;
        outcome.infer_error()
    })
}
pub fn list_functions(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
future_into_py(self_.py(), async move {
let functions = inner.list_functions().await.infer_error()?;
Ok(functions
.into_iter()
.map(|f| FunctionInfo {
name: f.name,
language: f.language,
return_type: f.return_type,
description: f.description,
})
.collect::<Vec<_>>())
})
}
/// Drop the registered function called `name`.
///
/// Errors from the underlying connection are mapped through `infer_error`.
pub fn drop_function(self_: PyRef<'_, Self>, name: String) -> PyResult<Bound<'_, PyAny>> {
    let conn = self_.get_inner()?.clone();
    future_into_py(self_.py(), async move {
        let outcome = conn.drop_function(&name).await;
        outcome.infer_error()
    })
}
/// Create a materialized view on the underlying connection.
///
/// The arguments are packed into a `CreateMaterializedViewRequest`;
/// `auto_refresh` and `with_no_data` both default to `false` on the Python
/// side. Errors from the connection are mapped through `infer_error`.
#[pyo3(signature = (name, query, auto_refresh=false, with_no_data=false))]
pub fn create_materialized_view(
    self_: PyRef<'_, Self>,
    name: String,
    query: String,
    auto_refresh: bool,
    with_no_data: bool,
) -> PyResult<Bound<'_, PyAny>> {
    let conn = self_.get_inner()?.clone();
    let request = CreateMaterializedViewRequest {
        name,
        query,
        auto_refresh,
        with_no_data,
    };
    future_into_py(self_.py(), async move {
        let outcome = conn.create_materialized_view(request).await;
        outcome.infer_error()
    })
}
/// Refresh the materialized view called `name`.
///
/// The arguments are packed into a `RefreshMaterializedViewRequest`; the
/// three optional settings default to `None` on the Python side and are
/// forwarded as given. Errors from the connection are mapped through
/// `infer_error`.
#[pyo3(signature = (name, src_version=None, num_workers=None, max_workers=None))]
pub fn refresh_materialized_view(
    self_: PyRef<'_, Self>,
    name: String,
    src_version: Option<u64>,
    num_workers: Option<u32>,
    max_workers: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> {
    let conn = self_.get_inner()?.clone();
    let request = RefreshMaterializedViewRequest {
        name,
        src_version,
        num_workers,
        max_workers,
    };
    future_into_py(self_.py(), async move {
        let outcome = conn.refresh_materialized_view(request).await;
        outcome.infer_error()
    })
}
/// Update the `auto_refresh` setting of the materialized view called `name`.
///
/// Errors from the underlying connection are mapped through `infer_error`.
pub fn alter_materialized_view(
    self_: PyRef<'_, Self>,
    name: String,
    auto_refresh: bool,
) -> PyResult<Bound<'_, PyAny>> {
    let conn = self_.get_inner()?.clone();
    future_into_py(self_.py(), async move {
        let outcome = conn.alter_materialized_view(&name, auto_refresh).await;
        outcome.infer_error()
    })
}
/// Drop the materialized view definition called `name`.
///
/// Errors from the underlying connection are mapped through `infer_error`.
pub fn drop_materialized_view(
    self_: PyRef<'_, Self>,
    name: String,
) -> PyResult<Bound<'_, PyAny>> {
    let conn = self_.get_inner()?.clone();
    future_into_py(self_.py(), async move {
        let outcome = conn.drop_materialized_view(&name).await;
        outcome.infer_error()
    })
}
pub fn list_materialized_views(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
future_into_py(self_.py(), async move {
let views = inner.list_materialized_views().await.infer_error()?;
Ok(views
.into_iter()
.map(|v| MaterializedViewInfo {
name: v.name,
source_table: v.source_table,
projection: v.projection,
udf_columns: v.udf_columns,
filter: v.filter,
auto_refresh: v.auto_refresh,
})
.collect::<Vec<_>>())
})
}
pub fn list_jobs(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
future_into_py(self_.py(), async move {
let jobs = inner.list_jobs().await.infer_error()?;
Ok(jobs
.into_iter()
.map(|j| JobInfo {
table: j.table,
job_id: j.job_id,
job_type: j.job_type,
state: j.state,
column: j.column,
age_seconds: j.age_seconds,
command: j.command,
units_done: j.units_done,
units_total: j.units_total,
committed: j.committed,
rows_skipped: j.rows_skipped,
error: j.error,
})
.collect::<Vec<_>>())
})
}
#[pyo3(signature = (cur_name, new_name, cur_namespace_path=None, new_namespace_path=None))]
pub fn rename_table(
self_: PyRef<'_, Self>,

View File

@@ -41,6 +41,9 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
.write_style("LANCEDB_LOG_STYLE");
env_logger::init_from_env(env);
m.add_class::<Connection>()?;
m.add_class::<connection::FunctionInfo>()?;
m.add_class::<connection::MaterializedViewInfo>()?;
m.add_class::<connection::JobInfo>()?;
m.add_class::<Session>()?;
m.add_class::<Table>()?;
m.add_class::<IndexConfig>()?;

View File

@@ -1060,6 +1060,23 @@ impl Table {
})
}
/// Trigger a refresh of the given columns on the underlying table.
///
/// `where_clause`, `num_workers` and `max_workers` default to `None` on the
/// Python side and are forwarded as given. Errors from the table are mapped
/// through `infer_error`. Fails immediately if `inner_ref` fails.
#[pyo3(signature = (columns, where_clause=None, num_workers=None, max_workers=None))]
pub fn refresh_column(
    self_: PyRef<'_, Self>,
    columns: Vec<String>,
    where_clause: Option<String>,
    num_workers: Option<u32>,
    max_workers: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> {
    let table = self_.inner_ref()?.clone();
    future_into_py(self_.py(), async move {
        let outcome = table
            .refresh_column(&columns, where_clause, num_workers, max_workers)
            .await;
        outcome.infer_error()
    })
}
pub fn add_columns(
self_: PyRef<'_, Self>,
definitions: Vec<(String, String)>,