feat: SDK surface for functions, materialized views, jobs, refresh_column

Adds the derived-compute interface to the SDK:

- Database trait: create/list/drop_function, create/refresh/alter/
  drop/list_materialized_view, list_jobs -- default implementations
  return Error::NotSupported (NotImplementedError in Python), so
  existing Database impls are unaffected; local single-node
  implementations are planned. BaseTable gains refresh_column with
  the same default.
- RemoteDatabase/RemoteTable implement them against the server REST
  routes (/v1/function/*, /v1/materialized_view/*, /v1/job/list,
  /v1/table/{id}/refresh_column), with mock-HTTP unit tests.
- Connection/Table public methods, pyo3 bindings (FunctionInfo,
  MaterializedViewInfo, JobInfo pyclasses), and Python wrappers:
  sync on the DBConnection base (shared by local and remote
  connections), async on AsyncConnection; refresh_column on
  LanceTable, RemoteTable, and AsyncTable.
This commit is contained in:
Wyatt Alt
2026-06-12 10:00:07 -07:00
committed by Jack Ye
parent 10fecdf051
commit ff3c7111b9
11 changed files with 1182 additions and 6 deletions

View File

@@ -563,6 +563,101 @@ class DBConnection(EnforceOverrides):
raise NotImplementedError("serialize is not supported for this connection type")
# -- Derived compute: functions, materialized views, jobs -------------
# Server-backed features (LanceDB Enterprise / Cloud); local
# connections raise NotImplementedError for now.
def create_function(
    self,
    name: str,
    language: str,
    return_type: str,
    body: str,
    options: Optional[Dict[str, str]] = None,
):
    """Register a user-defined function (CREATE FUNCTION).

    Parameters
    ----------
    name: str
        Name of the function.
    language: str
        Implementation language (currently "python").
    return_type: str
        SQL return type, e.g. "FLOAT", "FLOAT[1536]",
        "STRUCT(a FLOAT, b VARCHAR)", "TABLE(chunk VARCHAR, idx INT)".
    body: str
        Function body: source text, or base64 cloudpickle bytes when
        options["body_format"] == "cloudpickle".
    options: dict, optional
        Additional settings: input_columns, pip, num_gpus, batch_size,
        timeout, error_policy, docker_image, body_format, ...
    """
    # All five arguments are forwarded positionally to the underlying
    # connection; the result of that call is handed to LOOP.run and
    # nothing is returned to the caller.
    registration = self._conn.create_function(
        name, language, return_type, body, options
    )
    LOOP.run(registration)
def list_functions(self):
    """Return the registered functions (SHOW FUNCTIONS).

    Delegates to ``list_functions`` on the underlying connection and
    returns whatever ``LOOP.run`` yields for that call.
    """
    listing = self._conn.list_functions()
    return LOOP.run(listing)
def drop_function(self, name: str):
    """Remove a registered function (DROP FUNCTION).

    Parameters
    ----------
    name: str
        Name of the function to drop.
    """
    # Delegate to the underlying connection; no value is returned.
    removal = self._conn.drop_function(name)
    LOOP.run(removal)
def create_materialized_view(
    self,
    name: str,
    query: str,
    *,
    auto_refresh: bool = False,
    with_no_data: bool = False,
) -> Optional[str]:
    """Define a materialized view (CREATE MATERIALIZED VIEW).

    Parameters
    ----------
    name: str
        Name of the view.
    query: str
        The view's SELECT statement, e.g.
        "SELECT id, embed(body) AS vec FROM articles WHERE id > 1".
    auto_refresh: bool, default False
        Forwarded unchanged as the ``auto_refresh`` keyword.
    with_no_data: bool, default False
        Forwarded unchanged as the ``with_no_data`` keyword.

    Returns
    -------
    Optional[str]
        The initial-population job id, or None when with_no_data=True.
    """
    # The two flags travel as keyword arguments, exactly as received.
    flags = {"auto_refresh": auto_refresh, "with_no_data": with_no_data}
    creation = self._conn.create_materialized_view(name, query, **flags)
    return LOOP.run(creation)
def refresh_materialized_view(
    self,
    name: str,
    *,
    src_version: Optional[int] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Refresh a materialized view and return the refresh job id.

    Parameters
    ----------
    name: str
        Name of the view to refresh.
    src_version: int, optional
        Forwarded unchanged as the ``src_version`` keyword.
    num_workers: int, optional
        Forwarded unchanged as the ``num_workers`` keyword.
    max_workers: int, optional
        Forwarded unchanged as the ``max_workers`` keyword.
    """
    # Keyword-only tuning knobs are passed through by name.
    tuning = {
        "src_version": src_version,
        "num_workers": num_workers,
        "max_workers": max_workers,
    }
    refresh = self._conn.refresh_materialized_view(name, **tuning)
    return LOOP.run(refresh)
def alter_materialized_view(self, name: str, *, auto_refresh: bool):
    """Change a materialized view's options (ALTER MATERIALIZED VIEW).

    Parameters
    ----------
    name: str
        Name of the view to alter.
    auto_refresh: bool
        Required keyword; forwarded as the ``auto_refresh`` keyword.
    """
    # Delegate to the underlying connection; no value is returned.
    alteration = self._conn.alter_materialized_view(
        name, auto_refresh=auto_refresh
    )
    LOOP.run(alteration)
def drop_materialized_view(self, name: str):
    """Remove a materialized view definition (DROP MATERIALIZED VIEW).

    Parameters
    ----------
    name: str
        Name of the view to drop.
    """
    # Delegate to the underlying connection; no value is returned.
    removal = self._conn.drop_materialized_view(name)
    LOOP.run(removal)
def list_materialized_views(self):
    """Return the registered materialized view definitions.

    Delegates to ``list_materialized_views`` on the underlying
    connection and returns whatever ``LOOP.run`` yields for that call.
    """
    listing = self._conn.list_materialized_views()
    return LOOP.run(listing)
def list_jobs(self):
    """Return the in-flight server-side jobs across the database's tables.

    Delegates to ``list_jobs`` on the underlying connection and returns
    whatever ``LOOP.run`` yields for that call.
    """
    listing = self._conn.list_jobs()
    return LOOP.run(listing)
class LanceDBConnection(DBConnection):
"""
A connection to a LanceDB database.
@@ -1787,6 +1882,75 @@ class AsyncConnection(object):
)
return AsyncTable(table)
# -- Derived compute: functions, materialized views, jobs -------------
# Server-backed features (LanceDB Enterprise / Cloud); local
# connections raise NotImplementedError for now.
async def create_function(
    self,
    name: str,
    language: str,
    return_type: str,
    body: str,
    options: Optional[Dict[str, str]] = None,
):
    """Register a user-defined function (CREATE FUNCTION).

    Parameters
    ----------
    name: str
        Name of the function.
    language: str
        Implementation language.
    return_type: str
        SQL return type of the function.
    body: str
        Function body.
    options: dict, optional
        Additional string-to-string settings, forwarded unchanged.
    """
    # All five arguments go positionally to the inner connection; the
    # awaited result is discarded, so the coroutine resolves to None.
    registration = self._inner.create_function(
        name, language, return_type, body, options
    )
    await registration
async def list_functions(self):
    """Return the registered functions (SHOW FUNCTIONS).

    Awaits ``list_functions`` on the inner connection and returns its
    result unchanged.
    """
    listing = await self._inner.list_functions()
    return listing
async def drop_function(self, name: str):
    """Remove a registered function (DROP FUNCTION).

    Parameters
    ----------
    name: str
        Name of the function to drop.
    """
    # The awaited result is discarded, so the coroutine resolves to None.
    removal = self._inner.drop_function(name)
    await removal
async def create_materialized_view(
    self,
    name: str,
    query: str,
    *,
    auto_refresh: bool = False,
    with_no_data: bool = False,
) -> Optional[str]:
    """Define a materialized view.

    Parameters
    ----------
    name: str
        Name of the view.
    query: str
        Query defining the view.
    auto_refresh: bool, default False
        Forwarded unchanged as the ``auto_refresh`` keyword.
    with_no_data: bool, default False
        Forwarded unchanged as the ``with_no_data`` keyword.

    Returns
    -------
    Optional[str]
        The initial-population job id, or None when with_no_data=True.
    """
    # The two flags travel as keyword arguments, exactly as received.
    flags = {"auto_refresh": auto_refresh, "with_no_data": with_no_data}
    job_id = await self._inner.create_materialized_view(name, query, **flags)
    return job_id
async def refresh_materialized_view(
    self,
    name: str,
    *,
    src_version: Optional[int] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Refresh a materialized view and return the refresh job id.

    Parameters
    ----------
    name: str
        Name of the view to refresh.
    src_version: int, optional
        Forwarded unchanged as the ``src_version`` keyword.
    num_workers: int, optional
        Forwarded unchanged as the ``num_workers`` keyword.
    max_workers: int, optional
        Forwarded unchanged as the ``max_workers`` keyword.
    """
    # Keyword-only tuning knobs are passed through by name.
    tuning = {
        "src_version": src_version,
        "num_workers": num_workers,
        "max_workers": max_workers,
    }
    job_id = await self._inner.refresh_materialized_view(name, **tuning)
    return job_id
async def alter_materialized_view(self, name: str, *, auto_refresh: bool):
    """Change a materialized view's options.

    Parameters
    ----------
    name: str
        Name of the view to alter.
    auto_refresh: bool
        Required keyword; handed to the inner connection as the second
        positional argument.
    """
    # The awaited result is discarded, so the coroutine resolves to None.
    alteration = self._inner.alter_materialized_view(name, auto_refresh)
    await alteration
async def drop_materialized_view(self, name: str):
    """Remove a materialized view definition.

    Parameters
    ----------
    name: str
        Name of the view to drop.
    """
    # The awaited result is discarded, so the coroutine resolves to None.
    removal = self._inner.drop_materialized_view(name)
    await removal
async def list_materialized_views(self):
    """Return the registered materialized view definitions.

    Awaits ``list_materialized_views`` on the inner connection and
    returns its result unchanged.
    """
    listing = await self._inner.list_materialized_views()
    return listing
async def list_jobs(self):
    """Return the in-flight server-side jobs across the database's tables.

    Awaits ``list_jobs`` on the inner connection and returns its result
    unchanged.
    """
    listing = await self._inner.list_jobs()
    return listing
async def rename_table(
self,
cur_name: str,

View File

@@ -887,6 +887,33 @@ class RemoteTable(Table):
def add_columns(self, transforms: Dict[str, str]) -> AddColumnsResult:
    """Add columns to the table from a mapping of string transforms.

    Forwards ``transforms`` unchanged to ``add_columns`` on the
    underlying table and returns whatever ``LOOP.run`` yields for that
    call.
    """
    addition = self._table.add_columns(transforms)
    return LOOP.run(addition)
def refresh_column(
    self,
    columns,
    *,
    where: Optional[str] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Trigger recompute of computed columns (REFRESH COLUMN).

    The expression is resolved server-side from each column's stored
    binding; columns bound to the same struct-returning function
    refresh together. Server-backed feature (LanceDB Enterprise /
    Cloud).

    Parameters
    ----------
    columns: str or iterable of str
        One column name, or several; a lone string is treated as a
        single-element list.
    where: str, optional
        Forwarded unchanged as the ``where`` keyword.
    num_workers: int, optional
        Forwarded unchanged as the ``num_workers`` keyword.
    max_workers: int, optional
        Forwarded unchanged as the ``max_workers`` keyword.

    Returns
    -------
    str
        The refresh job id.
    """
    # Normalise to a fresh list so a bare string is not split into
    # characters and the caller's iterable is never handed over as-is.
    names = [columns] if isinstance(columns, str) else list(columns)
    refresh = self._table.refresh_column(
        names,
        where=where,
        num_workers=num_workers,
        max_workers=max_workers,
    )
    return LOOP.run(refresh)
def alter_columns(
self, *alterations: Iterable[Dict[str, str]]
) -> AlterColumnsResult:

View File

@@ -3714,6 +3714,33 @@ class LanceTable(Table):
) -> AddColumnsResult:
return LOOP.run(self._table.add_columns(transforms))
def refresh_column(
    self,
    columns,
    *,
    where: Optional[str] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Trigger recompute of computed columns (REFRESH COLUMN).

    The expression is resolved server-side from each column's stored
    binding; columns bound to the same struct-returning function
    refresh together. Server-backed feature (LanceDB Enterprise /
    Cloud).

    Parameters
    ----------
    columns: str or iterable of str
        One column name, or several; a lone string is treated as a
        single-element list.
    where: str, optional
        Forwarded unchanged as the ``where`` keyword.
    num_workers: int, optional
        Forwarded unchanged as the ``num_workers`` keyword.
    max_workers: int, optional
        Forwarded unchanged as the ``max_workers`` keyword.

    Returns
    -------
    str
        The refresh job id.
    """
    # Normalise to a fresh list so a bare string is not split into
    # characters and the caller's iterable is never handed over as-is.
    names = [columns] if isinstance(columns, str) else list(columns)
    refresh = self._table.refresh_column(
        names,
        where=where,
        num_workers=num_workers,
        max_workers=max_workers,
    )
    return LOOP.run(refresh)
def alter_columns(
self, *alterations: Iterable[Dict[str, str]]
) -> AlterColumnsResult:
@@ -5390,6 +5417,25 @@ class AsyncTable:
return await self._inner.update(updates_sql, where)
async def refresh_column(
    self,
    columns,
    *,
    where: Optional[str] = None,
    num_workers: Optional[int] = None,
    max_workers: Optional[int] = None,
) -> str:
    """Trigger recompute of computed columns (REFRESH COLUMN).

    Server-backed feature.

    Parameters
    ----------
    columns: str or iterable of str
        One column name, or several; a lone string is treated as a
        single-element list.
    where: str, optional
        Handed to the inner table as the ``where_clause`` keyword.
    num_workers: int, optional
        Forwarded unchanged as the ``num_workers`` keyword.
    max_workers: int, optional
        Forwarded unchanged as the ``max_workers`` keyword.

    Returns
    -------
    str
        The refresh job id.
    """
    # Normalise to a fresh list so a bare string is not split into
    # characters and the caller's iterable is never handed over as-is.
    names = [columns] if isinstance(columns, str) else list(columns)
    job_id = await self._inner.refresh_column(
        names,
        where_clause=where,
        num_workers=num_workers,
        max_workers=max_workers,
    )
    return job_id
async def add_columns(
self, transforms: dict[str, str] | pa.field | List[pa.field] | pa.Schema
) -> AddColumnsResult: