mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-03 02:50:41 +00:00
feat: SDK surface for functions, materialized views, jobs, refresh_column
Adds the derived-compute interface to the SDK:
- Database trait: create/list/drop_function, create/refresh/alter/
drop/list_materialized_view, list_jobs -- default implementations
return Error::NotSupported (NotImplementedError in python), so
existing Database impls are unaffected; local single-node
implementations are planned. BaseTable gains refresh_column with
the same default.
- RemoteDatabase/RemoteTable implement them against the server REST
routes (/v1/function/*, /v1/materialized_view/*, /v1/job/list,
/v1/table/{id}/refresh_column), with mock-HTTP unit tests.
- Connection/Table public methods, pyo3 bindings (FunctionInfo,
MaterializedViewInfo, JobInfo pyclasses), and python wrappers:
sync on the DBConnection base (shared by local and remote
connections), async on AsyncConnection; refresh_column on
LanceTable, RemoteTable, and AsyncTable.
This commit is contained in:
@@ -563,6 +563,101 @@ class DBConnection(EnforceOverrides):
|
||||
raise NotImplementedError("serialize is not supported for this connection type")
|
||||
|
||||
|
||||
# -- Derived compute: functions, materialized views, jobs -------------
|
||||
# Server-backed features (LanceDB Enterprise / Cloud); local
|
||||
# connections raise NotImplementedError for now.
|
||||
|
||||
def create_function(
|
||||
self,
|
||||
name: str,
|
||||
language: str,
|
||||
return_type: str,
|
||||
body: str,
|
||||
options: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""Register a UDF (CREATE FUNCTION).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
Function name.
|
||||
language: str
|
||||
Implementation language (currently "python").
|
||||
return_type: str
|
||||
SQL return type, e.g. "FLOAT", "FLOAT[1536]",
|
||||
"STRUCT(a FLOAT, b VARCHAR)", "TABLE(chunk VARCHAR, idx INT)".
|
||||
body: str
|
||||
Function body: source text, or base64 cloudpickle bytes when
|
||||
options["body_format"] == "cloudpickle".
|
||||
options: dict, optional
|
||||
input_columns, pip, num_gpus, batch_size, timeout,
|
||||
error_policy, docker_image, body_format, ...
|
||||
"""
|
||||
LOOP.run(self._conn.create_function(name, language, return_type, body, options))
|
||||
|
||||
def list_functions(self):
|
||||
"""List registered functions (SHOW FUNCTIONS)."""
|
||||
return LOOP.run(self._conn.list_functions())
|
||||
|
||||
def drop_function(self, name: str):
|
||||
"""Drop a registered function (DROP FUNCTION)."""
|
||||
LOOP.run(self._conn.drop_function(name))
|
||||
|
||||
def create_materialized_view(
|
||||
self,
|
||||
name: str,
|
||||
query: str,
|
||||
*,
|
||||
auto_refresh: bool = False,
|
||||
with_no_data: bool = False,
|
||||
) -> Optional[str]:
|
||||
"""Create a materialized view (CREATE MATERIALIZED VIEW).
|
||||
|
||||
`query` is the view's SELECT statement, e.g.
|
||||
"SELECT id, embed(body) AS vec FROM articles WHERE id > 1".
|
||||
Returns the initial-population job id, or None when
|
||||
with_no_data=True.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_materialized_view(
|
||||
name, query, auto_refresh=auto_refresh, with_no_data=with_no_data
|
||||
)
|
||||
)
|
||||
|
||||
def refresh_materialized_view(
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
src_version: Optional[int] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Refresh a materialized view; returns the refresh job id."""
|
||||
return LOOP.run(
|
||||
self._conn.refresh_materialized_view(
|
||||
name,
|
||||
src_version=src_version,
|
||||
num_workers=num_workers,
|
||||
max_workers=max_workers,
|
||||
)
|
||||
)
|
||||
|
||||
def alter_materialized_view(self, name: str, *, auto_refresh: bool):
|
||||
"""Update a materialized view's options (ALTER MATERIALIZED VIEW)."""
|
||||
LOOP.run(self._conn.alter_materialized_view(name, auto_refresh=auto_refresh))
|
||||
|
||||
def drop_materialized_view(self, name: str):
|
||||
"""Drop a materialized view definition (DROP MATERIALIZED VIEW)."""
|
||||
LOOP.run(self._conn.drop_materialized_view(name))
|
||||
|
||||
def list_materialized_views(self):
|
||||
"""List registered materialized view definitions."""
|
||||
return LOOP.run(self._conn.list_materialized_views())
|
||||
|
||||
def list_jobs(self):
|
||||
"""List inflight server-side jobs across the database's tables."""
|
||||
return LOOP.run(self._conn.list_jobs())
|
||||
|
||||
class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
A connection to a LanceDB database.
|
||||
@@ -1787,6 +1882,75 @@ class AsyncConnection(object):
|
||||
)
|
||||
return AsyncTable(table)
|
||||
|
||||
# -- Derived compute: functions, materialized views, jobs -------------
|
||||
# Server-backed features (LanceDB Enterprise / Cloud); local
|
||||
# connections raise NotImplementedError for now.
|
||||
|
||||
async def create_function(
|
||||
self,
|
||||
name: str,
|
||||
language: str,
|
||||
return_type: str,
|
||||
body: str,
|
||||
options: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""Register a UDF (CREATE FUNCTION)."""
|
||||
await self._inner.create_function(name, language, return_type, body, options)
|
||||
|
||||
async def list_functions(self):
|
||||
"""List registered functions (SHOW FUNCTIONS)."""
|
||||
return await self._inner.list_functions()
|
||||
|
||||
async def drop_function(self, name: str):
|
||||
"""Drop a registered function (DROP FUNCTION)."""
|
||||
await self._inner.drop_function(name)
|
||||
|
||||
async def create_materialized_view(
|
||||
self,
|
||||
name: str,
|
||||
query: str,
|
||||
*,
|
||||
auto_refresh: bool = False,
|
||||
with_no_data: bool = False,
|
||||
) -> Optional[str]:
|
||||
"""Create a materialized view; returns the initial-population
|
||||
job id, or None when with_no_data=True."""
|
||||
return await self._inner.create_materialized_view(
|
||||
name, query, auto_refresh=auto_refresh, with_no_data=with_no_data
|
||||
)
|
||||
|
||||
async def refresh_materialized_view(
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
src_version: Optional[int] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Refresh a materialized view; returns the refresh job id."""
|
||||
return await self._inner.refresh_materialized_view(
|
||||
name,
|
||||
src_version=src_version,
|
||||
num_workers=num_workers,
|
||||
max_workers=max_workers,
|
||||
)
|
||||
|
||||
async def alter_materialized_view(self, name: str, *, auto_refresh: bool):
|
||||
"""Update a materialized view's options."""
|
||||
await self._inner.alter_materialized_view(name, auto_refresh)
|
||||
|
||||
async def drop_materialized_view(self, name: str):
|
||||
"""Drop a materialized view definition."""
|
||||
await self._inner.drop_materialized_view(name)
|
||||
|
||||
async def list_materialized_views(self):
|
||||
"""List registered materialized view definitions."""
|
||||
return await self._inner.list_materialized_views()
|
||||
|
||||
async def list_jobs(self):
|
||||
"""List inflight server-side jobs across the database's tables."""
|
||||
return await self._inner.list_jobs()
|
||||
|
||||
async def rename_table(
|
||||
self,
|
||||
cur_name: str,
|
||||
|
||||
@@ -887,6 +887,33 @@ class RemoteTable(Table):
|
||||
def add_columns(self, transforms: Dict[str, str]) -> AddColumnsResult:
|
||||
return LOOP.run(self._table.add_columns(transforms))
|
||||
|
||||
def refresh_column(
|
||||
self,
|
||||
columns,
|
||||
*,
|
||||
where: Optional[str] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Trigger recompute of computed columns (REFRESH COLUMN).
|
||||
|
||||
The expression is resolved server-side from each column's stored
|
||||
binding; columns bound to the same struct-returning function
|
||||
refresh together. Returns the refresh job id. Server-backed
|
||||
feature (LanceDB Enterprise / Cloud).
|
||||
"""
|
||||
if isinstance(columns, str):
|
||||
columns = [columns]
|
||||
return LOOP.run(
|
||||
self._table.refresh_column(
|
||||
list(columns),
|
||||
where=where,
|
||||
num_workers=num_workers,
|
||||
max_workers=max_workers,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def alter_columns(
|
||||
self, *alterations: Iterable[Dict[str, str]]
|
||||
) -> AlterColumnsResult:
|
||||
|
||||
@@ -3714,6 +3714,33 @@ class LanceTable(Table):
|
||||
) -> AddColumnsResult:
|
||||
return LOOP.run(self._table.add_columns(transforms))
|
||||
|
||||
def refresh_column(
|
||||
self,
|
||||
columns,
|
||||
*,
|
||||
where: Optional[str] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Trigger recompute of computed columns (REFRESH COLUMN).
|
||||
|
||||
The expression is resolved server-side from each column's stored
|
||||
binding; columns bound to the same struct-returning function
|
||||
refresh together. Returns the refresh job id. Server-backed
|
||||
feature (LanceDB Enterprise / Cloud).
|
||||
"""
|
||||
if isinstance(columns, str):
|
||||
columns = [columns]
|
||||
return LOOP.run(
|
||||
self._table.refresh_column(
|
||||
list(columns),
|
||||
where=where,
|
||||
num_workers=num_workers,
|
||||
max_workers=max_workers,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def alter_columns(
|
||||
self, *alterations: Iterable[Dict[str, str]]
|
||||
) -> AlterColumnsResult:
|
||||
@@ -5390,6 +5417,25 @@ class AsyncTable:
|
||||
|
||||
return await self._inner.update(updates_sql, where)
|
||||
|
||||
async def refresh_column(
|
||||
self,
|
||||
columns,
|
||||
*,
|
||||
where: Optional[str] = None,
|
||||
num_workers: Optional[int] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
) -> str:
|
||||
"""Trigger recompute of computed columns (REFRESH COLUMN).
|
||||
Returns the refresh job id. Server-backed feature."""
|
||||
if isinstance(columns, str):
|
||||
columns = [columns]
|
||||
return await self._inner.refresh_column(
|
||||
list(columns),
|
||||
where_clause=where,
|
||||
num_workers=num_workers,
|
||||
max_workers=max_workers,
|
||||
)
|
||||
|
||||
async def add_columns(
|
||||
self, transforms: dict[str, str] | pa.field | List[pa.field] | pa.Schema
|
||||
) -> AddColumnsResult:
|
||||
|
||||
Reference in New Issue
Block a user