From fd0a3b97d0d43650a3083accdabe56bdf6f941ca Mon Sep 17 00:00:00 2001 From: Wyatt Alt Date: Sun, 14 Jun 2026 11:04:35 -0700 Subject: [PATCH] feat(view): materialized views are first-class indexable + searchable Add View.create_index / create_scalar_index / create_fts_index / search as pass-throughs to open_table(name). A materialized view is a real Lance dataset; these let it be indexed and searched like any other table, closing the parity gap with Geneva (whose create_materialized_view returns a first-class Table). The server-side create_index handler records indexes declared on a view so they survive a full refresh (which overwrites the dataset, dropping its indices); that re-apply is wired in the sophon engine. Co-Authored-By: Claude Opus 4.8 (1M context) --- python/python/lancedb/udf.py | 49 ++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/python/python/lancedb/udf.py b/python/python/lancedb/udf.py index ae38aa069..44417fc57 100644 --- a/python/python/lancedb/udf.py +++ b/python/python/lancedb/udf.py @@ -87,7 +87,11 @@ def _struct_fields(hint): if dataclasses.is_dataclass(hint): return [(f.name, f.type) for f in dataclasses.fields(hint)] # TypedDict detection: a dict subclass with __annotations__. - if isinstance(hint, type) and issubclass(hint, dict) and typing.get_type_hints(hint): + if ( + isinstance(hint, type) + and issubclass(hint, dict) + and typing.get_type_hints(hint) + ): return list(typing.get_type_hints(hint).items()) return None @@ -398,17 +402,22 @@ def _format_env(env: "dict[str, str] | list[str]") -> str: def _escape_body(body: str) -> str: # The server unescapes \n / \t in single-quoted bodies; encode real # newlines accordingly and escape quotes. - return body.replace("\\", "\\\\").replace("'", "''").replace("\n", "\\n").replace("\t", "\\t") + return ( + body.replace("\\", "\\\\") + .replace("'", "''") + .replace("\n", "\\n") + .replace("\t", "\\t") + ) def udf(fn=None, **kwargs): """Decorate a function as a scalar (or struct-returning) UDF. - @udf - def doubled(val: int) -> float: ... + @udf + def doubled(val: int) -> float: ... - @udf(pip=["torch>=2"], num_gpus=1) - def embed(body: str) -> list[float]: ... + @udf(pip=["torch>=2"], num_gpus=1) + def embed(body: str) -> list[float]: ... """ if fn is not None: return Udf(fn, **kwargs) @@ -509,6 +518,30 @@ class View: def drop(self) -> None: self.conn.drop_materialized_view(self.name) + # A materialized view is a first-class table: it can be indexed and + # searched like any other. These open the materialized dataset by name and + # delegate. Indexes declared this way are recorded against the view, so the + # engine re-applies them after a full refresh rebuilds the dataset (a full + # refresh overwrites the dataset, which would otherwise drop its indices). + def _table(self): + return self.conn.open_table(self.name) + + def create_index(self, *args, **kwargs): + """Build an index on the materialized view (see Table.create_index).""" + return self._table().create_index(*args, **kwargs) + + def create_scalar_index(self, *args, **kwargs): + """Build a scalar index on the materialized view.""" + return self._table().create_scalar_index(*args, **kwargs) + + def create_fts_index(self, *args, **kwargs): + """Build a full-text-search index on the materialized view.""" + return self._table().create_fts_index(*args, **kwargs) + + def search(self, *args, **kwargs): + """Search the materialized view (vector / FTS / hybrid).""" + return self._table().search(*args, **kwargs) + _PROGRESS = re.compile(r"(\d+)/(\d+)") @@ -645,7 +678,9 @@ class AsyncJobHandle: if job is not None and job.committed: return "finished" await asyncio.sleep(poll) - raise TimeoutError(f"job {self.id} still {await self.status()} after {timeout}s") + raise TimeoutError( + f"job {self.id} still {await self.status()} after {timeout}s" + ) async def cancel(self) -> None: job = await self._job()