feat(view): materialized views are first-class indexable + searchable

Add View.create_index / create_scalar_index / create_fts_index / search,
each of which delegates to the table returned by open_table(name). A
materialized view is a real Lance dataset; these methods let it be indexed
and searched like any other table, closing the parity gap with Geneva
(whose create_materialized_view returns a first-class Table).

The server-side create_index handler records the indexes declared on a view
so they can be re-applied after a full refresh (which overwrites the dataset
and drops its indexes); the re-apply step is wired into the sophon engine.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Wyatt Alt
2026-06-14 11:04:35 -07:00
committed by Jack Ye
parent b9f33ba1c9
commit fd0a3b97d0

View File

@@ -87,7 +87,11 @@ def _struct_fields(hint):
if dataclasses.is_dataclass(hint):
return [(f.name, f.type) for f in dataclasses.fields(hint)]
# TypedDict detection: a dict subclass with __annotations__.
if isinstance(hint, type) and issubclass(hint, dict) and typing.get_type_hints(hint):
if (
isinstance(hint, type)
and issubclass(hint, dict)
and typing.get_type_hints(hint)
):
return list(typing.get_type_hints(hint).items())
return None
@@ -398,17 +402,22 @@ def _format_env(env: "dict[str, str] | list[str]") -> str:
def _escape_body(body: str) -> str:
# The server unescapes \n / \t in single-quoted bodies; encode real
# newlines accordingly and escape quotes.
return body.replace("\\", "\\\\").replace("'", "''").replace("\n", "\\n").replace("\t", "\\t")
return (
body.replace("\\", "\\\\")
.replace("'", "''")
.replace("\n", "\\n")
.replace("\t", "\\t")
)
def udf(fn=None, **kwargs):
"""Decorate a function as a scalar (or struct-returning) UDF.
@udf
def doubled(val: int) -> float: ...
@udf
def doubled(val: int) -> float: ...
@udf(pip=["torch>=2"], num_gpus=1)
def embed(body: str) -> list[float]: ...
@udf(pip=["torch>=2"], num_gpus=1)
def embed(body: str) -> list[float]: ...
"""
if fn is not None:
return Udf(fn, **kwargs)
@@ -509,6 +518,30 @@ class View:
def drop(self) -> None:
self.conn.drop_materialized_view(self.name)
# A materialized view is a first-class table: it can be indexed and
# searched like any other. These open the materialized dataset by name and
# delegate. Indexes declared this way are recorded against the view, so the
# engine re-applies them after a full refresh rebuilds the dataset (a full
# refresh overwrites the dataset, which would otherwise drop its indices).
def _table(self):
return self.conn.open_table(self.name)
def create_index(self, *args, **kwargs):
"""Build an index on the materialized view (see Table.create_index)."""
return self._table().create_index(*args, **kwargs)
def create_scalar_index(self, *args, **kwargs):
"""Build a scalar index on the materialized view."""
return self._table().create_scalar_index(*args, **kwargs)
def create_fts_index(self, *args, **kwargs):
"""Build a full-text-search index on the materialized view."""
return self._table().create_fts_index(*args, **kwargs)
def search(self, *args, **kwargs):
"""Search the materialized view (vector / FTS / hybrid)."""
return self._table().search(*args, **kwargs)
# Matches "<digits>/<digits>" and captures the two numbers as groups 1 and 2.
# NOTE(review): presumably a "done/total" progress counter — confirm at the
# call sites, which are not visible here.
_PROGRESS = re.compile(r"(\d+)/(\d+)")
@@ -645,7 +678,9 @@ class AsyncJobHandle:
if job is not None and job.committed:
return "finished"
await asyncio.sleep(poll)
raise TimeoutError(f"job {self.id} still {await self.status()} after {timeout}s")
raise TimeoutError(
f"job {self.id} still {await self.status()} after {timeout}s"
)
async def cancel(self) -> None:
job = await self._job()