mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-03 11:00:40 +00:00
feat(view): materialized views are first-class indexable + searchable
Add View.create_index / create_scalar_index / create_fts_index / search as pass-throughs to open_table(name). A materialized view is a real Lance dataset; these methods let it be indexed and searched like any other table, closing the parity gap with Geneva (whose create_materialized_view returns a first-class Table). The server-side create_index handler records indexes declared on a view so they survive a full refresh (which overwrites the dataset, dropping its indexes); re-applying the recorded indexes after a refresh is wired in the sophon engine. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -87,7 +87,11 @@ def _struct_fields(hint):
|
||||
if dataclasses.is_dataclass(hint):
|
||||
return [(f.name, f.type) for f in dataclasses.fields(hint)]
|
||||
# TypedDict detection: a dict subclass with __annotations__.
|
||||
if isinstance(hint, type) and issubclass(hint, dict) and typing.get_type_hints(hint):
|
||||
if (
|
||||
isinstance(hint, type)
|
||||
and issubclass(hint, dict)
|
||||
and typing.get_type_hints(hint)
|
||||
):
|
||||
return list(typing.get_type_hints(hint).items())
|
||||
return None
|
||||
|
||||
@@ -398,17 +402,22 @@ def _format_env(env: "dict[str, str] | list[str]") -> str:
|
||||
def _escape_body(body: str) -> str:
|
||||
# The server unescapes \n / \t in single-quoted bodies; encode real
|
||||
# newlines accordingly and escape quotes.
|
||||
return body.replace("\\", "\\\\").replace("'", "''").replace("\n", "\\n").replace("\t", "\\t")
|
||||
return (
|
||||
body.replace("\\", "\\\\")
|
||||
.replace("'", "''")
|
||||
.replace("\n", "\\n")
|
||||
.replace("\t", "\\t")
|
||||
)
|
||||
|
||||
|
||||
def udf(fn=None, **kwargs):
|
||||
"""Decorate a function as a scalar (or struct-returning) UDF.
|
||||
|
||||
@udf
|
||||
def doubled(val: int) -> float: ...
|
||||
@udf
|
||||
def doubled(val: int) -> float: ...
|
||||
|
||||
@udf(pip=["torch>=2"], num_gpus=1)
|
||||
def embed(body: str) -> list[float]: ...
|
||||
@udf(pip=["torch>=2"], num_gpus=1)
|
||||
def embed(body: str) -> list[float]: ...
|
||||
"""
|
||||
if fn is not None:
|
||||
return Udf(fn, **kwargs)
|
||||
@@ -509,6 +518,30 @@ class View:
|
||||
def drop(self) -> None:
|
||||
self.conn.drop_materialized_view(self.name)
|
||||
|
||||
# A materialized view is a first-class table: it can be indexed and
|
||||
# searched like any other. These open the materialized dataset by name and
|
||||
# delegate. Indexes declared this way are recorded against the view, so the
|
||||
# engine re-applies them after a full refresh rebuilds the dataset (a full
|
||||
# refresh overwrites the dataset, which would otherwise drop its indices).
|
||||
def _table(self):
|
||||
return self.conn.open_table(self.name)
|
||||
|
||||
def create_index(self, *args, **kwargs):
|
||||
"""Build an index on the materialized view (see Table.create_index)."""
|
||||
return self._table().create_index(*args, **kwargs)
|
||||
|
||||
def create_scalar_index(self, *args, **kwargs):
|
||||
"""Build a scalar index on the materialized view."""
|
||||
return self._table().create_scalar_index(*args, **kwargs)
|
||||
|
||||
def create_fts_index(self, *args, **kwargs):
|
||||
"""Build a full-text-search index on the materialized view."""
|
||||
return self._table().create_fts_index(*args, **kwargs)
|
||||
|
||||
def search(self, *args, **kwargs):
|
||||
"""Search the materialized view (vector / FTS / hybrid)."""
|
||||
return self._table().search(*args, **kwargs)
|
||||
|
||||
|
||||
_PROGRESS = re.compile(r"(\d+)/(\d+)")
|
||||
|
||||
@@ -645,7 +678,9 @@ class AsyncJobHandle:
|
||||
if job is not None and job.committed:
|
||||
return "finished"
|
||||
await asyncio.sleep(poll)
|
||||
raise TimeoutError(f"job {self.id} still {await self.status()} after {timeout}s")
|
||||
raise TimeoutError(
|
||||
f"job {self.id} still {await self.status()} after {timeout}s"
|
||||
)
|
||||
|
||||
async def cancel(self) -> None:
|
||||
job = await self._job()
|
||||
|
||||
Reference in New Issue
Block a user