From 679a70231e843b64cd788c5bb3455e5abef5b6f5 Mon Sep 17 00:00:00 2001 From: Ryan Green Date: Mon, 14 Oct 2024 14:39:54 -0600 Subject: [PATCH] feat: allow fast_search on python remote table (#1747) Add `fast_search` parameter to query builder and remote table to support skipping flat search in remote search --- python/python/lancedb/query.py | 17 ++++++++++++++++- python/python/lancedb/remote/__init__.py | 2 ++ python/python/lancedb/remote/table.py | 8 ++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 8ef97897..c79b8846 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -88,6 +88,11 @@ class Query(pydantic.BaseModel): tuning advice. offset: int The offset to start fetching results from + fast_search: bool + Skip a flat search of unindexed data. This will improve + search performance but search results will not include unindexed data. + + - *default False*. """ vector_column: Optional[str] = None @@ -124,6 +129,8 @@ class Query(pydantic.BaseModel): offset: int = 0 + fast_search: bool = False + class LanceQueryBuilder(ABC): """An abstract query builder. Subclasses are defined for vector search, @@ -139,6 +146,7 @@ class LanceQueryBuilder(ABC): vector_column_name: str, ordering_field_name: Optional[str] = None, fts_columns: Union[str, List[str]] = [], + fast_search: bool = False, ) -> LanceQueryBuilder: """ Create a query builder based on the given query and query type. @@ -155,6 +163,8 @@ class LanceQueryBuilder(ABC): If "auto", the query type is inferred based on the query. vector_column_name: str The name of the vector column to use for vector search. + fast_search: bool + Skip flat search of unindexed data. """ # Check hybrid search first as it supports empty query pattern if query_type == "hybrid": @@ -196,7 +206,9 @@ class LanceQueryBuilder(ABC): else: raise TypeError(f"Unsupported query type: {type(query)}") - return LanceVectorQueryBuilder(table, query, vector_column_name, str_query) + return LanceVectorQueryBuilder( + table, query, vector_column_name, str_query, fast_search + ) @classmethod def _resolve_query(cls, table, query, query_type, vector_column_name): @@ -565,6 +577,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): query: Union[np.ndarray, list, "PIL.Image.Image"], vector_column: str, str_query: Optional[str] = None, + fast_search: bool = False, ): super().__init__(table) self._query = query @@ -575,6 +588,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): self._prefilter = False self._reranker = None self._str_query = str_query + self._fast_search = fast_search def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder: """Set the distance metric to use. @@ -675,6 +689,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): vector_column=self._vector_column, with_row_id=self._with_row_id, offset=self._offset, + fast_search=self._fast_search, ) result_set = self._table._execute_query(query, batch_size) if self._reranker is not None: diff --git a/python/python/lancedb/remote/__init__.py b/python/python/lancedb/remote/__init__.py index fdd0cfae..98cbd2e5 100644 --- a/python/python/lancedb/remote/__init__.py +++ b/python/python/lancedb/remote/__init__.py @@ -50,6 +50,8 @@ class VectorQuery(BaseModel): vector_column: str = VECTOR_COLUMN_NAME + fast_search: bool = False + @attrs.define class VectorQueryResult: diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py index 34e2fe7c..986fbced 100644 --- a/python/python/lancedb/remote/table.py +++ b/python/python/lancedb/remote/table.py @@ -270,6 +270,7 @@ class RemoteTable(Table): vector_column_name: Optional[str] = None, query_type="auto", fts_columns: Optional[Union[str, List[str]]] = None, + fast_search: bool = False, ) -> LanceVectorQueryBuilder: """Create a search query to find the nearest neighbors of the given query vector. We currently support [vector search][search] @@ -314,6 +315,12 @@ class RemoteTable(Table): - If the table has multiple vector columns then the *vector_column_name* needs to be specified. Otherwise, an error is raised. + fast_search: bool, optional + Skip a flat search of unindexed data. This may improve + search performance but search results will not include unindexed data. + + - *default False*. + Returns ------- LanceQueryBuilder @@ -343,6 +350,7 @@ class RemoteTable(Table): query_type, vector_column_name=vector_column_name, fts_columns=fts_columns, + fast_search=fast_search, ) def _execute_query(