feat: allow fast_search on python remote table (#1747)

Add `fast_search` parameter to query builder and remote table to support
skipping flat search in remote search
This commit is contained in:
Ryan Green
2024-10-14 14:39:54 -06:00
committed by GitHub
parent e7b56b7b2a
commit 679a70231e
3 changed files with 26 additions and 1 deletions

View File

@@ -88,6 +88,11 @@ class Query(pydantic.BaseModel):
tuning advice.
offset: int
The offset to start fetching results from
fast_search: bool
Skip a flat search of unindexed data. This will improve
search performance but search results will not include unindexed data.
- *default False*.
"""
vector_column: Optional[str] = None
@@ -124,6 +129,8 @@ class Query(pydantic.BaseModel):
offset: int = 0
fast_search: bool = False
class LanceQueryBuilder(ABC):
"""An abstract query builder. Subclasses are defined for vector search,
@@ -139,6 +146,7 @@ class LanceQueryBuilder(ABC):
vector_column_name: str,
ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
) -> LanceQueryBuilder:
"""
Create a query builder based on the given query and query type.
@@ -155,6 +163,8 @@ class LanceQueryBuilder(ABC):
If "auto", the query type is inferred based on the query.
vector_column_name: str
The name of the vector column to use for vector search.
fast_search: bool
Skip flat search of unindexed data.
"""
# Check hybrid search first as it supports empty query pattern
if query_type == "hybrid":
@@ -196,7 +206,9 @@ class LanceQueryBuilder(ABC):
else:
raise TypeError(f"Unsupported query type: {type(query)}")
return LanceVectorQueryBuilder(table, query, vector_column_name, str_query)
return LanceVectorQueryBuilder(
table, query, vector_column_name, str_query, fast_search
)
@classmethod
def _resolve_query(cls, table, query, query_type, vector_column_name):
@@ -565,6 +577,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str,
str_query: Optional[str] = None,
fast_search: bool = False,
):
super().__init__(table)
self._query = query
@@ -575,6 +588,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._prefilter = False
self._reranker = None
self._str_query = str_query
self._fast_search = fast_search
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use.
@@ -675,6 +689,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
vector_column=self._vector_column,
with_row_id=self._with_row_id,
offset=self._offset,
fast_search=self._fast_search,
)
result_set = self._table._execute_query(query, batch_size)
if self._reranker is not None:

View File

@@ -50,6 +50,8 @@ class VectorQuery(BaseModel):
vector_column: str = VECTOR_COLUMN_NAME
fast_search: bool = False
@attrs.define
class VectorQueryResult:

View File

@@ -270,6 +270,7 @@ class RemoteTable(Table):
vector_column_name: Optional[str] = None,
query_type="auto",
fts_columns: Optional[Union[str, List[str]]] = None,
fast_search: bool = False,
) -> LanceVectorQueryBuilder:
"""Create a search query to find the nearest neighbors
of the given query vector. We currently support [vector search][search]
@@ -314,6 +315,12 @@ class RemoteTable(Table):
- If the table has multiple vector columns then the *vector_column_name*
needs to be specified. Otherwise, an error is raised.
fast_search: bool, optional
Skip a flat search of unindexed data. This may improve
search performance but search results will not include unindexed data.
- *default False*.
Returns
-------
LanceQueryBuilder
@@ -343,6 +350,7 @@ class RemoteTable(Table):
query_type,
vector_column_name=vector_column_name,
fts_columns=fts_columns,
fast_search=fast_search,
)
def _execute_query(