mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-09 21:32:58 +00:00
feat: allow fast_search on python remote table (#1747)
Add a `fast_search` parameter to the query builder and the remote table so that remote searches can skip the flat search of unindexed data.
This commit is contained in:
@@ -88,6 +88,11 @@ class Query(pydantic.BaseModel):
|
||||
tuning advice.
|
||||
offset: int
|
||||
The offset to start fetching results from
|
||||
fast_search: bool
|
||||
Skip a flat search of unindexed data. This will improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
- *default False*.
|
||||
"""
|
||||
|
||||
vector_column: Optional[str] = None
|
||||
@@ -124,6 +129,8 @@ class Query(pydantic.BaseModel):
|
||||
|
||||
offset: int = 0
|
||||
|
||||
fast_search: bool = False
|
||||
|
||||
|
||||
class LanceQueryBuilder(ABC):
|
||||
"""An abstract query builder. Subclasses are defined for vector search,
|
||||
@@ -139,6 +146,7 @@ class LanceQueryBuilder(ABC):
|
||||
vector_column_name: str,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Union[str, List[str]] = [],
|
||||
fast_search: bool = False,
|
||||
) -> LanceQueryBuilder:
|
||||
"""
|
||||
Create a query builder based on the given query and query type.
|
||||
@@ -155,6 +163,8 @@ class LanceQueryBuilder(ABC):
|
||||
If "auto", the query type is inferred based on the query.
|
||||
vector_column_name: str
|
||||
The name of the vector column to use for vector search.
|
||||
fast_search: bool
|
||||
Skip flat search of unindexed data.
|
||||
"""
|
||||
# Check hybrid search first as it supports empty query pattern
|
||||
if query_type == "hybrid":
|
||||
@@ -196,7 +206,9 @@ class LanceQueryBuilder(ABC):
|
||||
else:
|
||||
raise TypeError(f"Unsupported query type: {type(query)}")
|
||||
|
||||
return LanceVectorQueryBuilder(table, query, vector_column_name, str_query)
|
||||
return LanceVectorQueryBuilder(
|
||||
table, query, vector_column_name, str_query, fast_search
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _resolve_query(cls, table, query, query_type, vector_column_name):
|
||||
@@ -565,6 +577,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
query: Union[np.ndarray, list, "PIL.Image.Image"],
|
||||
vector_column: str,
|
||||
str_query: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
@@ -575,6 +588,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
self._prefilter = False
|
||||
self._reranker = None
|
||||
self._str_query = str_query
|
||||
self._fast_search = fast_search
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
@@ -675,6 +689,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
vector_column=self._vector_column,
|
||||
with_row_id=self._with_row_id,
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
)
|
||||
result_set = self._table._execute_query(query, batch_size)
|
||||
if self._reranker is not None:
|
||||
|
||||
@@ -50,6 +50,8 @@ class VectorQuery(BaseModel):
|
||||
|
||||
vector_column: str = VECTOR_COLUMN_NAME
|
||||
|
||||
fast_search: bool = False
|
||||
|
||||
|
||||
@attrs.define
|
||||
class VectorQueryResult:
|
||||
|
||||
@@ -270,6 +270,7 @@ class RemoteTable(Table):
|
||||
vector_column_name: Optional[str] = None,
|
||||
query_type="auto",
|
||||
fts_columns: Optional[Union[str, List[str]]] = None,
|
||||
fast_search: bool = False,
|
||||
) -> LanceVectorQueryBuilder:
|
||||
"""Create a search query to find the nearest neighbors
|
||||
of the given query vector. We currently support [vector search][search]
|
||||
@@ -314,6 +315,12 @@ class RemoteTable(Table):
|
||||
- If the table has multiple vector columns then the *vector_column_name*
|
||||
needs to be specified. Otherwise, an error is raised.
|
||||
|
||||
fast_search: bool, optional
|
||||
Skip a flat search of unindexed data. This may improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
- *default False*.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
@@ -343,6 +350,7 @@ class RemoteTable(Table):
|
||||
query_type,
|
||||
vector_column_name=vector_column_name,
|
||||
fts_columns=fts_columns,
|
||||
fast_search=fast_search,
|
||||
)
|
||||
|
||||
def _execute_query(
|
||||
|
||||
Reference in New Issue
Block a user