mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 07:09:57 +00:00
doc(python): document the method in fts (#982)
Co-authored-by: prrao87 <prrao87@gmail.com> Co-authored-by: Prashanth Rao <35005448+prrao87@users.noreply.github.com>
This commit is contained in:
@@ -106,8 +106,8 @@ class Query(pydantic.BaseModel):
|
||||
|
||||
|
||||
class LanceQueryBuilder(ABC):
|
||||
"""Build LanceDB query based on specific query type:
|
||||
vector or full text search.
|
||||
"""An abstract query builder. Subclasses are defined for vector search,
|
||||
full text search, hybrid, and plain SQL filtering.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
@@ -118,6 +118,22 @@ class LanceQueryBuilder(ABC):
|
||||
query_type: str,
|
||||
vector_column_name: str,
|
||||
) -> LanceQueryBuilder:
|
||||
"""
|
||||
Create a query builder based on the given query and query type.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
table: Table
|
||||
The table to query.
|
||||
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]]
|
||||
The query to use. If None, an empty query builder is returned,
|
||||
which performs simple SQL filtering.
|
||||
query_type: str
|
||||
The type of query to perform. One of "vector", "fts", "hybrid", or "auto".
|
||||
If "auto", the query type is inferred based on the query.
|
||||
vector_column_name: str
|
||||
The name of the vector column to use for vector search.
|
||||
"""
|
||||
if query is None:
|
||||
return LanceEmptyQueryBuilder(table)
|
||||
|
||||
@@ -636,6 +652,16 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
|
||||
class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
"""
|
||||
A query builder that performs hybrid vector and full text search.
|
||||
Results are combined and reranked based on the specified reranker.
|
||||
By default, the results are reranked using the LinearCombinationReranker.
|
||||
|
||||
To make the vector and fts results comparable, the scores are normalized.
|
||||
Instead of normalizing the raw scores, the `normalize` parameter can be set to
|
||||
"rank" in the `rerank` method to convert the scores to ranks and normalize the ranks.
|
||||
"""
|
||||
|
||||
def __init__(self, table: "Table", query: str, vector_column: str):
|
||||
super().__init__(table)
|
||||
self._validate_fts_index()
|
||||
|
||||
@@ -177,10 +177,18 @@ def test_syntax(table):
|
||||
table.create_fts_index("text")
|
||||
with pytest.raises(ValueError, match="Syntax Error"):
|
||||
table.search("they could have been dogs OR cats").limit(10).to_list()
|
||||
|
||||
# these should work
|
||||
|
||||
# terms queries
|
||||
table.search('"they could have been dogs" OR cats').limit(10).to_list()
|
||||
table.search("(they AND could) OR (have AND been AND dogs) OR cats").limit(
|
||||
10
|
||||
).to_list()
|
||||
|
||||
# phrase queries
|
||||
table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list()
|
||||
# this should work
|
||||
table.search('"they could have been dogs OR cats"').limit(10).to_list()
|
||||
# this should work too
|
||||
table.search('''"the cats OR dogs were not really 'pets' at all"''').limit(
|
||||
10
|
||||
).to_list()
|
||||
|
||||
Reference in New Issue
Block a user