doc(python): document the method in fts (#982)

Co-authored-by: prrao87 <prrao87@gmail.com>
Co-authored-by: Prashanth Rao <35005448+prrao87@users.noreply.github.com>
This commit is contained in:
Chang She
2024-03-04 16:42:24 -08:00
committed by Weston Pace
parent b5326d31e9
commit 10089481c0
4 changed files with 87 additions and 16 deletions

View File

@@ -106,8 +106,8 @@ class Query(pydantic.BaseModel):
class LanceQueryBuilder(ABC):
"""Build LanceDB query based on specific query type:
vector or full text search.
"""An abstract query builder. Subclasses are defined for vector search,
full text search, hybrid, and plain SQL filtering.
"""
@classmethod
@@ -118,6 +118,22 @@ class LanceQueryBuilder(ABC):
query_type: str,
vector_column_name: str,
) -> LanceQueryBuilder:
"""
Create a query builder based on the given query and query type.
Parameters
----------
table: Table
The table to query.
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]]
The query to use. If None, an empty query builder is returned
which performs simple SQL filtering.
query_type: str
The type of query to perform. One of "vector", "fts", "hybrid", or "auto".
If "auto", the query type is inferred based on the query.
vector_column_name: str
The name of the vector column to use for vector search.
"""
if query is None:
return LanceEmptyQueryBuilder(table)
@@ -636,6 +652,16 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
class LanceHybridQueryBuilder(LanceQueryBuilder):
"""
A query builder that performs hybrid vector and full text search.
Results are combined and reranked based on the specified reranker.
By default, the results are reranked using the LinearCombinationReranker.
To make the vector and fts results comparable, the scores are normalized.
Instead of normalizing scores, the `normalize` parameter can be set to "rank"
in the `rerank` method to convert the scores to ranks and then normalize them.
"""
def __init__(self, table: "Table", query: str, vector_column: str):
super().__init__(table)
self._validate_fts_index()

View File

@@ -177,10 +177,18 @@ def test_syntax(table):
table.create_fts_index("text")
with pytest.raises(ValueError, match="Syntax Error"):
table.search("they could have been dogs OR cats").limit(10).to_list()
# these should work
# terms queries
table.search('"they could have been dogs" OR cats').limit(10).to_list()
table.search("(they AND could) OR (have AND been AND dogs) OR cats").limit(
10
).to_list()
# phrase queries
table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list()
# this should work
table.search('"they could have been dogs OR cats"').limit(10).to_list()
# this should work too
table.search('''"the cats OR dogs were not really 'pets' at all"''').limit(
10
).to_list()