mirror of
https://github.com/lancedb/lancedb.git
synced 2026-04-02 14:00:41 +00:00
feat!: better api for manual hybrid queries (#1575)
Currently, the only documented way of performing hybrid search is by using embedding API and passing string queries that get automatically embedded. There are use cases where users might like to pass vectors and text manually instead. This ticket contains more information and historical context - https://github.com/lancedb/lancedb/issues/937 This breaks a undocumented pathway that allowed passing (vector, text) tuple queries which was intended to be temporary, so this is marked as a breaking change. For all practical purposes, this should not really impact most users ### usage ``` results = table.search(query_type="hybrid") .vector(vector_query) .text(text_query) .limit(5) .to_pandas() ```
This commit is contained in:
@@ -111,7 +111,9 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
|
||||
|
||||
query_vector = table.to_pandas()["vector"][0]
|
||||
result = (
|
||||
table.search((query_vector, query), vector_column_name="vector")
|
||||
table.search(query_type="hybrid", vector_column_name="vector")
|
||||
.vector(query_vector)
|
||||
.text(query)
|
||||
.limit(30)
|
||||
.rerank(reranker=reranker)
|
||||
.to_arrow()
|
||||
@@ -207,14 +209,26 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
|
||||
query = "Our father who art in heaven"
|
||||
query_vector = table.to_pandas()["vector"][0]
|
||||
result = (
|
||||
table.search((query_vector, query), vector_column_name="vector")
|
||||
table.search(query_type="hybrid", vector_column_name="vector")
|
||||
.vector(query_vector)
|
||||
.text(query)
|
||||
.limit(30)
|
||||
.rerank(normalize="score")
|
||||
.to_arrow()
|
||||
)
|
||||
|
||||
assert len(result) == 30
|
||||
|
||||
# Fail if both query and (vector or text) are provided
|
||||
with pytest.raises(ValueError):
|
||||
table.search(query, query_type="hybrid", vector_column_name="vector").vector(
|
||||
query_vector
|
||||
).to_arrow()
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
table.search(query, query_type="hybrid", vector_column_name="vector").text(
|
||||
query
|
||||
).to_arrow()
|
||||
|
||||
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
|
||||
"The _relevance_score column of the results returned by the reranker "
|
||||
"represents the relevance of the result to the query & should "
|
||||
|
||||
Reference in New Issue
Block a user