mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-03 20:30:42 +00:00
fix: robust handling of empty result when reranking (#2313)
While running experiments, I found some edge cases: depending on the underlying reranking library, some rerankers don't handle empty lists well. This PR manually checks whether the result set to be reranked is empty.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **Bug Fixes**
  - Enhanced search result processing by ensuring that reordering only occurs when valid, non-empty results are available, thereby preventing unnecessary operations and potential errors.
- **Tests**
  - Added automated tests to verify that empty search result sets are handled correctly, ensuring consistent behavior across various rerankers.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -457,3 +457,45 @@ def test_voyageai_reranker(tmp_path, use_tantivy):
|
||||
reranker = VoyageAIReranker(model_name="rerank-2")
|
||||
table, schema = get_test_table(tmp_path, use_tantivy)
|
||||
_run_test_reranker(reranker, table, "single player experience", None, schema)
|
||||
|
||||
|
||||
def test_empty_result_reranker():
    """Check that reranking an empty result set works for each reranker.

    Creates an empty in-memory table with an FTS index on ``text`` and, for
    every reranker in the list below, runs a reranked vector search and a
    reranked full-text search against it. Both searches must return zero
    rows; the vector search result must additionally expose the
    ``_relevance_score`` column.

    The test is skipped when ``sentence_transformers`` cannot be imported.
    """
    pytest.importorskip("sentence_transformers")
    db = lancedb.connect("memory://")

    # Define schema
    schema = pa.schema(
        [
            ("id", pa.int64()),
            ("text", pa.string()),
            ("vector", pa.list_(pa.float32(), 128)),  # 128-dimensional vector
        ]
    )

    # Create empty table with schema
    empty_table = db.create_table("empty_table", schema=schema, mode="overwrite")
    empty_table.create_fts_index("text", use_tantivy=False, replace=True)

    # NOTE(review): the commented-out rerankers are presumably disabled
    # because they need API keys or extra dependencies -- confirm before
    # enabling them here.
    for reranker in [
        CrossEncoderReranker(),
        # ColbertReranker(),
        # AnswerdotaiRerankers(),
        # OpenaiReranker(),
        # JinaReranker(),
        # VoyageAIReranker(model_name="rerank-2"),
    ]:
        # Vector search: the query vector matches the 128-dim schema.
        results = (
            empty_table.search(list(range(128)))
            .limit(3)
            .rerank(reranker, "query")
            .to_arrow()
        )
        # check if empty set contains _relevance_score column
        assert "_relevance_score" in results.column_names
        assert len(results) == 0

        # Full-text search through the FTS index created above.
        results = (
            empty_table.search("query", query_type="fts")
            .limit(3)
            .rerank(reranker)
            .to_arrow()
        )
        # The table has no rows, so the reranked FTS result must be empty
        # too; previously this result was computed but never checked.
        assert len(results) == 0
|
||||
|
||||
Reference in New Issue
Block a user