feat: add reciprocal rank fusion reranker (#1456)

Implements https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf

Refactors the hybrid-search-only rerankers test to avoid repetition.
This commit is contained in:
Ayush Chaurasia
2024-07-23 21:37:17 +05:30
committed by GitHub
parent 4ee229490c
commit 0255221086
3 changed files with 75 additions and 1 deletions

View File

@@ -7,6 +7,8 @@ from lancedb.conftest import MockTextEmbeddingFunction # noqa
from lancedb.embeddings import EmbeddingFunctionRegistry
from lancedb.pydantic import LanceModel, Vector
from lancedb.rerankers import (
LinearCombinationReranker,
RRFReranker,
CohereReranker,
ColbertReranker,
CrossEncoderReranker,
@@ -140,7 +142,7 @@ def _run_test_reranker(reranker, table, query, query_vector, schema):
assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), err
def test_linear_combination(tmp_path):
def _run_test_hybrid_reranker(reranker, tmp_path):
table, schema = get_test_table(tmp_path)
# The default reranker
result1 = (
@@ -177,6 +179,16 @@ def test_linear_combination(tmp_path):
)
def test_linear_combination(tmp_path):
    """Run the shared hybrid-reranker test with LinearCombinationReranker."""
    # Constructed with no arguments, so the reranker's defaults are exercised.
    _run_test_hybrid_reranker(LinearCombinationReranker(), tmp_path)
def test_rrf_reranker(tmp_path):
    """Run the shared hybrid-reranker test with RRFReranker."""
    # Constructed with no arguments, so the reranker's defaults are exercised.
    _run_test_hybrid_reranker(RRFReranker(), tmp_path)
@pytest.mark.skipif(
os.environ.get("COHERE_API_KEY") is None, reason="COHERE_API_KEY not set"
)