mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-06 05:40:40 +00:00
fix: linear reranker applies wrong score to combine (#2035)
related to #2014 this fixes: - linear reranker may lost some results if the merging consumes all vector results earlier than fts results - linear reranker inverts the fts score but only vector distance can be inverted --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
@@ -3,6 +3,7 @@ import random
|
||||
|
||||
import lancedb
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
from lancedb.conftest import MockTextEmbeddingFunction # noqa
|
||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
@@ -281,6 +282,31 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
|
||||
@pytest.mark.parametrize("use_tantivy", [True, False])
|
||||
def test_linear_combination(tmp_path, use_tantivy):
|
||||
reranker = LinearCombinationReranker()
|
||||
|
||||
vector_results = pa.Table.from_pydict(
|
||||
{
|
||||
"_rowid": [0, 1, 2, 3, 4],
|
||||
"_distance": [0.1, 0.2, 0.3, 0.4, 0.5],
|
||||
"_text": ["a", "b", "c", "d", "e"],
|
||||
}
|
||||
)
|
||||
|
||||
fts_results = pa.Table.from_pydict(
|
||||
{
|
||||
"_rowid": [1, 2, 3, 4, 5],
|
||||
"_score": [0.1, 0.2, 0.3, 0.4, 0.5],
|
||||
"_text": ["b", "c", "d", "e", "f"],
|
||||
}
|
||||
)
|
||||
|
||||
combined_results = reranker.merge_results(vector_results, fts_results, 1.0)
|
||||
assert len(combined_results) == 6
|
||||
assert "_rowid" in combined_results.column_names
|
||||
assert "_text" in combined_results.column_names
|
||||
assert "_distance" not in combined_results.column_names
|
||||
assert "_score" not in combined_results.column_names
|
||||
assert "_relevance_score" in combined_results.column_names
|
||||
|
||||
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user