feat: support mean reciprocal rank reranker (#2671)

The basic idea of MRR is described here:
https://www.evidentlyai.com/ranking-metrics/mean-reciprocal-rank-mrr
I've implemented a weighted version that lets the user set the relative
weights of the vector and FTS results.

The gist is something like this:

### Scenario A: Document at rank 1 in one set, absent from another

```
# Assuming equal weights: weight_vector = 0.5, weight_fts = 0.5
vector_rr = 1.0  # rank 1 → 1/1 = 1.0
fts_rr = 0.0     # absent → 0.0

weighted_mrr = 0.5 × 1.0 + 0.5 × 0.0 = 0.5
```
### Scenario B: Document at rank 1 in one set, rank 2 in another
```
# Same weights: weight_vector = 0.5, weight_fts = 0.5
vector_rr = 1.0  # rank 1 → 1/1 = 1.0
fts_rr = 0.5     # rank 2 → 1/2 = 0.5

weighted_mrr = 0.5 × 1.0 + 0.5 × 0.5 = 0.5 + 0.25 = 0.75
```

With `return_score="all"`, the result looks something like the table below
(taken from the reranker tests).
Because this is a weighted, rank-based reranker, some results may have
the same score.
```
                                                 text                                             vector     _distance      _rowid     _score  _relevance_score
0                                    I am your father  [-0.010703234, 0.069315575, 0.030076642, 0.002...  8.149148e-13  8589934598  10.978719          1.000000
1                          the ground beneath my feet  [-0.09500901, 0.00092102867, 0.0755851, 0.0372...  1.376896e+00  8589934604        NaN          0.250000
2                I find your lack of faith disturbing  [0.07525753, -0.0100010475, 0.09990541, 0.0209...           NaN  8589934595   3.483394          0.250000
3                               but I don't wanna die  [0.033476487, -0.011235877, -0.057625435, -0.0...  1.538222e+00  8589934610   1.130355          0.238095
4   if you strike me down I shall become more powe...  [0.00432201, 0.030120496, 5.3317923e-05, 0.033...  1.381086e+00  8589934594   0.715157          0.216667
5           I see a salty message written in the eves  [-0.04213107, 0.0016004723, 0.061052393, -0.02...  1.638301e+00  8589934603   1.043785          0.133333
6                              but his son was mortal  [0.012462767, 0.049041674, -0.057339743, -0.04...  1.421566e+00  8589934620        NaN          0.125000
7                   I've got a bad feeling about this  [-0.06973199, -0.029960092, 0.02641632, -0.031...           NaN  8589934596   1.043785          0.125000
8    now that's a name I haven't heard in a long time  [-0.014374257, -0.013588792, -0.07487557, 0.03...  1.597573e+00  8589934593   0.848772          0.118056
9                                        he was a god  [-0.0258895, 0.11925236, -0.029397793, 0.05888...  1.423147e+00  8589934618        NaN          0.100000
10                 I wish they would make another one  [-0.14737535, -0.015304729, 0.04318139, -0.061...           NaN  8589934622   1.043785          0.100000
11                                   Kratos had a son  [-0.057455737, 0.13734367, -0.03537109, -0.000...  1.488075e+00  8589934617        NaN          0.083333
12                       I don't wanna live like this  [-0.0028891307, 0.015214227, 0.025183653, 0.08...           NaN  8589934609   1.043785          0.071429
13             I see a mansard roof through the trees  [0.052383978, 0.087759204, 0.014739997, 0.0239...           NaN  8589934602   1.043785          0.062500
14                          great kid don't get cocky  [-0.047043696, 0.054648954, -0.008509666, -0.0...  1.618125e+00  8589934592        NaN          0.055556
```
This commit is contained in:
Ayush Chaurasia
2025-09-23 18:25:18 +05:30
committed by GitHub
parent 05a4ea646a
commit e921c90c1b
3 changed files with 202 additions and 1 deletions

View File

@@ -22,6 +22,7 @@ from lancedb.rerankers import (
JinaReranker,
AnswerdotaiRerankers,
VoyageAIReranker,
MRRReranker,
)
from lancedb.table import LanceTable
@@ -46,6 +47,7 @@ def get_test_table(tmp_path, use_tantivy):
db,
"my_table",
schema=MyTable,
mode="overwrite",
)
# Need to test with a bunch of phrases to make sure sorting is consistent
@@ -96,7 +98,7 @@ def get_test_table(tmp_path, use_tantivy):
)
# Create a fts index
table.create_fts_index("text", use_tantivy=use_tantivy)
table.create_fts_index("text", use_tantivy=use_tantivy, replace=True)
return table, MyTable
@@ -320,6 +322,34 @@ def test_rrf_reranker(tmp_path, use_tantivy):
_run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_mrr_reranker(tmp_path, use_tantivy):
    """Check MRRReranker on the hybrid path and on multi-vector reranking.

    The shared hybrid-reranker checks run first. After that, two vector
    searches over different vector columns are combined with
    ``rerank_multivector`` and the merged result is checked for a
    ``_relevance_score`` column, a row count of at most 20, descending
    scores, and an unchanged row count when one input is passed twice.
    """
    mrr = MRRReranker()
    _run_test_hybrid_reranker(mrr, tmp_path, use_tantivy)

    # Multi-vector part: build one search per vector column.
    table, _ = get_test_table(tmp_path, use_tantivy)
    text_query = "single player experience"
    vector_search, meta_search = (
        table.search(text_query, vector_column_name=column)
        .limit(10)
        .with_row_id(True)
        for column in ("vector", "meta_vector")
    )

    reranked = mrr.rerank_multivector([vector_search, meta_search])
    assert "_relevance_score" in reranked.column_names
    # Two searches limited to 10 rows each can contribute at most 20 rows.
    assert len(reranked) <= 20

    if len(reranked) > 1:
        score_steps = np.diff(reranked.column("_relevance_score").to_numpy())
        assert np.all(score_steps <= 0), "The _relevance_score should be descending."

    # Passing the same search twice must not change the number of rows.
    with_duplicate = mrr.rerank_multivector(
        [vector_search, meta_search, vector_search]
    )
    assert len(with_duplicate) == len(reranked)
def test_rrf_reranker_distance():
data = pa.table(
{