From 7dba793629cb56b939c0c4183a7e52718edd25da Mon Sep 17 00:00:00 2001
From: devteamaegis <devteam.aegis@gmail.com>
Date: Tue, 26 May 2026 18:26:34 -0400
Subject: [PATCH] fix(rerankers): inverted scores and incorrect missing-FTS
 penalty in LinearCombinationReranker (#3437)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

`LinearCombinationReranker.merge_results` has two related bugs that make
it return **inverted relevance rankings** — the least relevant document
ranks first (closes #3154).

### Bug 1 — `_combine_score` subtracts from 1, inverting the final ranking

```python
def _combine_score(self, vector_score, fts_score):
    return 1 - (self.weight * vector_score + (1 - self.weight) * fts_score)
```

Both `vector_score` (already converted via `_invert_score`) and
`fts_score` (BM25 relevance) are in **higher-is-better** space. Wrapping
the weighted average in `1 - (...)` flips the direction: a perfectly
matching document (`vector_score=1, fts_score=1`) gets `_relevance_score
= 0.0`, while a non-matching document gets a high score.

### Bug 2 — Documents missing an FTS score are rewarded, not penalised

```python
fts_score = result.get("_score", fill)  # fill=1.0 by default
```

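When a document has no FTS match, `fts_score = fill = 1.0`. Under the
bug-1 formula every FTS contribution is *subtracted*
(`1 - (... + 0.3 * fts_score)`), so the 1.0 placeholder costs the
document less than a genuine BM25 match whose score exceeds 1.0 (e.g.
2.41) — counterintuitively ranking documents *without* an FTS match
above documents *with* one. Once the `1 -` inversion is removed, the
same placeholder would become an outright bonus of `0.3 * 1.0`. By
contrast, missing vector results correctly receive
`_invert_score(fill) = 0.0` (penalised).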

## Fix

**Bug 1** — remove the `1 -` inversion from `_combine_score`:

```python
def _combine_score(self, vector_score, fts_score):
    return self.weight * vector_score + (1 - self.weight) * fts_score
```

**Bug 2** — use `1 - fill` for missing FTS scores so both penalties are
symmetric (mirror of what `_invert_score(fill)` already does for missing
vector scores):

```python
fts_score = result.get("_score", 1 - fill)  # was: fill
```

With `fill=1.0` (default): `1 - 1.0 = 0.0` — missing-FTS entries
contribute `0` to the FTS term, identical to how missing-vector entries
contribute `0` to the vector term.

## Verification

Concrete example from the issue. With `weight=0.7`, `fill=1.0`:

| Document | `_distance` | `_score` | Old `_relevance_score` | New `_relevance_score` |
|----------|-------------|----------|------------------------|------------------------|
| `apple orange` | 0.0 (best) | 2.41 (only FTS) | 0.30 (**wrong: ranked 2nd**) | 1.42 (**correct: ranked 1st**) |
| `banana grape` | 0.9999 (worst) | — | 0.70 (**wrong: ranked 1st**) | 0.00 (**correct: ranked last**) |

## Tests

Two regression tests added to `python/python/tests/test_rerankers.py`:

- `test_linear_combination_best_match_ranks_first` — the document with
the smallest distance **and** an FTS match must have the highest
`_relevance_score`.
- `test_linear_combination_missing_fts_is_penalised` — a document with
a positive FTS score must beat an otherwise-equal document with no FTS
match.

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 .../lancedb/rerankers/linear_combination.py   | 17 +++-
 python/python/tests/test_rerankers.py         | 86 +++++++++++++++++++
 2 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/python/python/lancedb/rerankers/linear_combination.py b/python/python/lancedb/rerankers/linear_combination.py
index 9f1d645c9..74f23ea61 100644
--- a/python/python/lancedb/rerankers/linear_combination.py
+++ b/python/python/lancedb/rerankers/linear_combination.py
@@ -102,8 +102,15 @@ class LinearCombinationReranker(Reranker):
 
         combined_list = []
         for row_id, result in results.items():
+            # Convert vector distance to a relevance score in [0, 1] where
+            # higher is better.  Missing vector entries are penalised with
+            # `_invert_score(fill)` = 1 - fill (= 0.0 for the default fill=1).
             vector_score = self._invert_score(result.get("_distance", fill))
-            fts_score = result.get("_score", fill)
+            # FTS scores (BM25) are already in a "higher = more relevant" space.
+            # Missing FTS entries are penalised symmetrically: we use
+            # `1 - fill` so that the same `fill` value drives both missing-vector
+            # and missing-FTS penalties in the same direction.
+            fts_score = result.get("_score", 1 - fill)
             result["_relevance_score"] = self._combine_score(vector_score, fts_score)
             combined_list.append(result)
 
@@ -123,8 +130,12 @@ class LinearCombinationReranker(Reranker):
         return tbl
 
     def _combine_score(self, vector_score, fts_score):
-        # these scores represent distance
-        return 1 - (self.weight * vector_score + (1 - self.weight) * fts_score)
+        # Both vector_score (inverted distance) and fts_score are in a
+        # "higher = more relevant" space.  A straight weighted average gives
+        # higher _relevance_score to better matches, as expected.
+        # Previously this returned `1 - (...)` which inverted the final
+        # ranking so that the *least* relevant document ranked first.
+        return self.weight * vector_score + (1 - self.weight) * fts_score
 
     def _invert_score(self, dist: float):
         # Invert the score between relevance and distance
diff --git a/python/python/tests/test_rerankers.py b/python/python/tests/test_rerankers.py
index 3d028cb3a..c886772bb 100644
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -603,3 +603,89 @@ def test_cross_encoder_reranker_return_all(tmp_path):
     assert "_relevance_score" in result.column_names
     assert "_score" in result.column_names
     assert "_distance" in result.column_names
+
+
+# ---------------------------------------------------------------------------
+# Regression tests for LinearCombinationReranker scoring bugs (issue #3154)
+# ---------------------------------------------------------------------------
+
+
+def test_linear_combination_best_match_ranks_first():
+    """
+    The document that is BOTH the closest vector match AND the only FTS match
+    must rank first.  Previously _combine_score subtracted from 1, inverting
+    the ranking so the worst document ranked highest.
+    """
+    reranker = LinearCombinationReranker(weight=0.7, return_score="all")
+
+    # rowid 0: perfect vector match, sole FTS match  → should rank 1st
+    # rowid 1: mediocre vector, no FTS match
+    # rowid 2: bad vector, no FTS match
+    vector_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0, 1, 2],
+            "_distance": [0.0, 0.5, 0.9],
+        }
+    )
+    fts_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0],
+            "_score": [1.0],
+        }
+    )
+
+    combined = reranker.merge_results(vector_results, fts_results, fill=1.0)
+    scores = dict(
+        zip(
+            combined["_rowid"].to_pylist(),
+            combined["_relevance_score"].to_pylist(),
+        )
+    )
+
+    # rowid 0 must have the highest relevance score
+    assert scores[0] > scores[1], (
+        f"Best match (rowid 0, score={scores[0]:.4f}) should beat "
+        f"mid match (rowid 1, score={scores[1]:.4f})"
+    )
+    assert scores[1] > scores[2], (
+        f"Mid match (rowid 1, score={scores[1]:.4f}) should beat "
+        f"bad match (rowid 2, score={scores[2]:.4f})"
+    )
+
+
+def test_linear_combination_missing_fts_is_penalised():
+    """
+    A document with no FTS match must score *lower* than a document that
+    has a mediocre FTS match, everything else being equal.  Previously
+    missing-FTS entries used fill=1.0 directly, which gave them a reward
+    (via the 1-(...) inversion) instead of a penalty.
+    """
+    reranker = LinearCombinationReranker(weight=0.5, return_score="all")
+
+    vector_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0, 1],
+            "_distance": [0.2, 0.2],  # identical vector scores
+        }
+    )
+    fts_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0],  # rowid 1 has no FTS match
+            "_score": [0.3],  # small FTS score
+        }
+    )
+
+    combined = reranker.merge_results(vector_results, fts_results, fill=1.0)
+    scores = dict(
+        zip(
+            combined["_rowid"].to_pylist(),
+            combined["_relevance_score"].to_pylist(),
+        )
+    )
+
+    # rowid 0 has a small FTS score; rowid 1 has none.
+    # Even a small FTS contribution should beat having none at all.
+    assert scores[0] > scores[1], (
+        f"Document with FTS score (rowid 0, {scores[0]:.4f}) should beat "
+        f"document with no FTS match (rowid 1, {scores[1]:.4f})"
+    )