From 7dba793629cb56b939c0c4183a7e52718edd25da Mon Sep 17 00:00:00 2001 From: devteamaegis Date: Tue, 26 May 2026 18:26:34 -0400 Subject: [PATCH] fix(rerankers): inverted scores and incorrect missing-FTS penalty in LinearCombinationReranker (#3437) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem `LinearCombinationReranker.merge_results` has two related bugs that make it return **inverted relevance rankings** — the least relevant document ranks first (closes #3154). ### Bug 1 — `_combine_score` subtracts from 1, inverting the final ranking ```python def _combine_score(self, vector_score, fts_score): return 1 - (self.weight * vector_score + (1 - self.weight) * fts_score) ``` Both `vector_score` (already converted via `_invert_score`) and `fts_score` (BM25 relevance) are in **higher-is-better** space. Wrapping the weighted average in `1 - (...)` flips the direction: a perfectly matching document (`vector_score=1, fts_score=1`) gets `_relevance_score = 0.0`, while a non-matching document gets a high score. ### Bug 2 — Documents missing an FTS score are rewarded, not penalised ```python fts_score = result.get("_score", fill) # fill=1.0 by default ``` When a document has no FTS match, `fts_score = fill = 1.0`, i.e. it is treated as if it had a strong FTS match. In `_combine_score` (with the bug-1 formula) this large value is subtracted via `1 - (... + 0.3 * 1.0)`; once that inversion is removed it acts as a **negative penalty**, adding `0.3 * 1.0` and counterintuitively *boosting* the document's score. By contrast, missing vector results correctly receive `_invert_score(fill) = 0.0` (penalised). 
## Fix **Bug 1** — remove the `1 -` inversion from `_combine_score`: ```python def _combine_score(self, vector_score, fts_score): return self.weight * vector_score + (1 - self.weight) * fts_score ``` **Bug 2** — use `1 - fill` for missing FTS scores so both penalties are symmetric (mirror of what `_invert_score(fill)` already does for missing vector scores): ```python fts_score = result.get("_score", 1 - fill) # was: fill ``` With `fill=1.0` (default): `1 - 1.0 = 0.0` — missing-FTS entries contribute `0` to the FTS term, identical to how missing-vector entries contribute `0` to the vector term. ## Verification Concrete example from the issue. With `weight=0.7`, `fill=1.0`: | Document | `_distance` | `_score` | Old `_relevance_score` | New `_relevance_score` | |----------|-------------|----------|------------------------|------------------------| | `apple orange` | 0.0 (best) | 2.41 (only FTS) | -0.42 (**wrong: ranked 2nd**) | 1.42 (**correct: ranked 1st**) | | `banana grape` | 0.9999 (worst) | — | 0.70 (**wrong: ranked 1st**) | 0.00 (**correct: ranked last**) | ## Tests Two regression tests added to `python/python/tests/test_rerankers.py`: - `test_linear_combination_best_match_ranks_first` — the document with the smallest distance **and** an FTS match must have the highest `_relevance_score`. - `test_linear_combination_missing_fts_is_penalised` — a document with any FTS score must beat an otherwise-equal document with no FTS match. 
--------- Co-authored-by: Will Jones --- .../lancedb/rerankers/linear_combination.py | 17 +++- python/python/tests/test_rerankers.py | 86 +++++++++++++++++++ 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/python/python/lancedb/rerankers/linear_combination.py b/python/python/lancedb/rerankers/linear_combination.py index 9f1d645c9..74f23ea61 100644 --- a/python/python/lancedb/rerankers/linear_combination.py +++ b/python/python/lancedb/rerankers/linear_combination.py @@ -102,8 +102,15 @@ class LinearCombinationReranker(Reranker): combined_list = [] for row_id, result in results.items(): + # Convert vector distance to a relevance score in [0, 1] where + # higher is better. Missing vector entries are penalised with + # `_invert_score(fill)` = 1 - fill (= 0.0 for the default fill=1). vector_score = self._invert_score(result.get("_distance", fill)) - fts_score = result.get("_score", fill) + # FTS scores (BM25) are already in a "higher = more relevant" space. + # Missing FTS entries are penalised symmetrically: we use + # `1 - fill` so that the same `fill` value drives both missing-vector + # and missing-FTS penalties in the same direction. + fts_score = result.get("_score", 1 - fill) result["_relevance_score"] = self._combine_score(vector_score, fts_score) combined_list.append(result) @@ -123,8 +130,12 @@ class LinearCombinationReranker(Reranker): return tbl def _combine_score(self, vector_score, fts_score): - # these scores represent distance - return 1 - (self.weight * vector_score + (1 - self.weight) * fts_score) + # Both vector_score (inverted distance) and fts_score are in a + # "higher = more relevant" space. A straight weighted average gives + # higher _relevance_score to better matches, as expected. + # Previously this returned `1 - (...)` which inverted the final + # ranking so that the *least* relevant document ranked first. 
+ return self.weight * vector_score + (1 - self.weight) * fts_score def _invert_score(self, dist: float): # Invert the score between relevance and distance diff --git a/python/python/tests/test_rerankers.py b/python/python/tests/test_rerankers.py index 3d028cb3a..c886772bb 100644 --- a/python/python/tests/test_rerankers.py +++ b/python/python/tests/test_rerankers.py @@ -603,3 +603,89 @@ def test_cross_encoder_reranker_return_all(tmp_path): assert "_relevance_score" in result.column_names assert "_score" in result.column_names assert "_distance" in result.column_names + + +# --------------------------------------------------------------------------- +# Regression tests for LinearCombinationReranker scoring bugs (issue #3154) +# --------------------------------------------------------------------------- + + +def test_linear_combination_best_match_ranks_first(): + """ + The document that is BOTH the closest vector match AND the only FTS match + must rank first. Previously _combine_score subtracted from 1, inverting + the ranking so the worst document ranked highest. 
+ """ + reranker = LinearCombinationReranker(weight=0.7, return_score="all") + + # rowid 0: perfect vector match, sole FTS match → should rank 1st + # rowid 1: mediocre vector, no FTS match + # rowid 2: bad vector, no FTS match + vector_results = pa.Table.from_pydict( + { + "_rowid": [0, 1, 2], + "_distance": [0.0, 0.5, 0.9], + } + ) + fts_results = pa.Table.from_pydict( + { + "_rowid": [0], + "_score": [1.0], + } + ) + + combined = reranker.merge_results(vector_results, fts_results, fill=1.0) + scores = dict( + zip( + combined["_rowid"].to_pylist(), + combined["_relevance_score"].to_pylist(), + ) + ) + + # rowid 0 must have the highest relevance score + assert scores[0] > scores[1], ( + f"Best match (rowid 0, score={scores[0]:.4f}) should beat " + f"mid match (rowid 1, score={scores[1]:.4f})" + ) + assert scores[1] > scores[2], ( + f"Mid match (rowid 1, score={scores[1]:.4f}) should beat " + f"bad match (rowid 2, score={scores[2]:.4f})" + ) + + +def test_linear_combination_missing_fts_is_penalised(): + """ + A document with no FTS match must score *lower* than a document that + has a mediocre FTS match, everything else being equal. Previously + missing-FTS entries used fill=1.0 directly, which gave them a reward + (via the 1-(...) inversion) instead of a penalty. + """ + reranker = LinearCombinationReranker(weight=0.5, return_score="all") + + vector_results = pa.Table.from_pydict( + { + "_rowid": [0, 1], + "_distance": [0.2, 0.2], # identical vector scores + } + ) + fts_results = pa.Table.from_pydict( + { + "_rowid": [0], # rowid 1 has no FTS match + "_score": [0.3], # small FTS score + } + ) + + combined = reranker.merge_results(vector_results, fts_results, fill=1.0) + scores = dict( + zip( + combined["_rowid"].to_pylist(), + combined["_relevance_score"].to_pylist(), + ) + ) + + # rowid 0 has a small FTS score; rowid 1 has none. + # Even a small FTS contribution should beat having none at all. 
+ assert scores[0] > scores[1], ( + f"Document with FTS score (rowid 0, {scores[0]:.4f}) should beat " + f"document with no FTS match (rowid 1, {scores[1]:.4f})" + )