fix(python): preserve original distance and score in hybrid queries (#2061)

Fixes #2031 When we do hybrid search, we normalize the scores. We do this calculation in-place, because the Rerankers expect the `_distance` and `_score` columns to be the normalized ones. So I've changed the logic so that we restore the original distance and scores by matching on row ids.
2026-05-14 10:30:40 +00:00 · 2025-01-23 13:54:26 -08:00
parent 52b79d2b1e
commit 28e1b70e4b
3 changed files with 126 additions and 24 deletions
--- a/python/python/tests/test_hybrid_query.py
+++ b/python/python/tests/test_hybrid_query.py
@@ -3,7 +3,9 @@

 import lancedb

+from lancedb.query import LanceHybridQueryBuilder
 import pyarrow as pa
+import pyarrow.compute as pc
 import pytest
 import pytest_asyncio

@@ -110,3 +112,23 @@ async def test_explain_plan(table: AsyncTable):
    assert "KNNVectorDistance" in plan
    assert "FTS Search Plan" in plan
    assert "LanceScan" in plan
+
+
+def test_normalize_scores():
+    cases = [
+        (pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
+        (pa.array([2.0, 10.0, 20.0]), pa.array([0.0, 8.0 / 18.0, 1.0])),
+        (pa.array([0.0, 0.0, 0.0]), pa.array([0.0, 0.0, 0.0])),
+        (pa.array([10.0, 9.9999999999999]), pa.array([0.0, 0.0])),
+    ]
+
+    for input, expected in cases:
+        for invert in [True, False]:
+            result = LanceHybridQueryBuilder._normalize_scores(input, invert)
+
+            if invert:
+                expected = pc.subtract(1.0, expected)
+
+            assert pc.equal(
+                result, expected
+            ), f"Expected {expected} but got {result} for invert={invert}"