Compare commits

...

2 Commits

Author SHA1 Message Date
Pascal Seitz
f451fa938f explain why naive scorer must accumulate scores in WAND order 2026-06-17 18:58:58 +08:00
Pascal Seitz
2a82dd6f64 fix flaky test 2026-06-17 18:58:58 +08:00

View File

@@ -273,8 +273,14 @@ mod tests {
}
if all_match {
let score: Score =
leader.score() + secondaries.iter_mut().map(|s| s.score()).sum::<Score>();
// Accumulate in the same left-to-right order as the WAND implementation
// (leader first, then each secondary in turn). Float addition is not
// associative, so `leader + secondaries.sum()` gives a different bit
// pattern and can cause spurious nearly_equals failures.
let mut score: Score = leader.score();
for secondary in secondaries.iter_mut() {
score += secondary.score();
}
if score > limit {
heap.push(Float(score));
@@ -417,6 +423,198 @@ mod tests {
}
}
#[test]
fn test_block_wand_intersection_three_scorers_regression() {
    // Minimal failing case found by proptest (CI run 27557430583, job 81460063906).
    //
    // Shape of the input, as `(doc, term_freq)` pairs:
    // - List 0 covers docs 0..=63 with the single exception of doc 16. Doc 8 has tf=80,
    //   doc 26 has tf=4, and every other entry has tf=1.
    // - Lists 1 and 2 have gaps and widely varying term freqs. Both contain docs 16 and 64,
    //   neither of which appears in list 0.
    //
    // The high tf=80 on doc 8 of list 0 makes the WAND upper-bound estimation skip documents
    // that the naive intersection would score.
    let list_0: &[(DocId, u32)] = &[
        (0, 1),
        (1, 1),
        (2, 1),
        (3, 1),
        (4, 1),
        (5, 1),
        (6, 1),
        (7, 1),
        (8, 80),
        (9, 1),
        (10, 1),
        (11, 1),
        (12, 1),
        (13, 1),
        (14, 1),
        (15, 1),
        (17, 1),
        (18, 1),
        (19, 1),
        (20, 1),
        (21, 1),
        (22, 1),
        (23, 1),
        (24, 1),
        (25, 1),
        (26, 4),
        (27, 1),
        (28, 1),
        (29, 1),
        (30, 1),
        (31, 1),
        (32, 1),
        (33, 1),
        (34, 1),
        (35, 1),
        (36, 1),
        (37, 1),
        (38, 1),
        (39, 1),
        (40, 1),
        (41, 1),
        (42, 1),
        (43, 1),
        (44, 1),
        (45, 1),
        (46, 1),
        (47, 1),
        (48, 1),
        (49, 1),
        (50, 1),
        (51, 1),
        (52, 1),
        (53, 1),
        (54, 1),
        (55, 1),
        (56, 1),
        (57, 1),
        (58, 1),
        (59, 1),
        (60, 1),
        (61, 1),
        (62, 1),
        (63, 1),
    ];
    let list_1: &[(DocId, u32)] = &[
        (0, 2),
        (3, 98),
        (7, 93),
        (8, 87),
        (9, 39),
        (10, 2),
        (12, 71),
        (14, 47),
        (15, 76),
        (16, 6),
        (17, 38),
        (19, 61),
        (20, 87),
        (21, 1),
        (22, 5),
        (23, 43),
        (25, 48),
        (26, 87),
        (28, 81),
        (29, 69),
        (30, 7),
        (31, 47),
        (32, 32),
        (33, 38),
        (35, 39),
        (38, 65),
        (39, 98),
        (42, 43),
        (43, 52),
        (44, 99),
        (45, 88),
        (48, 24),
        (51, 61),
        (52, 22),
        (53, 58),
        (55, 26),
        (56, 32),
        (58, 57),
        (60, 29),
        (61, 78),
        (62, 9),
        (63, 44),
        (64, 29),
    ];
    let list_2: &[(DocId, u32)] = &[
        (0, 94),
        (2, 49),
        (3, 63),
        (4, 7),
        (6, 93),
        (7, 17),
        (8, 91),
        (9, 18),
        (10, 85),
        (11, 11),
        (12, 45),
        (13, 42),
        (15, 91),
        (16, 44),
        (17, 36),
        (18, 68),
        (19, 24),
        (20, 17),
        (21, 59),
        (22, 97),
        (24, 20),
        (25, 7),
        (26, 85),
        (27, 69),
        (28, 78),
        (29, 84),
        (30, 35),
        (31, 49),
        (33, 83),
        (34, 97),
        (35, 29),
        (36, 43),
        (37, 59),
        (38, 79),
        (39, 74),
        (40, 21),
        (41, 5),
        (42, 47),
        (43, 27),
        (44, 59),
        (45, 97),
        (46, 91),
        (47, 81),
        (48, 57),
        (49, 47),
        (50, 64),
        (51, 86),
        (52, 60),
        (53, 52),
        (54, 14),
        (55, 23),
        (56, 64),
        (57, 40),
        (58, 5),
        (59, 30),
        (60, 81),
        (61, 62),
        (62, 39),
        (63, 93),
        (64, 82),
    ];
    // One fieldnorm per doc id, 0 through 64 inclusive (65 entries).
    let fieldnorms: &[u32] = &[
        624, 668, 725, 670, 851, 169, 537, 627, 200, 757, 51, 272, 835, 89, 750, 63, 272, 406,
        394, 390, 822, 449, 257, 571, 527, 855, 4, 98, 548, 413, 539, 351, 596, 151, 728, 152,
        766, 829, 20, 828, 477, 251, 743, 646, 136, 477, 909, 907, 266, 341, 676, 161, 40, 384,
        347, 707, 42, 397, 482, 814, 801, 528, 465, 410, 171,
    ];
    // The shared helper takes owned posting lists, so copy each borrowed slice into a Vec.
    let posting_lists_owned: Vec<Vec<(DocId, u32)>> =
        vec![list_0.to_vec(), list_1.to_vec(), list_2.to_vec()];
    test_block_wand_intersection_aux(&posting_lists_owned, fieldnorms);
}
#[test]
fn test_block_wand_intersection_disjoint() {
// Two posting lists with no overlap — intersection is empty.