From 4e84c70387d14a46baf609c087498b670023192c Mon Sep 17 00:00:00 2001 From: PSeitz Date: Wed, 16 Jul 2025 21:44:04 +0800 Subject: [PATCH] Fix TopNComputer for reverse order (#2672) Co-authored-by: Pascal Seitz --- src/collector/top_score_collector.rs | 78 ++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 5 deletions(-) diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index a1067aba4..908126386 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -970,7 +970,7 @@ impl From> for TopNCompu } } -impl TopNComputer +impl TopNComputer where Score: PartialOrd + Clone, D: Ord, @@ -991,7 +991,10 @@ where #[inline] pub fn push(&mut self, feature: Score, doc: D) { if let Some(last_median) = self.threshold.clone() { - if feature < last_median { + if !REVERSE_ORDER && feature > last_median { + return; + } + if REVERSE_ORDER && feature < last_median { return; } } @@ -1026,7 +1029,7 @@ where } /// Returns the top n elements in sorted order. - pub fn into_sorted_vec(mut self) -> Vec> { + pub fn into_sorted_vec(mut self) -> Vec> { if self.buffer.len() > self.top_n { self.truncate_top_n(); } @@ -1037,7 +1040,7 @@ where /// Returns the top n elements in stored order. /// Useful if you do not need the elements in sorted order, /// for example when merging the results of multiple segments. - pub fn into_vec(mut self) -> Vec> { + pub fn into_vec(mut self) -> Vec> { if self.buffer.len() > self.top_n { self.truncate_top_n(); } @@ -1047,9 +1050,11 @@ where #[cfg(test)] mod tests { + use proptest::prelude::*; + use super::{TopDocs, TopNComputer}; use crate::collector::top_collector::ComparableDoc; - use crate::collector::Collector; + use crate::collector::{Collector, DocSetCollector}; use crate::query::{AllQuery, Query, QueryParser}; use crate::schema::{Field, Schema, FAST, STORED, TEXT}; use crate::time::format_description::well_known::Rfc3339; @@ -1144,6 +1149,44 @@ mod tests { } } + proptest! { + #[test] + fn test_topn_computer_asc_prop( + limit in 0..10_usize, + docs in proptest::collection::vec((0..100_u64, 0..100_u64), 0..100_usize), + ) { + let mut computer: TopNComputer<_, _, false> = TopNComputer::new(limit); + for (feature, doc) in &docs { + computer.push(*feature, *doc); + } + let mut comparable_docs = docs.into_iter().map(|(feature, doc)| ComparableDoc { feature, doc }).collect::>(); + comparable_docs.sort(); + comparable_docs.truncate(limit); + prop_assert_eq!( + computer.into_sorted_vec(), + comparable_docs, + ); + } + + #[test] + fn test_topn_computer_desc_prop( + limit in 0..10_usize, + docs in proptest::collection::vec((0..100_u64, 0..100_u64), 0..100_usize), + ) { + let mut computer: TopNComputer<_, _, true> = TopNComputer::new(limit); + for (feature, doc) in &docs { + computer.push(*feature, *doc); + } + let mut comparable_docs = docs.into_iter().map(|(feature, doc)| ComparableDoc { feature, doc }).collect::>(); + comparable_docs.sort(); + comparable_docs.truncate(limit); + prop_assert_eq!( + computer.into_sorted_vec(), + comparable_docs, + ); + } + } + #[test] fn test_top_collector_not_at_capacity_without_offset() -> crate::Result<()> { let index = make_index()?; @@ -1645,4 +1688,29 @@ mod tests { ); Ok(()) } + + #[test] + fn test_topn_computer_asc() { + let mut computer: TopNComputer = TopNComputer::new(2); + + computer.push(1u32, 1u32); + computer.push(2u32, 2u32); + computer.push(3u32, 3u32); + computer.push(2u32, 4u32); + computer.push(4u32, 5u32); + computer.push(1u32, 6u32); + assert_eq!( + computer.into_sorted_vec(), + &[ + ComparableDoc { + feature: 1u32, + doc: 1u32, + }, + ComparableDoc { + feature: 1u32, + doc: 6u32, + } + ] + ); + } }