From 25b1fdf8d2ee334cc9bfaa4729db31b6b0003892 Mon Sep 17 00:00:00 2001
From: Rob Young
Date: Sat, 16 May 2020 10:21:22 +0100
Subject: [PATCH] Address review comments

- Make Debug formatting of TopDocs clearer.
- Add unit tests for limit and offset on TopCollector.
- Change API for using offset to a fluent interface.
- Add some context to the docstring to clarify what limit and offset are
  equivalent to in other projects.
---
 src/collector/top_collector.rs       | 71 +++++++++++++++++++++++-----
 src/collector/top_score_collector.rs | 20 ++++----
 2 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs
index e3f7e6c84..1047a190b 100644
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -70,25 +70,25 @@ where
     /// # Panics
     /// The method panics if limit is 0
     pub fn with_limit(limit: usize) -> TopCollector<T> {
-        Self::with_limit_and_offset(limit, 0)
-    }
-
-    /// Creates a top collector, with a number of documents equal to "limit" and
-    /// skipping the first "offset" documents.
-    ///
-    /// # Panics
-    /// The method panics if limit is 0
-    pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopCollector<T> {
         if limit < 1 {
             panic!("Limit must be strictly greater than 0.");
         }
-        TopCollector {
+        Self {
             limit,
-            offset,
+            offset: 0,
             _marker: PhantomData,
         }
     }
 
+    /// Skip the first "offset" documents when collecting.
+    ///
+    /// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
+    /// Lucene's TopDocsCollector.
+    pub fn and_offset(mut self, offset: usize) -> TopCollector<T> {
+        self.offset = offset;
+        self
+    }
+
     pub fn limit(&self) -> usize {
         self.limit
     }
@@ -129,7 +129,10 @@ where
         segment_id: SegmentLocalId,
         _: &SegmentReader,
     ) -> crate::Result<TopSegmentCollector<F>> {
-        Ok(TopSegmentCollector::new(segment_id, self.limit + self.offset))
+        Ok(TopSegmentCollector::new(
+            segment_id,
+            self.limit + self.offset,
+        ))
     }
 }
 
@@ -203,7 +206,7 @@ impl TopSegmentCollector {
 
 #[cfg(test)]
 mod tests {
-    use super::TopSegmentCollector;
+    use super::{TopCollector, TopSegmentCollector};
     use crate::DocAddress;
 
     #[test]
@@ -264,6 +267,48 @@ mod tests {
             top_collector_limit_3.harvest()[..2].to_vec(),
         );
     }
+
+    #[test]
+    fn test_top_collector_with_limit_and_offset() {
+        let collector = TopCollector::with_limit(2).and_offset(1);
+
+        let results = collector
+            .merge_fruits(vec![vec![
+                (0.9, DocAddress(0, 1)),
+                (0.8, DocAddress(0, 2)),
+                (0.7, DocAddress(0, 3)),
+                (0.6, DocAddress(0, 4)),
+                (0.5, DocAddress(0, 5)),
+            ]])
+            .unwrap();
+
+        assert_eq!(
+            results,
+            vec![(0.8, DocAddress(0, 2)), (0.7, DocAddress(0, 3)),]
+        );
+    }
+
+    #[test]
+    fn test_top_collector_with_limit_larger_than_set_and_offset() {
+        let collector = TopCollector::with_limit(2).and_offset(1);
+
+        let results = collector
+            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .unwrap();
+
+        assert_eq!(results, vec![(0.8, DocAddress(0, 2)),]);
+    }
+
+    #[test]
+    fn test_top_collector_with_limit_and_offset_larger_than_set() {
+        let collector = TopCollector::with_limit(2).and_offset(20);
+
+        let results = collector
+            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .unwrap();
+
+        assert_eq!(results, vec![]);
+    }
 }
 
 #[cfg(all(test, feature = "unstable"))]
diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs
index f3e59d190..8ec93a5af 100644
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -60,7 +60,7 @@ pub struct TopDocs(TopCollector<Score>);
 
 impl fmt::Debug for TopDocs {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "TopDocs({}, {})", self.0.limit(), self.0.offset())
+        write!(f, "TopDocs(limit={}, offset={})", self.0.limit(), self.0.offset())
     }
 }
 
@@ -104,11 +104,10 @@ impl TopDocs {
         TopDocs(TopCollector::with_limit(limit))
     }
 
-    /// Creates a top score collector, with a number of documents equal to "limit" and
-    /// skipping the first "offset" documents. This is useful for pagination.
+    /// Skip the first "offset" documents when collecting.
     ///
-    /// # Panics
-    /// The method panics if limit is 0
+    /// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
+    /// Lucene's TopDocsCollector.
     ///
     /// ```rust
     /// use tantivy::collector::TopDocs;
@@ -126,6 +125,7 @@
     /// index_writer.add_document(doc!(title => "The Diary of Muadib"));
     /// index_writer.add_document(doc!(title => "A Dairy Cow"));
     /// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
+    /// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"));
     /// assert!(index_writer.commit().is_ok());
     ///
     /// let reader = index.reader().unwrap();
@@ -133,12 +133,14 @@
     ///
     /// let query_parser = QueryParser::for_index(&index, vec![title]);
     /// let query = query_parser.parse_query("diary").unwrap();
-    /// let top_docs = searcher.search(&query, &TopDocs::with_limit_and_offset(1, 1)).unwrap();
+    /// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
     ///
-    /// assert_eq!(&top_docs[0], &(0.6099695, DocAddress(0, 3)));
+    /// assert_eq!(top_docs.len(), 2);
+    /// assert_eq!(&top_docs[0], &(0.5204813, DocAddress(0, 4)));
+    /// assert_eq!(&top_docs[1], &(0.4793185, DocAddress(0, 3)));
     /// ```
-    pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopDocs {
-        TopDocs(TopCollector::with_limit_and_offset(limit, offset))
+    pub fn and_offset(self, offset: usize) -> TopDocs {
+        TopDocs(self.0.and_offset(offset))
     }
 
     /// Set top-K to rank documents by a given fast field.
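
Note (not part of the patch): below is a minimal sketch of how the new fluent API is meant to be used for pagination, assuming an index with a `title` field set up as in the doctest above. The `search_page` function and its `page`/`page_size` parameters are illustrative names for this sketch, not part of tantivy's API.

```rust
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::{DocAddress, Index, Score};

/// Return the zero-based `page`-th page of `page_size` hits for `query_str`.
/// `search_page`, `page` and `page_size` are caller-side names, not tantivy API.
fn search_page(
    index: &Index,
    title: Field,
    query_str: &str,
    page: usize,
    page_size: usize,
) -> tantivy::Result<Vec<(Score, DocAddress)>> {
    let reader = index.reader()?;
    let searcher = reader.searcher();

    let query_parser = QueryParser::for_index(index, vec![title]);
    let query = query_parser.parse_query(query_str).unwrap();

    // `with_limit` is the page size (it still panics on 0); `and_offset`
    // skips the hits of the previous pages, i.e. the SQL equivalent of
    // `LIMIT page_size OFFSET page * page_size`.
    let collector = TopDocs::with_limit(page_size).and_offset(page * page_size);
    searcher.search(&query, &collector)
}
```

Since each segment collector is still created with `self.limit + self.offset` hits (see the `for_segment` hunk above), deep pagination gets proportionally more expensive as the offset grows.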