Address review comments

- Make Debug formatting of TopDocs clearer. - Add unit tests for limit and offset on TopCollector. - Change API for using offset to a fluent interface. - Add some context to the docstring to clarify what limit and offset are equivalent to in other projects.
2026-01-07 17:42:55 +00:00 · 2020-05-16 10:21:22 +01:00
parent 3f2cd73ecb
commit 25b1fdf8d2
2 changed files with 69 additions and 22 deletions
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -70,25 +70,25 @@ where
    /// # Panics
    /// The method panics if limit is 0
    pub fn with_limit(limit: usize) -> TopCollector<T> {
-        Self::with_limit_and_offset(limit, 0)
-    }
-
-    /// Creates a top collector, with a number of documents equal to "limit" and
-    /// skipping the first "offset" documents.
-    ///
-    /// # Panics
-    /// The method panics if limit is 0
-    pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopCollector<T> {
        if limit < 1 {
            panic!("Limit must be strictly greater than 0.");
        }
-        TopCollector {
+        Self {
            limit,
-            offset,
+            offset: 0,
            _marker: PhantomData,
        }
    }

+    /// Skip the first "offset" documents when collecting.
+    ///
+    /// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
+    /// Lucene's TopDocsCollector.
+    pub fn and_offset(mut self, offset: usize) -> TopCollector<T> {
+        self.offset = offset;
+        self
+    }
+
    pub fn limit(&self) -> usize {
        self.limit
    }
@@ -129,7 +129,10 @@ where
        segment_id: SegmentLocalId,
        _: &SegmentReader,
    ) -> crate::Result<TopSegmentCollector<F>> {
-        Ok(TopSegmentCollector::new(segment_id, self.limit + self.offset))
+        Ok(TopSegmentCollector::new(
+            segment_id,
+            self.limit + self.offset,
+        ))
    }
 }

@@ -203,7 +206,7 @@ impl<T: PartialOrd + Clone> TopSegmentCollector<T> {

 #[cfg(test)]
 mod tests {
-    use super::TopSegmentCollector;
+    use super::{TopCollector, TopSegmentCollector};
    use crate::DocAddress;

    #[test]
@@ -264,6 +267,48 @@ mod tests {
            top_collector_limit_3.harvest()[..2].to_vec(),
        );
    }
+
+    #[test]
+    fn test_top_collector_with_limit_and_offset() {
+        let collector = TopCollector::with_limit(2).and_offset(1);
+
+        let results = collector
+            .merge_fruits(vec![vec![
+                (0.9, DocAddress(0, 1)),
+                (0.8, DocAddress(0, 2)),
+                (0.7, DocAddress(0, 3)),
+                (0.6, DocAddress(0, 4)),
+                (0.5, DocAddress(0, 5)),
+            ]])
+            .unwrap();
+
+        assert_eq!(
+            results,
+            vec![(0.8, DocAddress(0, 2)), (0.7, DocAddress(0, 3)),]
+        );
+    }
+
+    #[test]
+    fn test_top_collector_with_limit_larger_than_set_and_offset() {
+        let collector = TopCollector::with_limit(2).and_offset(1);
+
+        let results = collector
+            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .unwrap();
+
+        assert_eq!(results, vec![(0.8, DocAddress(0, 2)),]);
+    }
+
+    #[test]
+    fn test_top_collector_with_limit_and_offset_larger_than_set() {
+        let collector = TopCollector::with_limit(2).and_offset(20);
+
+        let results = collector
+            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .unwrap();
+
+        assert_eq!(results, vec![]);
+    }
 }

 #[cfg(all(test, feature = "unstable"))]
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -60,7 +60,7 @@ pub struct TopDocs(TopCollector<Score>);

 impl fmt::Debug for TopDocs {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "TopDocs({}, {})", self.0.limit(), self.0.offset())
+        write!(f, "TopDocs(limit={}, offset={})", self.0.limit(), self.0.offset())
    }
 }

@@ -104,11 +104,10 @@ impl TopDocs {
        TopDocs(TopCollector::with_limit(limit))
    }

-    /// Creates a top score collector, with a number of documents equal to "limit" and
-    /// skipping the first "offset" documents. This is useful for pagination.
+    /// Skip the first "offset" documents when collecting.
    ///
-    /// # Panics
-    /// The method panics if limit is 0
+    /// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
+    /// Lucene's TopDocsCollector.
    ///
    /// ```rust
    /// use tantivy::collector::TopDocs;
@@ -126,6 +125,7 @@ impl TopDocs {
    /// index_writer.add_document(doc!(title => "The Diary of Muadib"));
    /// index_writer.add_document(doc!(title => "A Dairy Cow"));
    /// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
+    /// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"));
    /// assert!(index_writer.commit().is_ok());
    ///
    /// let reader = index.reader().unwrap();
@@ -133,12 +133,14 @@ impl TopDocs {
    ///
    /// let query_parser = QueryParser::for_index(&index, vec![title]);
    /// let query = query_parser.parse_query("diary").unwrap();
-    /// let top_docs = searcher.search(&query, &TopDocs::with_limit_and_offset(1, 1)).unwrap();
+    /// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
    ///
-    /// assert_eq!(&top_docs[0], &(0.6099695, DocAddress(0, 3)));
+    /// assert_eq!(top_docs.len(), 2);
+    /// assert_eq!(&top_docs[0], &(0.5204813, DocAddress(0, 4)));
+    /// assert_eq!(&top_docs[1], &(0.4793185, DocAddress(0, 3)));
    /// ```
-    pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopDocs {
-        TopDocs(TopCollector::with_limit_and_offset(limit, offset))
+    pub fn and_offset(self, offset: usize) -> TopDocs {
+        TopDocs(self.0.and_offset(offset))
    }

    /// Set top-K to rank documents by a given fast field.