diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index e930d9024..e3f7e6c84 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -57,6 +57,7 @@ impl Eq for ComparableDoc {} pub(crate) struct TopCollector { pub limit: usize, + pub offset: usize, _marker: PhantomData, } @@ -69,11 +70,21 @@ where /// # Panics /// The method panics if limit is 0 pub fn with_limit(limit: usize) -> TopCollector { + Self::with_limit_and_offset(limit, 0) + } + + /// Creates a top collector, with a number of documents equal to "limit" and + /// skipping the first "offset" documents. + /// + /// # Panics + /// The method panics if limit is 0 + pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopCollector { if limit < 1 { panic!("Limit must be strictly greater than 0."); } TopCollector { limit, + offset, _marker: PhantomData, } } @@ -82,6 +93,10 @@ where self.limit } + pub fn offset(&self) -> usize { + self.offset + } + pub fn merge_fruits( &self, children: Vec>, @@ -92,7 +107,7 @@ where let mut top_collector = BinaryHeap::new(); for child_fruit in children { for (feature, doc) in child_fruit { - if top_collector.len() < self.limit { + if top_collector.len() < (self.limit + self.offset) { top_collector.push(ComparableDoc { feature, doc }); } else if let Some(mut head) = top_collector.peek_mut() { if head.feature < feature { @@ -104,6 +119,7 @@ where Ok(top_collector .into_sorted_vec() .into_iter() + .skip(self.offset) .map(|cdoc| (cdoc.feature, cdoc.doc)) .collect()) } @@ -113,7 +129,7 @@ where segment_id: SegmentLocalId, _: &SegmentReader, ) -> crate::Result> { - Ok(TopSegmentCollector::new(segment_id, self.limit)) + Ok(TopSegmentCollector::new(segment_id, self.limit + self.offset)) } } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 260103e98..f3e59d190 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -60,7 +60,7 @@ pub struct TopDocs(TopCollector); impl fmt::Debug for TopDocs { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "TopDocs({})", self.0.limit()) + write!(f, "TopDocs({}, {})", self.0.limit(), self.0.offset()) } } @@ -104,6 +104,43 @@ impl TopDocs { TopDocs(TopCollector::with_limit(limit)) } + /// Creates a top score collector, with a number of documents equal to "limit" and + /// skipping the first "offset" documents. This is useful for pagination. + /// + /// # Panics + /// The method panics if limit is 0 + /// + /// ```rust + /// use tantivy::collector::TopDocs; + /// use tantivy::query::QueryParser; + /// use tantivy::schema::{Schema, TEXT}; + /// use tantivy::{doc, DocAddress, Index}; + /// + /// let mut schema_builder = Schema::builder(); + /// let title = schema_builder.add_text_field("title", TEXT); + /// let schema = schema_builder.build(); + /// let index = Index::create_in_ram(schema); + /// + /// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + /// index_writer.add_document(doc!(title => "The Name of the Wind")); + /// index_writer.add_document(doc!(title => "The Diary of Muadib")); + /// index_writer.add_document(doc!(title => "A Dairy Cow")); + /// index_writer.add_document(doc!(title => "The Diary of a Young Girl")); + /// assert!(index_writer.commit().is_ok()); + /// + /// let reader = index.reader().unwrap(); + /// let searcher = reader.searcher(); + /// + /// let query_parser = QueryParser::for_index(&index, vec![title]); + /// let query = query_parser.parse_query("diary").unwrap(); + /// let top_docs = searcher.search(&query, &TopDocs::with_limit_and_offset(1, 1)).unwrap(); + /// + /// assert_eq!(&top_docs[0], &(0.6099695, DocAddress(0, 3))); + /// ``` + pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopDocs { + TopDocs(TopCollector::with_limit_and_offset(limit, offset)) + } + /// Set top-K to rank documents by a given fast field. /// /// ```rust