mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 07:00:41 +00:00
Add offset to TopDocsCollector
Add an offset to TopDocsCollector and TopDocs to make it clearer how to handle pagination. Closes #822
This commit is contained in:
@@ -57,6 +57,7 @@ impl<T: PartialOrd, D: PartialOrd> Eq for ComparableDoc<T, D> {}
|
||||
|
||||
pub(crate) struct TopCollector<T> {
|
||||
pub limit: usize,
|
||||
pub offset: usize,
|
||||
_marker: PhantomData<T>,
|
||||
}
|
||||
|
||||
@@ -69,11 +70,21 @@ where
|
||||
/// # Panics
|
||||
/// The method panics if limit is 0
|
||||
pub fn with_limit(limit: usize) -> TopCollector<T> {
|
||||
Self::with_limit_and_offset(limit, 0)
|
||||
}
|
||||
|
||||
/// Creates a top collector, with a number of documents equal to "limit" and
|
||||
/// skipping the first "offset" documents.
|
||||
///
|
||||
/// # Panics
|
||||
/// The method panics if limit is 0
|
||||
pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopCollector<T> {
|
||||
if limit < 1 {
|
||||
panic!("Limit must be strictly greater than 0.");
|
||||
}
|
||||
TopCollector {
|
||||
limit,
|
||||
offset,
|
||||
_marker: PhantomData,
|
||||
}
|
||||
}
|
||||
@@ -82,6 +93,10 @@ where
|
||||
self.limit
|
||||
}
|
||||
|
||||
pub fn offset(&self) -> usize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
pub fn merge_fruits(
|
||||
&self,
|
||||
children: Vec<Vec<(T, DocAddress)>>,
|
||||
@@ -92,7 +107,7 @@ where
|
||||
let mut top_collector = BinaryHeap::new();
|
||||
for child_fruit in children {
|
||||
for (feature, doc) in child_fruit {
|
||||
if top_collector.len() < self.limit {
|
||||
if top_collector.len() < (self.limit + self.offset) {
|
||||
top_collector.push(ComparableDoc { feature, doc });
|
||||
} else if let Some(mut head) = top_collector.peek_mut() {
|
||||
if head.feature < feature {
|
||||
@@ -104,6 +119,7 @@ where
|
||||
Ok(top_collector
|
||||
.into_sorted_vec()
|
||||
.into_iter()
|
||||
.skip(self.offset)
|
||||
.map(|cdoc| (cdoc.feature, cdoc.doc))
|
||||
.collect())
|
||||
}
|
||||
@@ -113,7 +129,7 @@ where
|
||||
segment_id: SegmentLocalId,
|
||||
_: &SegmentReader,
|
||||
) -> crate::Result<TopSegmentCollector<F>> {
|
||||
Ok(TopSegmentCollector::new(segment_id, self.limit))
|
||||
Ok(TopSegmentCollector::new(segment_id, self.limit + self.offset))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ pub struct TopDocs(TopCollector<Score>);
|
||||
|
||||
impl fmt::Debug for TopDocs {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "TopDocs({})", self.0.limit())
|
||||
write!(f, "TopDocs({}, {})", self.0.limit(), self.0.offset())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,6 +104,43 @@ impl TopDocs {
|
||||
TopDocs(TopCollector::with_limit(limit))
|
||||
}
|
||||
|
||||
/// Creates a top score collector, with a number of documents equal to "limit" and
|
||||
/// skipping the first "offset" documents. This is useful for pagination.
|
||||
///
|
||||
/// # Panics
|
||||
/// The method panics if limit is 0
|
||||
///
|
||||
/// ```rust
|
||||
/// use tantivy::collector::TopDocs;
|
||||
/// use tantivy::query::QueryParser;
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{doc, DocAddress, Index};
|
||||
///
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
///
|
||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
||||
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
|
||||
/// assert!(index_writer.commit().is_ok());
|
||||
///
|
||||
/// let reader = index.reader().unwrap();
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary").unwrap();
|
||||
/// let top_docs = searcher.search(&query, &TopDocs::with_limit_and_offset(1, 1)).unwrap();
|
||||
///
|
||||
/// assert_eq!(&top_docs[0], &(0.6099695, DocAddress(0, 3)));
|
||||
/// ```
|
||||
pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopDocs {
|
||||
TopDocs(TopCollector::with_limit_and_offset(limit, offset))
|
||||
}
|
||||
|
||||
/// Set top-K to rank documents by a given fast field.
|
||||
///
|
||||
/// ```rust
|
||||
|
||||
Reference in New Issue
Block a user