Address review comments

- Make Debug formatting of TopDocs clearer.
- Add unit tests for limit and offset on TopCollector.
- Change API for using offset to a fluent interface.
- Add some context to the docstring to clarify what limit and offset are
  equivalent to in other projects.
This commit is contained in:
Rob Young
2020-05-16 10:21:22 +01:00
parent 3f2cd73ecb
commit 25b1fdf8d2
2 changed files with 69 additions and 22 deletions

View File

@@ -70,25 +70,25 @@ where
/// # Panics
/// The method panics if limit is 0
pub fn with_limit(limit: usize) -> TopCollector<T> {
Self::with_limit_and_offset(limit, 0)
}
/// Creates a top collector, with a number of documents equal to "limit" and
/// skipping the first "offset" documents.
///
/// # Panics
/// The method panics if limit is 0
pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopCollector<T> {
if limit < 1 {
panic!("Limit must be strictly greater than 0.");
}
TopCollector {
Self {
limit,
offset,
offset: 0,
_marker: PhantomData,
}
}
/// Skip the first "offset" documents when collecting.
///
/// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
/// Lucene's TopDocsCollector.
pub fn and_offset(mut self, offset: usize) -> TopCollector<T> {
self.offset = offset;
self
}
pub fn limit(&self) -> usize {
self.limit
}
@@ -129,7 +129,10 @@ where
segment_id: SegmentLocalId,
_: &SegmentReader,
) -> crate::Result<TopSegmentCollector<F>> {
Ok(TopSegmentCollector::new(segment_id, self.limit + self.offset))
Ok(TopSegmentCollector::new(
segment_id,
self.limit + self.offset,
))
}
}
@@ -203,7 +206,7 @@ impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
#[cfg(test)]
mod tests {
use super::TopSegmentCollector;
use super::{TopCollector, TopSegmentCollector};
use crate::DocAddress;
#[test]
@@ -264,6 +267,48 @@ mod tests {
top_collector_limit_3.harvest()[..2].to_vec(),
);
}
#[test]
fn test_top_collector_with_limit_and_offset() {
let collector = TopCollector::with_limit(2).and_offset(1);
let results = collector
.merge_fruits(vec![vec![
(0.9, DocAddress(0, 1)),
(0.8, DocAddress(0, 2)),
(0.7, DocAddress(0, 3)),
(0.6, DocAddress(0, 4)),
(0.5, DocAddress(0, 5)),
]])
.unwrap();
assert_eq!(
results,
vec![(0.8, DocAddress(0, 2)), (0.7, DocAddress(0, 3)),]
);
}
#[test]
fn test_top_collector_with_limit_larger_than_set_and_offset() {
let collector = TopCollector::with_limit(2).and_offset(1);
let results = collector
.merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
.unwrap();
assert_eq!(results, vec![(0.8, DocAddress(0, 2)),]);
}
#[test]
fn test_top_collector_with_limit_and_offset_larger_than_set() {
let collector = TopCollector::with_limit(2).and_offset(20);
let results = collector
.merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
.unwrap();
assert_eq!(results, vec![]);
}
}
#[cfg(all(test, feature = "unstable"))]

View File

@@ -60,7 +60,7 @@ pub struct TopDocs(TopCollector<Score>);
impl fmt::Debug for TopDocs {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TopDocs({}, {})", self.0.limit(), self.0.offset())
write!(f, "TopDocs(limit={}, offset={})", self.0.limit(), self.0.offset())
}
}
@@ -104,11 +104,10 @@ impl TopDocs {
TopDocs(TopCollector::with_limit(limit))
}
/// Creates a top score collector, with a number of documents equal to "limit" and
/// skipping the first "offset" documents. This is useful for pagination.
/// Skip the first "offset" documents when collecting.
///
/// # Panics
/// The method panics if limit is 0
/// This is equivalent to `OFFSET` in MySQL or PostgreSQL and `start` in
/// Lucene's TopDocsCollector.
///
/// ```rust
/// use tantivy::collector::TopDocs;
@@ -126,6 +125,7 @@ impl TopDocs {
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
/// index_writer.add_document(doc!(title => "The Diary of Lena Mukhina"));
/// assert!(index_writer.commit().is_ok());
///
/// let reader = index.reader().unwrap();
@@ -133,12 +133,14 @@ impl TopDocs {
///
/// let query_parser = QueryParser::for_index(&index, vec![title]);
/// let query = query_parser.parse_query("diary").unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit_and_offset(1, 1)).unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
///
/// assert_eq!(&top_docs[0], &(0.6099695, DocAddress(0, 3)));
/// assert_eq!(top_docs.len(), 2);
/// assert_eq!(&top_docs[0], &(0.5204813, DocAddress(0, 4)));
/// assert_eq!(&top_docs[1], &(0.4793185, DocAddress(0, 3)));
/// ```
pub fn with_limit_and_offset(limit: usize, offset: usize) -> TopDocs {
TopDocs(TopCollector::with_limit_and_offset(limit, offset))
pub fn and_offset(self, offset: usize) -> TopDocs {
TopDocs(self.0.and_offset(offset))
}
/// Set top-K to rank documents by a given fast field.