mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-08 18:12:55 +00:00
issue/50 Removed ScoredDoc
This commit is contained in:
@@ -2,7 +2,8 @@ use collector::Collector;
|
||||
use SegmentLocalId;
|
||||
use SegmentReader;
|
||||
use std::io;
|
||||
use ScoredDoc;
|
||||
use DocId;
|
||||
use Score;
|
||||
|
||||
|
||||
/// Collector that does nothing.
|
||||
@@ -15,7 +16,7 @@ impl Collector for DoNothingCollector {
|
||||
Ok(())
|
||||
}
|
||||
#[inline]
|
||||
fn collect(&mut self, _: ScoredDoc) {}
|
||||
fn collect(&mut self, _doc: DocId, _score: Score) {}
|
||||
}
|
||||
|
||||
/// Zero-cost abstraction used to collect on multiple collectors.
|
||||
@@ -43,9 +44,9 @@ impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Rig
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, scored_doc: ScoredDoc) {
|
||||
self.left.collect(scored_doc);
|
||||
self.right.collect(scored_doc);
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
self.left.collect(doc, score);
|
||||
self.right.collect(doc, score);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,7 +63,6 @@ pub fn chain() -> ChainedCollector<DoNothingCollector, DoNothingCollector> {
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use ScoredDoc;
|
||||
use collector::{Collector, CountCollector, TopCollector};
|
||||
|
||||
#[test]
|
||||
@@ -73,9 +73,9 @@ mod tests {
|
||||
let mut collectors = chain()
|
||||
.push(&mut top_collector)
|
||||
.push(&mut count_collector);
|
||||
collectors.collect(ScoredDoc(0.2, 1));
|
||||
collectors.collect(ScoredDoc(0.1, 2));
|
||||
collectors.collect(ScoredDoc(0.5, 3));
|
||||
collectors.collect(1, 0.2);
|
||||
collectors.collect(2, 0.1);
|
||||
collectors.collect(3, 0.5);
|
||||
}
|
||||
assert_eq!(count_collector.count(), 3);
|
||||
assert!(top_collector.at_capacity());
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::io;
|
||||
use super::Collector;
|
||||
use ScoredDoc;
|
||||
use DocId;
|
||||
use Score;
|
||||
use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
|
||||
@@ -31,7 +32,7 @@ impl Collector for CountCollector {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, _: ScoredDoc) {
|
||||
fn collect(&mut self, _: DocId, _: Score) {
|
||||
self.count += 1;
|
||||
}
|
||||
}
|
||||
@@ -41,16 +42,14 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use test::Bencher;
|
||||
use ScoredDoc;
|
||||
use collector::Collector;
|
||||
|
||||
#[bench]
|
||||
fn build_collector(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
let mut count_collector = CountCollector::default();
|
||||
let docs: Vec<u32> = (0..1_000_000).collect();
|
||||
for doc in docs {
|
||||
count_collector.collect(ScoredDoc(1f32, doc));
|
||||
for doc in 0..1_000_000 {
|
||||
count_collector.collect(doc, 1f32);
|
||||
}
|
||||
count_collector.count()
|
||||
});
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
use ScoredDoc;
|
||||
use DocId;
|
||||
use Score;
|
||||
use std::io;
|
||||
|
||||
mod count_collector;
|
||||
@@ -49,7 +50,7 @@ pub trait Collector {
|
||||
/// on this segment.
|
||||
fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()>;
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
fn collect(&mut self, scored_doc: ScoredDoc);
|
||||
fn collect(&mut self, doc: DocId, score: Score);
|
||||
}
|
||||
|
||||
|
||||
@@ -58,8 +59,8 @@ impl<'a, C: Collector> Collector for &'a mut C {
|
||||
(*self).set_segment(segment_local_id, segment)
|
||||
}
|
||||
/// The query pushes the scored document to the collector via this method.
|
||||
fn collect(&mut self, scored_doc: ScoredDoc) {
|
||||
(*self).collect(scored_doc);
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
(*self).collect(doc, score);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,8 +70,8 @@ pub mod tests {
|
||||
|
||||
use super::*;
|
||||
use test::Bencher;
|
||||
use ScoredDoc;
|
||||
use DocId;
|
||||
use Score;
|
||||
use core::SegmentReader;
|
||||
use std::io;
|
||||
use SegmentLocalId;
|
||||
@@ -112,8 +113,8 @@ pub mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, scored_doc: ScoredDoc) {
|
||||
self.docs.push(scored_doc.doc() + self.offset);
|
||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||
self.docs.push(doc + self.offset);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,8 +151,8 @@ pub mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, scored_doc: ScoredDoc) {
|
||||
let val = self.ff_reader.as_ref().unwrap().get(scored_doc.doc());
|
||||
fn collect(&mut self, doc: DocId, _score: Score) {
|
||||
let val = self.ff_reader.as_ref().unwrap().get(doc);
|
||||
self.vals.push(val);
|
||||
}
|
||||
}
|
||||
@@ -163,7 +164,7 @@ pub mod tests {
|
||||
let mut count_collector = CountCollector::default();
|
||||
let docs: Vec<u32> = (0..1_000_000).collect();
|
||||
for doc in docs {
|
||||
count_collector.collect(ScoredDoc(1f32, doc));
|
||||
count_collector.collect(doc, 1f32);
|
||||
}
|
||||
count_collector.count()
|
||||
});
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::io;
|
||||
use super::Collector;
|
||||
use ScoredDoc;
|
||||
use DocId;
|
||||
use Score;
|
||||
use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
|
||||
@@ -31,9 +32,9 @@ impl<'a> Collector for MultiCollector<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, scored_doc: ScoredDoc) {
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
for collector in &mut self.collectors {
|
||||
collector.collect(scored_doc);
|
||||
collector.collect(doc, score);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -44,7 +45,6 @@ impl<'a> Collector for MultiCollector<'a> {
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use ScoredDoc;
|
||||
use collector::{Collector, CountCollector, TopCollector};
|
||||
|
||||
#[test]
|
||||
@@ -53,9 +53,9 @@ mod tests {
|
||||
let mut count_collector = CountCollector::default();
|
||||
{
|
||||
let mut collectors = MultiCollector::from(vec!(&mut top_collector, &mut count_collector));
|
||||
collectors.collect(ScoredDoc(0.2, 1));
|
||||
collectors.collect(ScoredDoc(0.1, 2));
|
||||
collectors.collect(ScoredDoc(0.5, 3));
|
||||
collectors.collect(1, 0.2);
|
||||
collectors.collect(2, 0.1);
|
||||
collectors.collect(3, 0.5);
|
||||
}
|
||||
assert_eq!(count_collector.count(), 3);
|
||||
assert!(top_collector.at_capacity());
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use std::io;
|
||||
use super::Collector;
|
||||
use ScoredDoc;
|
||||
use SegmentReader;
|
||||
use SegmentLocalId;
|
||||
use DocAddress;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::cmp::Ordering;
|
||||
use DocId;
|
||||
use Score;
|
||||
|
||||
// Rust heap is a max-heap and we need a min heap.
|
||||
@@ -13,6 +13,7 @@ use Score;
|
||||
struct GlobalScoredDoc {
|
||||
score: Score,
|
||||
doc_address: DocAddress
|
||||
|
||||
}
|
||||
|
||||
impl PartialOrd for GlobalScoredDoc {
|
||||
@@ -109,20 +110,20 @@ impl Collector for TopCollector {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect(&mut self, scored_doc: ScoredDoc) {
|
||||
fn collect(&mut self, doc: DocId, score: Score) {
|
||||
if self.at_capacity() {
|
||||
// It's ok to unwrap as long as a limit of 0 is forbidden.
|
||||
let limit_doc: GlobalScoredDoc = *self.heap.peek().expect("Top collector with size 0 is forbidden");
|
||||
if limit_doc.score < scored_doc.score() {
|
||||
if limit_doc.score < score {
|
||||
let mut mut_head = self.heap.peek_mut().expect("Top collector with size 0 is forbidden");
|
||||
mut_head.score = scored_doc.score();
|
||||
mut_head.doc_address = DocAddress(self.segment_id, scored_doc.doc());
|
||||
mut_head.score = score;
|
||||
mut_head.doc_address = DocAddress(self.segment_id, doc);
|
||||
}
|
||||
}
|
||||
else {
|
||||
let wrapped_doc = GlobalScoredDoc {
|
||||
score: scored_doc.score(),
|
||||
doc_address: DocAddress(self.segment_id, scored_doc.doc())
|
||||
score: score,
|
||||
doc_address: DocAddress(self.segment_id, doc)
|
||||
};
|
||||
self.heap.push(wrapped_doc);
|
||||
}
|
||||
@@ -135,7 +136,6 @@ impl Collector for TopCollector {
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use ScoredDoc;
|
||||
use DocId;
|
||||
use Score;
|
||||
use collector::Collector;
|
||||
@@ -143,9 +143,9 @@ mod tests {
|
||||
#[test]
|
||||
fn test_top_collector_not_at_capacity() {
|
||||
let mut top_collector = TopCollector::with_limit(4);
|
||||
top_collector.collect(ScoredDoc(0.8, 1));
|
||||
top_collector.collect(ScoredDoc(0.2, 3));
|
||||
top_collector.collect(ScoredDoc(0.3, 5));
|
||||
top_collector.collect(1, 0.8);
|
||||
top_collector.collect(3, 0.2);
|
||||
top_collector.collect(5, 0.3);
|
||||
assert!(!top_collector.at_capacity());
|
||||
let score_docs: Vec<(Score, DocId)> = top_collector.score_docs()
|
||||
.into_iter()
|
||||
@@ -159,11 +159,11 @@ mod tests {
|
||||
#[test]
|
||||
fn test_top_collector_at_capacity() {
|
||||
let mut top_collector = TopCollector::with_limit(4);
|
||||
top_collector.collect(ScoredDoc(0.8, 1));
|
||||
top_collector.collect(ScoredDoc(0.2, 3));
|
||||
top_collector.collect(ScoredDoc(0.3, 5));
|
||||
top_collector.collect(ScoredDoc(0.9, 7));
|
||||
top_collector.collect(ScoredDoc(-0.2, 9));
|
||||
top_collector.collect(1, 0.8);
|
||||
top_collector.collect(3, 0.2);
|
||||
top_collector.collect(5, 0.3);
|
||||
top_collector.collect(7, 0.9);
|
||||
top_collector.collect(9, -0.2);
|
||||
assert!(top_collector.at_capacity());
|
||||
{
|
||||
let score_docs: Vec<(Score, DocId)> = top_collector
|
||||
|
||||
16
src/lib.rs
16
src/lib.rs
@@ -139,22 +139,6 @@ impl DocAddress {
|
||||
}
|
||||
}
|
||||
|
||||
/// A scored doc is simply a couple `(score, doc_id)`
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct ScoredDoc(Score, DocId);
|
||||
|
||||
impl ScoredDoc {
|
||||
|
||||
/// Returns the score
|
||||
pub fn score(&self,) -> Score {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Returns the doc
|
||||
pub fn doc(&self,) -> DocId {
|
||||
self.1
|
||||
}
|
||||
}
|
||||
|
||||
/// `DocAddress` contains all the necessary information
|
||||
/// to identify a document given a `Searcher` object.
|
||||
|
||||
@@ -107,8 +107,7 @@ impl From<Vec<Term>> for MultiTermQuery {
|
||||
}
|
||||
|
||||
impl Query for MultiTermQuery {
|
||||
|
||||
|
||||
|
||||
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
|
||||
let similitude = self.similitude(searcher);
|
||||
Ok(
|
||||
|
||||
@@ -3,7 +3,6 @@ use collector::Collector;
|
||||
use core::searcher::Searcher;
|
||||
use common::TimerTree;
|
||||
use SegmentLocalId;
|
||||
use ScoredDoc;
|
||||
use super::Weight;
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use DocSet;
|
||||
use ScoredDoc;
|
||||
use collector::Collector;
|
||||
|
||||
/// Scored `DocSet`
|
||||
@@ -12,10 +11,7 @@ pub trait Scorer: DocSet {
|
||||
|
||||
fn collect(&mut self, collector: &mut Collector) {
|
||||
while self.advance() {
|
||||
let scored_doc = ScoredDoc(self.score(), self.doc());
|
||||
collector.collect(scored_doc);
|
||||
collector.collect(self.doc(), self.score());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user