From dd20454cc7ac825d7d77ece62c641003ce65ae52 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 17 May 2020 16:09:04 +0900 Subject: [PATCH] First stab at blockwand --- src/query/boolean_query/boolean_weight.rs | 4 +- src/query/mod.rs | 2 +- src/query/union.rs | 74 ++++++++++++++++++++--- 3 files changed, 69 insertions(+), 11 deletions(-) diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 8b6a6c881..f62cc911c 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -7,7 +7,7 @@ use crate::query::Exclude; use crate::query::Occur; use crate::query::RequiredOptionalScorer; use crate::query::Scorer; -use crate::query::Union; +use crate::query::{Union, TermUnion}; use crate::query::Weight; use crate::query::{intersect_scorers, Explanation}; use crate::DocId; @@ -30,7 +30,7 @@ where .map(|scorer| *(scorer.downcast::().map_err(|_| ()).unwrap())) .collect(); let scorer: Box = - Box::new(Union::::from(scorers)); + Box::new(TermUnion::::from(scorers)); return scorer; } } diff --git a/src/query/mod.rs b/src/query/mod.rs index d07e46cb6..b5fa68d64 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -27,7 +27,7 @@ mod vec_docset; pub(crate) mod score_combiner; pub use self::intersection::Intersection; -pub use self::union::Union; +pub use self::union::{Union, TermUnion}; #[cfg(test)] pub use self::vec_docset::VecDocSet; diff --git a/src/query/union.rs b/src/query/union.rs index ddaa1ba3d..3e07caded 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -4,6 +4,8 @@ use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner}; use crate::query::Scorer; use crate::DocId; use crate::Score; +use crate::query::term_query::TermScorer; +use crate::fastfield::DeleteBitSet; const HORIZON_NUM_TINYBITSETS: usize = 64; const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32; @@ -38,6 +40,8 @@ pub struct Union { score: Score, } + + impl From> for Union where TScoreCombiner: ScoreCombiner, @@ -198,6 +202,14 @@ where // TODO Also implement `count` with deletes efficiently. + fn doc(&self) -> DocId { + self.doc + } + + fn size_hint(&self) -> u32 { + self.docsets.iter().map(|docset| docset.size_hint()).max().unwrap_or(0u32) + } + fn count_including_deleted(&mut self) -> u32 { if self.doc == TERMINATED { return 0; @@ -219,16 +231,9 @@ where self.cursor = HORIZON_NUM_TINYBITSETS; count } - - fn doc(&self) -> DocId { - self.doc - } - - fn size_hint(&self) -> u32 { - self.docsets.iter().map(|docset| docset.size_hint()).max().unwrap_or(0u32) - } } + impl Scorer for Union where TScoreCombiner: ScoreCombiner, @@ -239,6 +244,59 @@ where } } +pub struct TermUnion { + underlying: Union +} + +impl From> for TermUnion { + fn from(scorers: Vec) -> Self { + TermUnion { + underlying: Union::from(scorers) + } + } +} + +impl DocSet for TermUnion { + fn advance(&mut self) -> u32 { + self.underlying.advance() + } + + fn seek(&mut self, target: u32) -> u32 { + self.underlying.seek(target) + } + + fn fill_buffer(&mut self, buffer: &mut [u32]) -> usize { + self.underlying.fill_buffer(buffer) + } + + fn doc(&self) -> u32 { + self.underlying.doc() + } + + fn size_hint(&self) -> u32 { + self.underlying.size_hint() + } + + fn count(&mut self, delete_bitset: &DeleteBitSet) -> u32 { + self.underlying.count(delete_bitset) + } + + fn count_including_deleted(&mut self) -> u32 { + self.underlying.count_including_deleted() + } +} + +impl Scorer for TermUnion { + fn score(&mut self) -> f32 { + self.underlying.score() + } + + fn for_each_pruning(&mut self, mut threshold: f32, callback: &mut dyn FnMut(u32, f32) -> f32) { + unimplemented!() + } +} + + #[cfg(test)] mod tests {