mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 00:50:41 +00:00
Added bitse
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
*.swp
|
||||
target
|
||||
target/debug
|
||||
.vscode
|
||||
@@ -8,4 +9,4 @@ benchmark
|
||||
cpp/simdcomp/bitpackingbenchmark
|
||||
*.bk
|
||||
.idea
|
||||
trace.dat
|
||||
trace.dat
|
||||
|
||||
13
.vimrc
Normal file
13
.vimrc
Normal file
@@ -0,0 +1,13 @@
|
||||
set wildignore+=*/examples/*
|
||||
|
||||
set tabstop=2
|
||||
set shiftwidth=2
|
||||
set softtabstop=2
|
||||
set expandtab
|
||||
set nosmarttab
|
||||
|
||||
set textwidth=100
|
||||
|
||||
autocmd BufRead *.rs :setlocal tags=./rusty-tags.vi;/
|
||||
autocmd BufWritePost *.rs :silent! exec "!rusty-tags vi -o --quiet --start-dir=" . expand('%:p:h') . "&" | redraw!
|
||||
|
||||
210
src/common/bitset.rs
Normal file
210
src/common/bitset.rs
Normal file
@@ -0,0 +1,210 @@
|
||||
use DocId;
|
||||
|
||||
/// A small set of integers in `0..64`.
///
/// The range helpers return a raw `u64` bitmask in which bit `i`
/// stands for the element `i`.
pub trait TinySet {
    /// Adds `b` to the set.
    fn insert(&mut self, b: u32);

    /// Returns `true` if the set contains no element.
    fn is_empty(&self) -> bool;

    /// Removes the smallest element from the set and returns it.
    ///
    /// Returns `None` if the set is empty.
    fn pop_lowest(&mut self) -> Option<u32>;

    /// Removes `b` from the set.
    fn remove(&mut self, b: u32);

    /// Returns the smallest element of the set without removing it,
    /// or `None` if the set is empty.
    fn lowest(&mut self) -> Option<u32>;

    /// Update self to represent the
    /// intersection of its elements and the other
    /// set given in arguments.
    fn intersect(&mut self, other: Self);

    /// Returns a bitmask that contains all values up
    /// to `limit`, excluded.
    ///
    /// The limit is assumed to be strictly lower than 64.
    fn range_lower(limit: u32) -> u64;

    /// Returns a bitmask that contains all values greater
    /// or equal to the given limit, included (and up to 63).
    ///
    /// The limit is assumed to be strictly lower than 64.
    ///
    /// # Panics
    ///
    /// Panics if `from_included >= 64`.
    fn range_greater_or_equal(from_included: u32) -> u64 {
        assert!(from_included < 64);
        // The elements `>= from_included` are exactly those that are not
        // `< from_included`, hence the bitwise complement.
        !Self::range_lower(from_included)
    }
}
|
||||
|
||||
impl TinySet for u64 {
|
||||
fn range_lower(from_included: u32) -> u64 {
|
||||
assert!(from_included < 64);
|
||||
(1u64 << (from_included as u64)) - 1u64
|
||||
}
|
||||
|
||||
fn intersect(&mut self, filter_mask: u64) {
|
||||
*self &= filter_mask;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn insert(&mut self, b: u32) {
|
||||
*self |= 1u64 << (b as u64);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn is_empty(&self) -> bool {
|
||||
*self == 0u64
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn pop_lowest(&mut self) -> Option<u32> {
|
||||
if let Some(lowest) = self.lowest() {
|
||||
self.remove(lowest);
|
||||
Some(lowest)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn remove(&mut self, b: u32) {
|
||||
*self ^= 1 << (b as u64);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn lowest(&mut self) -> Option<u32> {
|
||||
if self.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let least_significant_bit = self.trailing_zeros() as u32;
|
||||
Some(least_significant_bit)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocBitSet {
|
||||
tinybitsets: Box<[u64]>,
|
||||
size_hint: usize, //< Technically it should be u32, but we
|
||||
// count multiple inserts.
|
||||
// `usize` guards us from overflow.
|
||||
max_doc: DocId
|
||||
}
|
||||
|
||||
impl DocBitSet {
|
||||
pub fn with_maxdoc(max_doc: DocId) -> DocBitSet {
|
||||
let num_buckets = (max_doc + 63) / 64;
|
||||
DocBitSet {
|
||||
tinybitsets: vec![0u64; num_buckets as usize].into_boxed_slice(),
|
||||
size_hint: 0,
|
||||
max_doc
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size_hint(&self) -> u32 {
|
||||
if self.max_doc as usize > self.size_hint {
|
||||
self.size_hint as u32
|
||||
} else {
|
||||
self.max_doc
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, doc: DocId) {
|
||||
// we do not check saturated els.
|
||||
self.size_hint += 1;
|
||||
let bucket = (doc / 64u32) as usize;
|
||||
self.tinybitsets[bucket].insert(doc % 64u32);
|
||||
}
|
||||
|
||||
pub fn contains(&self, doc: DocId) -> bool {
|
||||
let tiny_bitset = self.tiny_bitset((doc / 64u32) as usize);
|
||||
let lower = doc % 64;
|
||||
let mask = 1u64 << (lower as u64);
|
||||
(tiny_bitset & mask) != 0u64
|
||||
}
|
||||
|
||||
pub fn max_doc(&self) -> DocId {
|
||||
self.max_doc
|
||||
}
|
||||
|
||||
pub fn num_tiny_bitsets(&self) -> usize {
|
||||
self.tinybitsets.len()
|
||||
}
|
||||
|
||||
pub fn tiny_bitset(&self, bucket: usize) -> u64 {
|
||||
self.tinybitsets[bucket]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::collections::HashSet;
    use DocId;
    use super::TinySet;
    use super::DocBitSet;

    #[test]
    fn test_tiny_set() {
        assert!(0u64.is_empty());

        // A single element is popped exactly once.
        let mut single = 0u64;
        single.insert(1u32);
        assert_eq!(single.pop_lowest(), Some(1u32));
        assert!(single.pop_lowest().is_none());

        // Inserting the same element twice does not duplicate it.
        let mut duplicated = 0u64;
        duplicated.insert(1u32);
        duplicated.insert(1u32);
        assert_eq!(duplicated.pop_lowest(), Some(1u32));
        assert!(duplicated.pop_lowest().is_none());

        // Inserts and pops can be interleaved.
        let mut interleaved = 0u64;
        interleaved.insert(2u32);
        assert_eq!(interleaved.pop_lowest(), Some(2u32));
        interleaved.insert(1u32);
        assert_eq!(interleaved.pop_lowest(), Some(1u32));
        assert!(interleaved.pop_lowest().is_none());

        // The highest representable element works as well.
        let mut highest = 0u64;
        highest.insert(63u32);
        assert_eq!(highest.pop_lowest(), Some(63u32));
        assert!(highest.pop_lowest().is_none());
    }

    #[test]
    fn test_docbitset() {
        // Checks a `DocBitSet` against a `HashSet` holding the same docs.
        // Every doc is required to be strictly lower than `max_doc`.
        fn check_against_hashset(docs: &[DocId], max_doc: u32) {
            let mut expected: HashSet<DocId> = HashSet::new();
            let mut docbitset = DocBitSet::with_maxdoc(max_doc);
            for &doc in docs {
                assert!(doc < max_doc);
                expected.insert(doc);
                docbitset.insert(doc);
            }
            for doc in 0..max_doc {
                assert_eq!(expected.contains(&doc), docbitset.contains(doc));
            }
            assert_eq!(docbitset.max_doc(), max_doc);
        }

        check_against_hashset(&[], 0);
        check_against_hashset(&[], 1);
        check_against_hashset(&[0u32], 1);
        check_against_hashset(&[0u32], 100);
        check_against_hashset(&[1u32, 2u32], 4);
        check_against_hashset(&[99u32], 100);
        check_against_hashset(&[63u32], 64);
        check_against_hashset(&[62u32, 63u32], 64);
    }

    #[test]
    fn test_docbitset_num_buckets() {
        // (max_doc, expected number of 64-bit buckets)
        let expectations: [(u32, usize); 6] = [
            (0u32, 0),
            (1u32, 1),
            (64u32, 1),
            (65u32, 2),
            (128u32, 2),
            (129u32, 3),
        ];
        for &(max_doc, num_buckets) in expectations.iter() {
            assert_eq!(
                DocBitSet::with_maxdoc(max_doc).num_tiny_bitsets(),
                num_buckets
            );
        }
    }
}
|
||||
|
||||
@@ -4,6 +4,7 @@ mod vint;
|
||||
mod counting_writer;
|
||||
mod composite_file;
|
||||
pub mod bitpacker;
|
||||
mod bitset;
|
||||
|
||||
pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
|
||||
pub use self::serialize::BinarySerializable;
|
||||
@@ -12,6 +13,7 @@ pub use self::timer::TimerTree;
|
||||
pub use self::timer::OpenTimer;
|
||||
pub use self::vint::VInt;
|
||||
pub use self::counting_writer::CountingWriter;
|
||||
pub use self::bitset::{TinySet, DocBitSet};
|
||||
|
||||
use std::io;
|
||||
|
||||
|
||||
@@ -92,7 +92,7 @@ pub trait DocSet {
|
||||
|
||||
/// Returns a best-effort hint of the
|
||||
/// length of the docset.
|
||||
fn size_hint(&self) -> usize;
|
||||
fn size_hint(&self) -> u32;
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
|
||||
@@ -111,7 +111,7 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
|
||||
unboxed.doc()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
fn size_hint(&self) -> u32 {
|
||||
let unboxed: &TDocSet = self.borrow();
|
||||
unboxed.size_hint()
|
||||
}
|
||||
@@ -133,7 +133,7 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
|
||||
unref.doc()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
fn size_hint(&self) -> u32 {
|
||||
let unref: &TDocSet = *self;
|
||||
unref.size_hint()
|
||||
}
|
||||
|
||||
@@ -31,7 +31,8 @@ impl<TDocSet: DocSet> IntersectionDocSet<TDocSet> {
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
|
||||
fn size_hint(&self) -> usize {
|
||||
/// Returns the minimum `.size_hint()` of the intersected docsets.
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.docsets
|
||||
.iter()
|
||||
.map(|docset| docset.size_hint())
|
||||
|
||||
@@ -235,8 +235,8 @@ impl DocSet for SegmentPostings {
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
self.len()
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.len() as u32
|
||||
}
|
||||
|
||||
/// Return the current document's `DocId`.
|
||||
|
||||
@@ -35,8 +35,8 @@ impl DocSet for VecPostings {
|
||||
self.doc_ids[self.cursor.0]
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
self.len()
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.len() as u32
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -62,8 +62,8 @@ impl DocSet for AllScorer {
|
||||
self.doc
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
self.max_doc as usize
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.max_doc
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
199
src/query/bitset/mod.rs
Normal file
199
src/query/bitset/mod.rs
Normal file
@@ -0,0 +1,199 @@
|
||||
use common::{DocBitSet, TinySet};
|
||||
use DocId;
|
||||
use postings::DocSet;
|
||||
use postings::SkipResult;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
/// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`.
|
||||
///
|
||||
/// # Implementation detail
|
||||
///
|
||||
/// Skipping is relatively fast here as we can directly point to the
|
||||
/// right tiny bitset bucket.
|
||||
///
|
||||
/// TODO: Consider implementing a `BitTreeSet` in order to advance faster
|
||||
/// when the bitset is sparse
|
||||
pub struct BitSetDocSet {
|
||||
docs: DocBitSet,
|
||||
cursor_bucket: usize, //< index associated to the current tiny bitset
|
||||
cursor_tinybitset: u64,
|
||||
doc: u32
|
||||
}
|
||||
|
||||
impl From<DocBitSet> for BitSetDocSet {
|
||||
fn from(docs: DocBitSet) -> BitSetDocSet {
|
||||
let first_tiny_bitset =
|
||||
if docs.num_tiny_bitsets() == 0 {
|
||||
0u64
|
||||
} else {
|
||||
docs.tiny_bitset(0) as u64
|
||||
};
|
||||
BitSetDocSet {
|
||||
docs,
|
||||
cursor_bucket: 0,
|
||||
cursor_tinybitset: first_tiny_bitset,
|
||||
doc: 0u32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DocSet for BitSetDocSet {
|
||||
fn advance(&mut self) -> bool {
|
||||
loop {
|
||||
if let Some(lower) = self.cursor_tinybitset.pop_lowest() {
|
||||
self.doc = (self.cursor_bucket as u32 * 64u32) | lower;
|
||||
return true;
|
||||
} else {
|
||||
if self.cursor_bucket < self.docs.num_tiny_bitsets() - 1 {
|
||||
self.cursor_bucket += 1;
|
||||
self.cursor_tinybitset = self.docs.tiny_bitset(self.cursor_bucket);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
// skip is required to advance.
|
||||
if !self.advance() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
let target_bucket = (target / 64u32) as usize;
|
||||
|
||||
// Mask for all of the bits greater or equal
|
||||
// to our target document.
|
||||
match target_bucket.cmp(&self.cursor_bucket) {
|
||||
Ordering::Less => {
|
||||
self.cursor_bucket = target_bucket;
|
||||
self.cursor_tinybitset = self.docs.tiny_bitset(target_bucket);
|
||||
let greater: u64 = <u64 as TinySet>::range_greater_or_equal(target % 64);
|
||||
self.cursor_tinybitset.intersect(greater);
|
||||
if !self.advance() {
|
||||
SkipResult::End
|
||||
} else {
|
||||
if self.doc() == target {
|
||||
SkipResult::Reached
|
||||
} else {
|
||||
SkipResult::OverStep
|
||||
}
|
||||
}
|
||||
}
|
||||
Ordering::Equal => {
|
||||
loop {
|
||||
match self.doc().cmp(&target) {
|
||||
Ordering::Less => {
|
||||
if !self.advance() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
}
|
||||
Ordering::Equal => {
|
||||
return SkipResult::Reached;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
return SkipResult::OverStep;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ordering::Greater => SkipResult::OverStep
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the current document
|
||||
fn doc(&self) -> DocId {
|
||||
self.doc
|
||||
}
|
||||
|
||||
/// Advances the cursor to the next document
|
||||
/// None is returned if the iterator has `DocSet`
|
||||
/// has already been entirely consumed.
|
||||
fn next(&mut self) -> Option<DocId> {
|
||||
if self.advance() {
|
||||
Some(self.doc())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns half of the `max_doc`
|
||||
/// This is quite a terrible heuristic,
|
||||
/// but we don't have access to any better
|
||||
/// value.
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.docs.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use DocId;
    use common::DocBitSet;
    use postings::{SkipResult, DocSet};
    use super::BitSetDocSet;

    /// Builds a `BitSetDocSet` that holds `docs`, all assumed to be
    /// strictly lower than `max_doc`.
    fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
        let mut docset = DocBitSet::with_maxdoc(max_doc);
        for &doc in docs {
            docset.insert(doc);
        }
        BitSetDocSet::from(docset)
    }

    /// Checks that advancing yields exactly `docs`, in order, and that the
    /// docset stays exhausted afterwards.
    fn test_go_through_sequential(docs: &[DocId]) {
        let mut docset = create_docbitset(docs, 1_000u32);
        for &doc in docs {
            assert!(docset.advance());
            assert_eq!(doc, docset.doc());
        }
        assert!(!docset.advance());
        assert!(!docset.advance());
    }

    #[test]
    fn test_docbitset_sequential() {
        test_go_through_sequential(&[]);
        test_go_through_sequential(&[1, 2, 3]);
        test_go_through_sequential(&[1, 2, 3, 4, 5, 63, 64, 65]);
        test_go_through_sequential(&[63, 64, 65]);
        test_go_through_sequential(&[1, 2, 3, 4, 95, 96, 97, 98, 99]);
    }

    #[test]
    fn test_docbitset_skip() {
        {
            let mut docset = create_docbitset(&[1, 5, 6, 7, 5112], 10_000);
            assert_eq!(docset.skip_next(7), SkipResult::Reached);
            assert_eq!(docset.doc(), 7);
            // The original passed a stray `7` as the panic message here.
            assert!(docset.advance());
            assert_eq!(docset.doc(), 5112);
            assert!(!docset.advance());
        }
        {
            let mut docset = create_docbitset(&[1, 5, 6, 7, 5112], 10_000);
            assert_eq!(docset.skip_next(3), SkipResult::OverStep);
            assert_eq!(docset.doc(), 5);
            assert!(docset.advance());
        }
        {
            let mut docset = create_docbitset(&[5112], 10_000);
            assert_eq!(docset.skip_next(5112), SkipResult::Reached);
            assert_eq!(docset.doc(), 5112);
            assert!(!docset.advance());
        }
        {
            let mut docset = create_docbitset(&[5112], 10_000);
            assert_eq!(docset.skip_next(5113), SkipResult::End);
            assert!(!docset.advance());
        }
        {
            let mut docset = create_docbitset(&[5112], 10_000);
            assert_eq!(docset.skip_next(5111), SkipResult::OverStep);
            assert_eq!(docset.doc(), 5112);
            assert!(!docset.advance());
        }
    }
}
|
||||
@@ -8,7 +8,6 @@ use schema::Term;
|
||||
use query::TermQuery;
|
||||
use schema::IndexRecordOption;
|
||||
use query::Occur;
|
||||
use query::OccurFilter;
|
||||
|
||||
/// The boolean query combines a set of queries
|
||||
///
|
||||
@@ -39,14 +38,11 @@ impl Query for BooleanQuery {
|
||||
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
|
||||
let sub_weights = self.subqueries
|
||||
.iter()
|
||||
.map(|&(ref _occur, ref subquery)| subquery.weight(searcher))
|
||||
.map(|&(ref occur, ref subquery)| {
|
||||
Ok((*occur, subquery.weight(searcher)?))
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
let occurs: Vec<Occur> = self.subqueries
|
||||
.iter()
|
||||
.map(|&(ref occur, ref _subquery)| *occur)
|
||||
.collect();
|
||||
let filter = OccurFilter::new(&occurs);
|
||||
Ok(box BooleanWeight::new(sub_weights, filter))
|
||||
Ok(box BooleanWeight::new(sub_weights))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {
|
||||
}
|
||||
|
||||
impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
|
||||
fn size_hint(&self) -> usize {
|
||||
fn size_hint(&self) -> u32 {
|
||||
// TODO fix this. it should be the min
|
||||
// of the MUST scorer
|
||||
// and the max of the SHOULD scorers.
|
||||
|
||||
@@ -1,31 +1,49 @@
|
||||
use query::Weight;
|
||||
use core::SegmentReader;
|
||||
use query::EmptyScorer;
|
||||
use query::Scorer;
|
||||
use super::BooleanScorer;
|
||||
use query::OccurFilter;
|
||||
use query::Occur;
|
||||
use Result;
|
||||
|
||||
pub struct BooleanWeight {
|
||||
weights: Vec<Box<Weight>>,
|
||||
occur_filter: OccurFilter,
|
||||
weights: Vec<(Occur, Box<Weight>)>,
|
||||
}
|
||||
|
||||
impl BooleanWeight {
|
||||
pub fn new(weights: Vec<Box<Weight>>, occur_filter: OccurFilter) -> BooleanWeight {
|
||||
pub fn new(weights: Vec<(Occur, Box<Weight>)>) -> BooleanWeight {
|
||||
BooleanWeight {
|
||||
weights,
|
||||
occur_filter,
|
||||
weights
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Weight for BooleanWeight {
|
||||
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
|
||||
let sub_scorers: Vec<Box<Scorer + 'a>> = self.weights
|
||||
.iter()
|
||||
.map(|weight| weight.scorer(reader))
|
||||
.collect::<Result<_>>()?;
|
||||
let boolean_scorer = BooleanScorer::new(sub_scorers, self.occur_filter);
|
||||
Ok(box boolean_scorer)
|
||||
}
|
||||
if self.weights.is_empty() {
|
||||
Ok(box EmptyScorer)
|
||||
} else if self.weights.len() == 1 {
|
||||
let &(occur, ref weight) = &self.weights[0];
|
||||
if occur == Occur::MustNot {
|
||||
Ok(box EmptyScorer)
|
||||
} else {
|
||||
weight.scorer(reader)
|
||||
}
|
||||
} else {
|
||||
let sub_scorers: Vec<Box<Scorer + 'a>> = self.weights
|
||||
.iter()
|
||||
.map(|&(_, ref weight)| weight)
|
||||
.map(|weight| weight.scorer(reader))
|
||||
.collect::<Result<_>>()?;
|
||||
let occurs: Vec<Occur> = self.weights
|
||||
.iter()
|
||||
.map(|&(ref occur, _)| *occur)
|
||||
.collect();
|
||||
let occur_filter = OccurFilter::new(&occurs);
|
||||
let boolean_scorer = BooleanScorer::new(sub_scorers, occur_filter);
|
||||
Ok(box boolean_scorer)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,9 @@ mod term_query;
|
||||
mod query_parser;
|
||||
mod phrase_query;
|
||||
mod all_query;
|
||||
mod bitset;
|
||||
|
||||
pub use self::bitset::BitSetDocSet;
|
||||
pub use self::boolean_query::BooleanQuery;
|
||||
pub use self::occur_filter::OccurFilter;
|
||||
pub use self::occur::Occur;
|
||||
@@ -24,4 +26,4 @@ pub use self::scorer::EmptyScorer;
|
||||
pub use self::scorer::Scorer;
|
||||
pub use self::term_query::TermQuery;
|
||||
pub use self::weight::Weight;
|
||||
pub use self::all_query::{AllQuery, AllWeight, AllScorer};
|
||||
pub use self::all_query::{AllQuery, AllWeight, AllScorer};
|
||||
|
||||
@@ -35,7 +35,7 @@ impl DocSet for PostingsWithOffset {
|
||||
self.segment_postings.doc()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.segment_postings.size_hint()
|
||||
}
|
||||
|
||||
@@ -125,7 +125,7 @@ impl DocSet for PhraseScorer {
|
||||
self.intersection_docset.doc()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.intersection_docset.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,7 +49,7 @@ impl DocSet for EmptyScorer {
|
||||
DocId::max_value()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
fn size_hint(&self) -> u32 {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ where
|
||||
self.postings.doc()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> usize {
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.postings.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user