mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-25 20:50:43 +00:00
Backmerging master
This commit is contained in:
@@ -9,7 +9,6 @@ use DocId;
|
||||
use std::any::Any;
|
||||
use core::Searcher;
|
||||
|
||||
|
||||
/// Query that matches all of the documents.
|
||||
///
|
||||
/// All of the documents get the score 1f32.
|
||||
@@ -34,12 +33,11 @@ impl Weight for AllWeight {
|
||||
Ok(box AllScorer {
|
||||
started: false,
|
||||
doc: 0u32,
|
||||
max_doc: reader.max_doc()
|
||||
max_doc: reader.max_doc(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Scorer associated to the `AllQuery` query.
|
||||
pub struct AllScorer {
|
||||
started: bool,
|
||||
@@ -51,8 +49,7 @@ impl DocSet for AllScorer {
|
||||
fn advance(&mut self) -> bool {
|
||||
if self.started {
|
||||
self.doc += 1u32;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
self.started = true;
|
||||
}
|
||||
self.doc < self.max_doc
|
||||
@@ -71,4 +68,4 @@ impl Scorer for AllScorer {
|
||||
fn score(&self) -> Score {
|
||||
1f32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use common::{DocBitSet, TinySet};
|
||||
use common::{BitSet, TinySet};
|
||||
use DocId;
|
||||
use postings::DocSet;
|
||||
use postings::SkipResult;
|
||||
@@ -8,107 +8,100 @@ use std::cmp::Ordering;
|
||||
///
|
||||
/// # Implementation detail
|
||||
///
|
||||
/// Skipping is relatively fast here as we can directly point to the
|
||||
/// Skipping is relatively fast here as we can directly point to the
|
||||
/// right tiny bitset bucket.
|
||||
///
|
||||
/// TODO: Consider implementing a `BitTreeSet` in order to advance faster
|
||||
/// TODO: Consider implementing a `BitTreeSet` in order to advance faster
|
||||
/// when the bitset is sparse
|
||||
pub struct BitSetDocSet {
|
||||
docs: DocBitSet,
|
||||
cursor_bucket: usize, //< index associated to the current tiny bitset
|
||||
cursor_tinybitset: u64,
|
||||
doc: u32
|
||||
docs: BitSet,
|
||||
cursor_bucket: u32, //< index associated to the current tiny bitset
|
||||
cursor_tinybitset: TinySet,
|
||||
doc: u32,
|
||||
}
|
||||
|
||||
impl From<DocBitSet> for BitSetDocSet {
|
||||
fn from(docs: DocBitSet) -> BitSetDocSet {
|
||||
let first_tiny_bitset =
|
||||
if docs.num_tiny_bitsets() == 0 {
|
||||
0u64
|
||||
} else {
|
||||
docs.tiny_bitset(0) as u64
|
||||
};
|
||||
impl BitSetDocSet {
|
||||
fn go_to_bucket(&mut self, bucket_addr: u32) {
|
||||
self.cursor_bucket = bucket_addr;
|
||||
self.cursor_tinybitset = self.docs.tinyset(bucket_addr);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BitSet> for BitSetDocSet {
|
||||
fn from(docs: BitSet) -> BitSetDocSet {
|
||||
let first_tiny_bitset = if docs.max_value() == 0 {
|
||||
TinySet::empty()
|
||||
} else {
|
||||
docs.tinyset(0)
|
||||
};
|
||||
BitSetDocSet {
|
||||
docs,
|
||||
cursor_bucket: 0,
|
||||
cursor_tinybitset: first_tiny_bitset,
|
||||
doc: 0u32
|
||||
doc: 0u32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DocSet for BitSetDocSet {
|
||||
fn advance(&mut self) -> bool {
|
||||
loop {
|
||||
if let Some(lower) = self.cursor_tinybitset.pop_lowest() {
|
||||
self.doc = (self.cursor_bucket as u32 * 64u32) | lower;
|
||||
return true;
|
||||
} else {
|
||||
if self.cursor_bucket < self.docs.num_tiny_bitsets() - 1 {
|
||||
self.cursor_bucket += 1;
|
||||
self.cursor_tinybitset = self.docs.tiny_bitset(self.cursor_bucket);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(lower) = self.cursor_tinybitset.pop_lowest() {
|
||||
self.doc = (self.cursor_bucket as u32 * 64u32) | lower;
|
||||
return true;
|
||||
}
|
||||
if let Some(cursor_bucket) = self.docs.first_non_empty_bucket(self.cursor_bucket + 1) {
|
||||
self.go_to_bucket(cursor_bucket);
|
||||
let lower = self.cursor_tinybitset.pop_lowest().unwrap();
|
||||
self.doc = (cursor_bucket * 64u32) | lower;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
// skip is required to advance.
|
||||
if !self.advance() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
let target_bucket = (target / 64u32) as usize;
|
||||
|
||||
let target_bucket = target / 64u32;
|
||||
|
||||
// Mask for all of the bits greater or equal
|
||||
// to our target document.
|
||||
match target_bucket.cmp(&self.cursor_bucket) {
|
||||
Ordering::Greater => {
|
||||
self.cursor_bucket = target_bucket;
|
||||
self.cursor_tinybitset = self.docs.tiny_bitset(target_bucket);
|
||||
// let greater: u64 = <u64 as TinySet>::range_greater_or_equal(target % 64);
|
||||
// self.cursor_tinybitset.intersect(greater);
|
||||
loop {
|
||||
if !self.advance() {
|
||||
return SkipResult::End;
|
||||
self.go_to_bucket(target_bucket);
|
||||
let greater_filter: TinySet = TinySet::range_greater_or_equal(target);
|
||||
self.cursor_tinybitset = self.cursor_tinybitset.intersect(greater_filter);
|
||||
if !self.advance() {
|
||||
SkipResult::End
|
||||
} else {
|
||||
if self.doc() == target {
|
||||
SkipResult::Reached
|
||||
} else {
|
||||
if self.doc() == target {
|
||||
return SkipResult::Reached;
|
||||
} else {
|
||||
// assert!(self.doc() > target);
|
||||
if self.doc() > target {
|
||||
return SkipResult::OverStep;
|
||||
}
|
||||
|
||||
}
|
||||
debug_assert!(self.doc() > target);
|
||||
SkipResult::OverStep
|
||||
}
|
||||
}
|
||||
}
|
||||
Ordering::Equal => {
|
||||
loop {
|
||||
match self.doc().cmp(&target) {
|
||||
Ordering::Less => {
|
||||
if !self.advance() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
}
|
||||
Ordering::Equal => {
|
||||
assert!(self.doc() == target);
|
||||
return SkipResult::Reached;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
assert!(self.doc() > target);
|
||||
return SkipResult::OverStep;
|
||||
Ordering::Equal => loop {
|
||||
match self.doc().cmp(&target) {
|
||||
Ordering::Less => {
|
||||
if !self.advance() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
}
|
||||
Ordering::Equal => {
|
||||
return SkipResult::Reached;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
debug_assert!(self.doc() > target);
|
||||
return SkipResult::OverStep;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
},
|
||||
Ordering::Less => {
|
||||
assert!(self.doc() > target);
|
||||
debug_assert!(self.doc() > target);
|
||||
SkipResult::OverStep
|
||||
}
|
||||
}
|
||||
@@ -135,19 +128,20 @@ impl DocSet for BitSetDocSet {
|
||||
/// but we don't have access to any better
|
||||
/// value.
|
||||
fn size_hint(&self) -> u32 {
|
||||
self.docs.size_hint()
|
||||
self.docs.len() as u32
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use DocId;
|
||||
use common::DocBitSet;
|
||||
use postings::{SkipResult, DocSet};
|
||||
use common::BitSet;
|
||||
use postings::{DocSet, SkipResult};
|
||||
use super::BitSetDocSet;
|
||||
extern crate test;
|
||||
|
||||
fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet {
|
||||
let mut docset = DocBitSet::with_maxdoc(max_doc);
|
||||
let mut docset = BitSet::with_max_value(max_doc);
|
||||
for &doc in docs {
|
||||
docset.insert(doc);
|
||||
}
|
||||
@@ -167,10 +161,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_docbitset_sequential() {
|
||||
test_go_through_sequential(&[]);
|
||||
test_go_through_sequential(&[1,2,3]);
|
||||
test_go_through_sequential(&[1,2,3,4,5,63,64,65]);
|
||||
test_go_through_sequential(&[63,64,65]);
|
||||
test_go_through_sequential(&[1,2,3,4,95,96,97,98,99]);
|
||||
test_go_through_sequential(&[1, 2, 3]);
|
||||
test_go_through_sequential(&[1, 2, 3, 4, 5, 63, 64, 65]);
|
||||
test_go_through_sequential(&[63, 64, 65]);
|
||||
test_go_through_sequential(&[1, 2, 3, 4, 95, 96, 97, 98, 99]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -206,6 +200,73 @@ mod tests {
|
||||
assert_eq!(docset.doc(), 5112);
|
||||
assert!(!docset.advance());
|
||||
}
|
||||
{
|
||||
let mut docset = create_docbitset(&[1, 5, 6, 7, 5112, 5500, 6666], 10_000);
|
||||
assert_eq!(docset.skip_next(5112), SkipResult::Reached);
|
||||
assert_eq!(docset.doc(), 5112);
|
||||
assert!(docset.advance());
|
||||
assert_eq!(docset.doc(), 5500);
|
||||
assert!(docset.advance());
|
||||
assert_eq!(docset.doc(), 6666);
|
||||
assert!(!docset.advance());
|
||||
}
|
||||
{
|
||||
let mut docset = create_docbitset(&[1, 5, 6, 7, 5112, 5500, 6666], 10_000);
|
||||
assert_eq!(docset.skip_next(5111), SkipResult::OverStep);
|
||||
assert_eq!(docset.doc(), 5112);
|
||||
assert!(docset.advance());
|
||||
assert_eq!(docset.doc(), 5500);
|
||||
assert!(docset.advance());
|
||||
assert_eq!(docset.doc(), 6666);
|
||||
assert!(!docset.advance());
|
||||
}
|
||||
{
|
||||
let mut docset = create_docbitset(&[1, 5, 6, 7, 5112, 5513, 6666], 10_000);
|
||||
assert_eq!(docset.skip_next(5111), SkipResult::OverStep);
|
||||
assert_eq!(docset.doc(), 5112);
|
||||
assert!(docset.advance());
|
||||
assert_eq!(docset.doc(), 5513);
|
||||
assert!(docset.advance());
|
||||
assert_eq!(docset.doc(), 6666);
|
||||
assert!(!docset.advance());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#[bench]
|
||||
fn bench_bitset_1pct_insert(b: &mut test::Bencher) {
|
||||
use tests;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
b.iter(|| {
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els.iter().cloned() {
|
||||
bitset.insert(el);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_1pct_clone(b: &mut test::Bencher) {
|
||||
use tests;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
b.iter(|| bitset.clone());
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_bitset_1pct_clone_iterate(b: &mut test::Bencher) {
|
||||
use tests;
|
||||
use DocSet;
|
||||
let els = tests::generate_nonunique_unsorted(1_000_000u32, 10_000);
|
||||
let mut bitset = BitSet::with_max_value(1_000_000);
|
||||
for el in els {
|
||||
bitset.insert(el);
|
||||
}
|
||||
b.iter(|| {
|
||||
let mut docset = BitSetDocSet::from(bitset.clone());
|
||||
while docset.advance() {}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,6 +39,13 @@ impl Query for BooleanQuery {
|
||||
self
|
||||
}
|
||||
|
||||
fn disable_scoring(&mut self) {
|
||||
self.scoring_disabled = true;
|
||||
for &mut (_, ref mut subquery) in &mut self.subqueries {
|
||||
subquery.disable_scoring();
|
||||
}
|
||||
}
|
||||
|
||||
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
|
||||
let sub_weights = self.subqueries
|
||||
.iter()
|
||||
@@ -48,13 +55,6 @@ impl Query for BooleanQuery {
|
||||
.collect::<Result<_>>()?;
|
||||
Ok(box BooleanWeight::new(sub_weights, self.scoring_disabled))
|
||||
}
|
||||
|
||||
fn disable_scoring(&mut self) {
|
||||
self.scoring_disabled = true;
|
||||
for &mut (_, ref mut subquery) in &mut self.subqueries {
|
||||
subquery.disable_scoring();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BooleanQuery {
|
||||
|
||||
@@ -24,13 +24,14 @@ impl BooleanWeight {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Weight for BooleanWeight {
|
||||
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
|
||||
if self.weights.is_empty() {
|
||||
Ok(box EmptyScorer)
|
||||
} else if self.weights.len() == 1 {
|
||||
let &(occur, ref weight) = &self.weights[0];
|
||||
if occur == Occur::MustNot {
|
||||
let &(occur, ref weight) = &self.weights[0];
|
||||
if occur == Occur::MustNot {
|
||||
Ok(box EmptyScorer)
|
||||
} else {
|
||||
weight.scorer(reader)
|
||||
|
||||
@@ -15,7 +15,6 @@ mod all_query;
|
||||
mod bitset;
|
||||
mod range_query;
|
||||
|
||||
|
||||
pub use self::bitset::BitSetDocSet;
|
||||
pub use self::boolean_query::BooleanQuery;
|
||||
pub use self::occur_filter::OccurFilter;
|
||||
@@ -28,7 +27,7 @@ pub use self::scorer::EmptyScorer;
|
||||
pub use self::scorer::Scorer;
|
||||
pub use self::term_query::TermQuery;
|
||||
pub use self::weight::Weight;
|
||||
pub use self::all_query::{AllQuery, AllWeight, AllScorer};
|
||||
pub use self::range_query::{RangeQuery,RangeDefinition, RangeWeight};
|
||||
pub use self::all_query::{AllQuery, AllScorer, AllWeight};
|
||||
pub use self::range_query::RangeQuery;
|
||||
pub use self::scorer::ConstScorer;
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ pub use self::phrase_query::PhraseQuery;
|
||||
pub use self::phrase_weight::PhraseWeight;
|
||||
pub use self::phrase_scorer::PhraseScorer;
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -75,8 +74,6 @@ mod tests {
|
||||
assert_eq!(test_query(vec!["g", "a"]), empty_vec);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[test] // motivated by #234
|
||||
pub fn test_phrase_query_docfreq_order() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
@@ -90,11 +87,13 @@ mod tests {
|
||||
let doc = doc!(text_field=>"b");
|
||||
index_writer.add_document(doc);
|
||||
}
|
||||
{ // 1
|
||||
{
|
||||
// 1
|
||||
let doc = doc!(text_field=>"a b");
|
||||
index_writer.add_document(doc);
|
||||
}
|
||||
{ // 2
|
||||
{
|
||||
// 2
|
||||
let doc = doc!(text_field=>"b a");
|
||||
index_writer.add_document(doc);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use query::Scorer;
|
||||
use DocId;
|
||||
use postings::{SkipResult, IntersectionDocSet, DocSet, Postings, SegmentPostings};
|
||||
use postings::{DocSet, IntersectionDocSet, Postings, SegmentPostings, SkipResult};
|
||||
|
||||
struct PostingsWithOffset {
|
||||
offset: u32,
|
||||
@@ -11,7 +11,7 @@ impl PostingsWithOffset {
|
||||
pub fn new(segment_postings: SegmentPostings, offset: u32) -> PostingsWithOffset {
|
||||
PostingsWithOffset {
|
||||
offset,
|
||||
segment_postings
|
||||
segment_postings,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -49,7 +49,6 @@ pub struct PhraseScorer {
|
||||
}
|
||||
|
||||
impl PhraseScorer {
|
||||
|
||||
pub fn new(term_postings: Vec<SegmentPostings>) -> PhraseScorer {
|
||||
let postings_with_offsets: Vec<_> = term_postings
|
||||
.into_iter()
|
||||
@@ -57,12 +56,11 @@ impl PhraseScorer {
|
||||
.map(|(offset, postings)| PostingsWithOffset::new(postings, offset as u32))
|
||||
.collect();
|
||||
PhraseScorer {
|
||||
intersection_docset: IntersectionDocSet::from(postings_with_offsets)
|
||||
intersection_docset: IntersectionDocSet::from(postings_with_offsets),
|
||||
}
|
||||
}
|
||||
|
||||
fn phrase_match(&self) -> bool {
|
||||
|
||||
// TODO maybe we could avoid decoding positions lazily for all terms
|
||||
// when there is > 2 terms.
|
||||
//
|
||||
@@ -74,7 +72,6 @@ impl PhraseScorer {
|
||||
positions_arr[docset.offset as usize] = docset.positions();
|
||||
}
|
||||
|
||||
|
||||
let num_postings = positions_arr.len() as u32;
|
||||
|
||||
let mut ord = 1u32;
|
||||
|
||||
@@ -23,7 +23,8 @@ impl Weight for PhraseWeight {
|
||||
for term in &self.phrase_terms {
|
||||
if let Some(postings) = reader
|
||||
.inverted_index(term.field())
|
||||
.read_postings(term, IndexRecordOption::WithFreqsAndPositions) {
|
||||
.read_postings(term, IndexRecordOption::WithFreqsAndPositions)
|
||||
{
|
||||
term_postings_list.push(postings);
|
||||
} else {
|
||||
return Ok(box EmptyScorer);
|
||||
|
||||
@@ -1,91 +1,129 @@
|
||||
use schema::{Field, Term, IndexRecordOption};
|
||||
use query::{Query, Weight, Scorer};
|
||||
use schema::{Field, IndexRecordOption, Term};
|
||||
use query::{Query, Scorer, Weight};
|
||||
use termdict::{TermDictionary, TermStreamer, TermStreamerBuilder};
|
||||
use core::SegmentReader;
|
||||
use common::DocBitSet;
|
||||
use common::BitSet;
|
||||
use Result;
|
||||
use std::any::Any;
|
||||
use core::Searcher;
|
||||
use query::BitSetDocSet;
|
||||
use query::ConstScorer;
|
||||
use std::collections::Bound;
|
||||
use std::collections::range::RangeArgument;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum Boundary {
|
||||
Included(Vec<u8>),
|
||||
Excluded(Vec<u8>),
|
||||
Unbounded,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RangeDefinition {
|
||||
field: Field,
|
||||
left_bound: Boundary,
|
||||
right_bound: Boundary
|
||||
}
|
||||
|
||||
impl RangeDefinition {
|
||||
pub fn for_field(field: Field) -> RangeDefinition{
|
||||
RangeDefinition {
|
||||
field,
|
||||
left_bound: Boundary::Unbounded,
|
||||
right_bound: Boundary::Unbounded
|
||||
}
|
||||
}
|
||||
|
||||
pub fn left_included(mut self, left: Term) -> RangeDefinition {
|
||||
assert_eq!(left.field(), self.field);
|
||||
self.left_bound = Boundary::Included(left.value_bytes().to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn left_excluded(mut self, left: Term) -> RangeDefinition {
|
||||
assert_eq!(left.field(), self.field);
|
||||
self.left_bound = Boundary::Excluded(left.value_bytes().to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn right_included(mut self, right: Term) -> RangeDefinition {
|
||||
assert_eq!(right.field(), self.field);
|
||||
self.right_bound = Boundary::Included(right.value_bytes().to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn right_excluded(mut self, right: Term) -> RangeDefinition {
|
||||
assert_eq!(right.field(), self.field);
|
||||
self.right_bound = Boundary::Excluded(right.value_bytes().to_owned());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn term_range<'a, T>(&self, term_dict: &'a T) -> T::Streamer
|
||||
where T: TermDictionary<'a> + 'a
|
||||
{
|
||||
use self::Boundary::*;
|
||||
let mut term_stream_builder = term_dict.range();
|
||||
term_stream_builder =
|
||||
match &self.left_bound {
|
||||
&Included(ref term_val) => term_stream_builder.ge(term_val),
|
||||
&Excluded(ref term_val) => term_stream_builder.gt(term_val),
|
||||
&Unbounded => term_stream_builder
|
||||
};
|
||||
term_stream_builder =
|
||||
match &self.right_bound {
|
||||
&Included(ref term_val) => term_stream_builder.le(term_val),
|
||||
&Excluded(ref term_val) => term_stream_builder.lt(term_val),
|
||||
&Unbounded => term_stream_builder
|
||||
};
|
||||
term_stream_builder.into_stream()
|
||||
fn map_bound<TFrom, Transform: Fn(TFrom) -> Vec<u8>>(
|
||||
bound: Bound<TFrom>,
|
||||
transform: &Transform,
|
||||
) -> Bound<Vec<u8>> {
|
||||
use self::Bound::*;
|
||||
match bound {
|
||||
Excluded(from_val) => Excluded(transform(from_val)),
|
||||
Included(from_val) => Included(transform(from_val)),
|
||||
Unbounded => Unbounded,
|
||||
}
|
||||
}
|
||||
|
||||
/// `RangeQuery` matches all documents that have at least one term within a defined range.
|
||||
///
|
||||
/// Matched documents will all get a constant `Score` of one.
|
||||
///
|
||||
/// # Implementation
|
||||
///
|
||||
/// The current implementation will iterate over the terms within the range
|
||||
/// and append all of the documents encountered into a `BitSet`.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
///
|
||||
/// # #[macro_use]
|
||||
/// # extern crate tantivy;
|
||||
/// # use tantivy::Index;
|
||||
/// # use tantivy::schema::{SchemaBuilder, INT_INDEXED};
|
||||
/// # use tantivy::collector::CountCollector;
|
||||
/// # use tantivy::query::Query;
|
||||
/// # use tantivy::Result;
|
||||
/// # use tantivy::query::RangeQuery;
|
||||
/// #
|
||||
/// # fn run() -> Result<()> {
|
||||
/// # let mut schema_builder = SchemaBuilder::new();
|
||||
/// # let year_field = schema_builder.add_u64_field("year", INT_INDEXED);
|
||||
/// # let schema = schema_builder.build();
|
||||
/// #
|
||||
/// # let index = Index::create_in_ram(schema);
|
||||
/// # {
|
||||
/// # let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
|
||||
/// # for year in 1950u64..2017u64 {
|
||||
/// # let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
||||
/// # for _ in 0..num_docs_within_year {
|
||||
/// # index_writer.add_document(doc!(year_field => year));
|
||||
/// # }
|
||||
/// # }
|
||||
/// # index_writer.commit().unwrap();
|
||||
/// # }
|
||||
/// # index.load_searchers()?;
|
||||
/// let searcher = index.searcher();
|
||||
///
|
||||
/// let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970);
|
||||
///
|
||||
/// // ... or `1960..=1969` if inclusive range is enabled.
|
||||
/// let mut count_collector = CountCollector::default();
|
||||
/// docs_in_the_sixties.search(&*searcher, &mut count_collector)?;
|
||||
///
|
||||
/// let num_60s_books = count_collector.count();
|
||||
///
|
||||
/// # assert_eq!(num_60s_books, 2285);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # run().unwrap()
|
||||
/// # }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct RangeQuery {
|
||||
range_definition: RangeDefinition
|
||||
field: Field,
|
||||
left_bound: Bound<Vec<u8>>,
|
||||
right_bound: Bound<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl RangeQuery {
|
||||
pub fn new(range_definition: RangeDefinition) -> RangeQuery {
|
||||
/// Create a new `RangeQuery` over a `i64` field.
|
||||
pub fn new_i64<TRangeArgument: RangeArgument<i64>>(
|
||||
field: Field,
|
||||
range: TRangeArgument,
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &i64| Term::from_field_i64(field, *val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
range_definition
|
||||
field,
|
||||
left_bound: map_bound(range.start(), &make_term_val),
|
||||
right_bound: map_bound(range.end(), &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `u64` field.
|
||||
pub fn new_u64<TRangeArgument: RangeArgument<u64>>(
|
||||
field: Field,
|
||||
range: TRangeArgument,
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &u64| Term::from_field_u64(field, *val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
left_bound: map_bound(range.start(), &make_term_val),
|
||||
right_bound: map_bound(range.end(), &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `Str` field.
|
||||
pub fn new_str<'b, TRangeArgument: RangeArgument<&'b str>>(
|
||||
field: Field,
|
||||
range: TRangeArgument,
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &&str| val.as_bytes().to_vec();
|
||||
RangeQuery {
|
||||
field,
|
||||
left_bound: map_bound(range.start(), &make_term_val),
|
||||
right_bound: map_bound(range.end(), &make_term_val),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -97,27 +135,52 @@ impl Query for RangeQuery {
|
||||
|
||||
fn weight(&self, _searcher: &Searcher) -> Result<Box<Weight>> {
|
||||
Ok(box RangeWeight {
|
||||
range_definition: self.range_definition.clone()
|
||||
field: self.field,
|
||||
left_bound: self.left_bound.clone(),
|
||||
right_bound: self.right_bound.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub struct RangeWeight {
|
||||
range_definition: RangeDefinition
|
||||
field: Field,
|
||||
left_bound: Bound<Vec<u8>>,
|
||||
right_bound: Bound<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl RangeWeight {
|
||||
fn term_range<'a, T>(&self, term_dict: &'a T) -> T::Streamer
|
||||
where
|
||||
T: TermDictionary<'a> + 'a,
|
||||
{
|
||||
use std::collections::Bound::*;
|
||||
let mut term_stream_builder = term_dict.range();
|
||||
term_stream_builder = match &self.left_bound {
|
||||
&Included(ref term_val) => term_stream_builder.ge(term_val),
|
||||
&Excluded(ref term_val) => term_stream_builder.gt(term_val),
|
||||
&Unbounded => term_stream_builder,
|
||||
};
|
||||
term_stream_builder = match &self.right_bound {
|
||||
&Included(ref term_val) => term_stream_builder.le(term_val),
|
||||
&Excluded(ref term_val) => term_stream_builder.lt(term_val),
|
||||
&Unbounded => term_stream_builder,
|
||||
};
|
||||
term_stream_builder.into_stream()
|
||||
}
|
||||
}
|
||||
|
||||
impl Weight for RangeWeight {
|
||||
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
|
||||
let max_doc = reader.max_doc();
|
||||
let mut doc_bitset = DocBitSet::with_maxdoc(max_doc);
|
||||
let mut doc_bitset = BitSet::with_max_value(max_doc);
|
||||
|
||||
let inverted_index = reader.inverted_index(self.range_definition.field);
|
||||
let inverted_index = reader.inverted_index(self.field);
|
||||
let term_dict = inverted_index.terms();
|
||||
let mut term_range = self.range_definition.term_range(term_dict);
|
||||
let mut term_range = self.term_range(term_dict);
|
||||
while term_range.advance() {
|
||||
let term_info = term_range.value();
|
||||
let mut block_segment_postings = inverted_index.read_block_postings_from_terminfo(term_info,IndexRecordOption::Basic);
|
||||
let mut block_segment_postings = inverted_index
|
||||
.read_block_postings_from_terminfo(term_info, IndexRecordOption::Basic);
|
||||
while block_segment_postings.advance() {
|
||||
for &doc in block_segment_postings.docs() {
|
||||
doc_bitset.insert(doc);
|
||||
@@ -133,8 +196,45 @@ impl Weight for RangeWeight {
|
||||
mod tests {
|
||||
|
||||
use Index;
|
||||
use schema::{SchemaBuilder, Field, Document, INT_INDEXED};
|
||||
use schema::{Document, Field, SchemaBuilder, INT_INDEXED};
|
||||
use collector::CountCollector;
|
||||
use std::collections::Bound;
|
||||
use query::Query;
|
||||
use Result;
|
||||
use super::RangeQuery;
|
||||
|
||||
#[test]
|
||||
fn test_range_query_simple() {
|
||||
fn run() -> Result<()> {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let year_field = schema_builder.add_u64_field("year", INT_INDEXED);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
{
|
||||
let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
|
||||
for year in 1950u64..2017u64 {
|
||||
let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
||||
for _ in 0..num_docs_within_year {
|
||||
index_writer.add_document(doc!(year_field => year));
|
||||
}
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
|
||||
let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
|
||||
|
||||
// ... or `1960..=1969` if inclusive range is enabled.
|
||||
let mut count_collector = CountCollector::default();
|
||||
docs_in_the_sixties.search(&*searcher, &mut count_collector)?;
|
||||
assert_eq!(count_collector.count(), 2285);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
run().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_range_query() {
|
||||
@@ -163,43 +263,30 @@ mod tests {
|
||||
}
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
use collector::CountCollector;
|
||||
use schema::Term;
|
||||
use query::Query;
|
||||
use super::{RangeQuery, RangeDefinition};
|
||||
|
||||
let count_multiples = |range: RangeDefinition| {
|
||||
let count_multiples = |range_query: RangeQuery| {
|
||||
let mut count_collector = CountCollector::default();
|
||||
let range_query = RangeQuery::new(range);
|
||||
range_query.search(&*searcher, &mut count_collector).unwrap();
|
||||
range_query
|
||||
.search(&*searcher, &mut count_collector)
|
||||
.unwrap();
|
||||
count_collector.count()
|
||||
};
|
||||
|
||||
assert_eq!(count_multiples(RangeQuery::new_i64(int_field, 10..11)), 9);
|
||||
assert_eq!(
|
||||
count_multiples(RangeDefinition::for_field(int_field)
|
||||
.left_included(Term::from_field_i64(int_field, 10))
|
||||
.right_excluded(Term::from_field_i64(int_field, 11)))
|
||||
, 9
|
||||
count_multiples(RangeQuery::new_i64(
|
||||
int_field,
|
||||
(Bound::Included(10), Bound::Included(11))
|
||||
)),
|
||||
18
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeDefinition::for_field(int_field)
|
||||
.left_included(Term::from_field_i64(int_field, 10))
|
||||
.right_included(Term::from_field_i64(int_field, 11)))
|
||||
, 18
|
||||
count_multiples(RangeQuery::new_i64(
|
||||
int_field,
|
||||
(Bound::Excluded(9), Bound::Included(10))
|
||||
)),
|
||||
9
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeDefinition::for_field(int_field)
|
||||
.left_excluded(Term::from_field_i64(int_field, 9))
|
||||
.right_included(Term::from_field_i64(int_field, 10)))
|
||||
, 9
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeDefinition::for_field(int_field)
|
||||
.left_excluded(Term::from_field_i64(int_field, 9)))
|
||||
, 90
|
||||
);
|
||||
|
||||
assert_eq!(count_multiples(RangeQuery::new_i64(int_field, 9..)), 91);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use DocId;
|
||||
use Score;
|
||||
use collector::Collector;
|
||||
use postings::SkipResult;
|
||||
use common::DocBitSet;
|
||||
use common::BitSet;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
/// Scored set of documents matching a query within a specific segment.
|
||||
@@ -62,19 +62,27 @@ impl Scorer for EmptyScorer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps a `DocSet` and simply returns a constant `Score`.
|
||||
/// The `ConstScorer` is useful if you have a `DocSet` where
|
||||
/// you needed a scorer.
|
||||
///
|
||||
/// The `ConstScorer`'s constant score can be set
|
||||
/// by calling `.set_score(...)`.
|
||||
pub struct ConstScorer<TDocSet: DocSet> {
|
||||
docset: TDocSet,
|
||||
score: Score
|
||||
score: Score,
|
||||
}
|
||||
|
||||
impl<TDocSet: DocSet> ConstScorer<TDocSet> {
|
||||
/// Creates a new `ConstScorer`.
|
||||
pub fn new(docset: TDocSet) -> ConstScorer<TDocSet> {
|
||||
ConstScorer {
|
||||
docset,
|
||||
score: 1f32
|
||||
score: 1f32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the constant score to a different value.
|
||||
pub fn set_score(&mut self, score: Score) {
|
||||
self.score = score;
|
||||
}
|
||||
@@ -101,12 +109,11 @@ impl<TDocSet: DocSet> DocSet for ConstScorer<TDocSet> {
|
||||
self.docset.size_hint()
|
||||
}
|
||||
|
||||
fn to_doc_bitset(&mut self, max_doc: DocId) -> DocBitSet {
|
||||
self.docset.to_doc_bitset(max_doc)
|
||||
fn append_to_bitset(&mut self, bitset: &mut BitSet) {
|
||||
self.docset.append_to_bitset(bitset);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<TDocSet: DocSet> Scorer for ConstScorer<TDocSet> {
|
||||
fn score(&self) -> Score {
|
||||
1f32
|
||||
|
||||
Reference in New Issue
Block a user