mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
Added CR comments.
Added Unit tests.
This commit is contained in:
61
src/collector/docset_collector.rs
Normal file
61
src/collector/docset_collector.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use crate::{DocAddress, DocId, Score};
|
||||
|
||||
use super::{Collector, SegmentCollector};
|
||||
|
||||
/// Collector that returns the set of `DocAddress`es matching the query.
|
||||
///
|
||||
/// This collector is mostly useful for tests.
|
||||
pub struct DocSetCollector;
|
||||
|
||||
// `Collector` implementation: one `DocSetChildCollector` is created per segment,
// and the per-segment doc id sets are merged into a single set of `DocAddress`.
impl Collector for DocSetCollector {
|
||||
// Final result: every matching document, addressed as (segment ordinal, doc id).
type Fruit = HashSet<DocAddress>;
|
||||
type Child = DocSetChildCollector;
|
||||
|
||||
// Creates the per-segment collector. The segment reader itself is not used;
// only the segment's local id is recorded, with an initially empty doc set.
fn for_segment(
|
||||
&self,
|
||||
segment_local_id: crate::SegmentLocalId,
|
||||
_segment: &crate::SegmentReader,
|
||||
) -> crate::Result<Self::Child> {
|
||||
Ok(DocSetChildCollector {
|
||||
segment_local_id,
|
||||
docs: HashSet::new(),
|
||||
})
|
||||
}
|
||||
|
||||
// Scores are ignored by the child collector, so scoring is not requested.
fn requires_scoring(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
// Flattens the `(segment id, doc ids)` pairs produced by the child collectors
// into one set of `DocAddress`.
fn merge_fruits(
|
||||
&self,
|
||||
// NOTE(review): the segment id is spelled `u32` here but `crate::SegmentLocalId`
// in `for_segment` — presumably the same type; confirm and consider unifying.
segment_fruits: Vec<(u32, HashSet<DocId>)>,
|
||||
) -> crate::Result<Self::Fruit> {
|
||||
// Total number of collected docs, used to size the result set up front.
let len: usize = segment_fruits.iter().map(|(_, docset)| docset.len()).sum();
|
||||
let mut result = HashSet::with_capacity(len);
|
||||
for (segment_local_id, docs) in segment_fruits {
|
||||
for doc in docs {
|
||||
result.insert(DocAddress(segment_local_id, doc));
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-segment collector created by `DocSetCollector::for_segment`.
///
/// It accumulates the ids of the documents it is given, together with the
/// local id of the segment it was created for.
pub struct DocSetChildCollector {
|
||||
// Local id of the segment this collector was created for.
segment_local_id: u32,
|
||||
// Doc ids collected so far for this segment.
docs: HashSet<DocId>,
|
||||
}
|
||||
|
||||
// `SegmentCollector` implementation: records doc ids and hands them back,
// paired with the segment's local id, when harvested.
impl SegmentCollector for DocSetChildCollector {
|
||||
// (segment local id, doc ids collected in that segment).
type Fruit = (u32, HashSet<DocId>);
|
||||
|
||||
// Records `doc` in the set; the score is ignored.
fn collect(&mut self, doc: crate::DocId, _score: Score) {
|
||||
self.docs.insert(doc);
|
||||
}
|
||||
|
||||
// Consumes the collector and returns the segment id with its collected doc ids.
fn harvest(self) -> (u32, HashSet<DocId>) {
|
||||
(self.segment_local_id, self.docs)
|
||||
}
|
||||
}
|
||||
@@ -111,6 +111,9 @@ mod facet_collector;
|
||||
pub use self::facet_collector::FacetCollector;
|
||||
use crate::query::Weight;
|
||||
|
||||
mod docset_collector;
|
||||
pub use self::docset_collector::DocSetCollector;
|
||||
|
||||
/// `Fruit` is the type for the result of our collection.
|
||||
/// e.g. `usize` for the `Count` collector.
|
||||
pub trait Fruit: Send + downcast_rs::Downcast {}
|
||||
|
||||
@@ -277,7 +277,7 @@ impl DocAddress {
|
||||
///
|
||||
/// The id used for the segment is actually an ordinal
|
||||
/// in the list of `Segment`s held by a `Searcher`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct DocAddress(pub SegmentLocalId, pub DocId);
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -143,7 +143,7 @@ impl Clone for BooleanQuery {
|
||||
|
||||
impl From<Vec<(Occur, Box<dyn Query>)>> for BooleanQuery {
|
||||
fn from(subqueries: Vec<(Occur, Box<dyn Query>)>) -> BooleanQuery {
|
||||
BooleanQuery { subqueries }
|
||||
BooleanQuery::new(subqueries)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,7 +167,6 @@ impl Query for BooleanQuery {
|
||||
}
|
||||
|
||||
impl BooleanQuery {
|
||||
|
||||
/// Creates a new boolean query.
|
||||
pub fn new(subqueries: Vec<(Occur, Box<dyn Query>)>) -> BooleanQuery {
|
||||
BooleanQuery { subqueries }
|
||||
@@ -176,13 +175,13 @@ impl BooleanQuery {
|
||||
/// Returns the intersection of the queries.
|
||||
pub fn intersection(queries: Vec<Box<dyn Query>>) -> BooleanQuery {
|
||||
let subqueries = queries.into_iter().map(|s| (Occur::Must, s)).collect();
|
||||
BooleanQuery { subqueries }
|
||||
BooleanQuery::new(subqueries)
|
||||
}
|
||||
|
||||
/// Returns the union of the queries.
|
||||
pub fn union(queries: Vec<Box<dyn Query>>) -> BooleanQuery {
|
||||
let subqueries = queries.into_iter().map(|s| (Occur::Should, s)).collect();
|
||||
BooleanQuery { subqueries }
|
||||
BooleanQuery::new(subqueries)
|
||||
}
|
||||
|
||||
/// Helper method to create a boolean query matching a given list of terms.
|
||||
@@ -204,3 +203,77 @@ impl BooleanQuery {
|
||||
&self.subqueries[..]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::BooleanQuery;
|
||||
use crate::collector::DocSetCollector;
|
||||
use crate::query::{QueryClone, TermQuery};
|
||||
use crate::schema::{IndexRecordOption, Schema, TEXT};
|
||||
use crate::{DocAddress, Index, Term};
|
||||
|
||||
// Builds an in-RAM index with a single text field named "text" and four
// documents, added in this order: "b c", "a c", "a b", "a d".
fn create_test_index() -> crate::Result<Index> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let text = schema_builder.add_text_field("text", TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
// NOTE(review): this function already returns `crate::Result`, so `?` could
// presumably replace `unwrap()` here — confirm the error type converts.
let mut writer = index.writer_for_tests().unwrap();
|
||||
writer.add_document(doc!(text=>"b c"));
|
||||
writer.add_document(doc!(text=>"a c"));
|
||||
writer.add_document(doc!(text=>"a b"));
|
||||
writer.add_document(doc!(text=>"a d"));
|
||||
writer.commit()?;
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
// Checks that the union of the term queries "a" and "d" matches exactly the
// documents with ids 1, 2 and 3 of the test index.
#[test]
|
||||
fn test_union() -> crate::Result<()> {
|
||||
let index = create_test_index()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let text = index.schema().get_field("text").unwrap();
|
||||
let term_a = TermQuery::new(Term::from_field_text(text, "a"), IndexRecordOption::Basic);
|
||||
let term_d = TermQuery::new(Term::from_field_text(text, "d"), IndexRecordOption::Basic);
|
||||
let union_ad = BooleanQuery::union(vec![term_a.box_clone(), term_d.box_clone()]);
|
||||
let docs = searcher.search(&union_ad, &DocSetCollector)?;
|
||||
// The expected addresses all use segment ordinal 0 — presumably the four test
// documents end up in a single segment; verify if this test becomes flaky.
assert_eq!(
|
||||
docs,
|
||||
vec![
|
||||
DocAddress(0u32, 1u32),
|
||||
DocAddress(0u32, 2u32),
|
||||
DocAddress(0u32, 3u32)
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Checks pairwise intersections of the term queries "a", "b" and "c" against
// the test index: each pair is expected to match exactly one document.
#[test]
|
||||
fn test_intersection() -> crate::Result<()> {
|
||||
let index = create_test_index()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let text = index.schema().get_field("text").unwrap();
|
||||
let term_a = TermQuery::new(Term::from_field_text(text, "a"), IndexRecordOption::Basic);
|
||||
let term_b = TermQuery::new(Term::from_field_text(text, "b"), IndexRecordOption::Basic);
|
||||
let term_c = TermQuery::new(Term::from_field_text(text, "c"), IndexRecordOption::Basic);
|
||||
let intersection_ab =
|
||||
BooleanQuery::intersection(vec![term_a.box_clone(), term_b.box_clone()]);
|
||||
let intersection_ac =
|
||||
BooleanQuery::intersection(vec![term_a.box_clone(), term_c.box_clone()]);
|
||||
let intersection_bc =
|
||||
BooleanQuery::intersection(vec![term_b.box_clone(), term_c.box_clone()]);
|
||||
// "a" AND "b": expected to match only doc id 2 (added as "a b").
{
|
||||
let docs = searcher.search(&intersection_ab, &DocSetCollector)?;
|
||||
assert_eq!(docs, vec![DocAddress(0u32, 2u32)].into_iter().collect());
|
||||
}
|
||||
// "a" AND "c": expected to match only doc id 1 (added as "a c").
{
|
||||
let docs = searcher.search(&intersection_ac, &DocSetCollector)?;
|
||||
assert_eq!(docs, vec![DocAddress(0u32, 1u32)].into_iter().collect());
|
||||
}
|
||||
// "b" AND "c": expected to match only doc id 0 (added as "b c").
{
|
||||
let docs = searcher.search(&intersection_bc, &DocSetCollector)?;
|
||||
assert_eq!(docs, vec![DocAddress(0u32, 0u32)].into_iter().collect());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ pub(crate) use self::fuzzy_query::DFAWrapper;
|
||||
pub use self::fuzzy_query::FuzzyTermQuery;
|
||||
pub use self::intersection::intersect_scorers;
|
||||
pub use self::phrase_query::PhraseQuery;
|
||||
pub use self::query::Query;
|
||||
pub use self::query::{Query, QueryClone};
|
||||
pub use self::query_parser::QueryParser;
|
||||
pub use self::query_parser::QueryParserError;
|
||||
pub use self::range_query::RangeQuery;
|
||||
|
||||
@@ -71,7 +71,9 @@ pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
|
||||
fn query_terms(&self, _term_set: &mut BTreeSet<Term>) {}
|
||||
}
|
||||
|
||||
/// Provides `box_clone`, which clones a query into a `Box<dyn Query>`.
|
||||
pub trait QueryClone {
|
||||
/// Returns a boxed clone of `self`.
|
||||
fn box_clone(&self) -> Box<dyn Query>;
|
||||
}
|
||||
|
||||
|
||||
@@ -96,6 +96,7 @@ impl Term {
|
||||
term
|
||||
}
|
||||
|
||||
/// Builds a term from a field and raw bytes.
|
||||
pub fn from_field_bytes(field: Field, bytes: &[u8]) -> Term {
|
||||
let mut term = Term::for_field(field);
|
||||
term.set_bytes(bytes);
|
||||
|
||||
Reference in New Issue
Block a user