mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-16 14:02:55 +00:00
With tantivy 0.20 the minimum memory consumption per SegmentWriter increased to 12MB. 7MB are for the different fast field collectors types (they could be lazily created). Increase the minimum memory from 3MB to 15MB. Change memory variable naming from arena to budget. closes #2156
319 lines
12 KiB
Rust
319 lines
12 KiB
Rust
mod block_wand;
|
|
mod boolean_query;
|
|
mod boolean_weight;
|
|
|
|
pub(crate) use self::block_wand::{block_wand, block_wand_single_scorer};
|
|
pub use self::boolean_query::BooleanQuery;
|
|
pub use self::boolean_weight::BooleanWeight;
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
|
|
use super::*;
|
|
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
|
|
use crate::collector::TopDocs;
|
|
use crate::query::score_combiner::SumWithCoordsCombiner;
|
|
use crate::query::term_query::TermScorer;
|
|
use crate::query::{
|
|
EnableScoring, Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer,
|
|
TermQuery,
|
|
};
|
|
use crate::schema::*;
|
|
use crate::{assert_nearly_equals, DocAddress, DocId, Index, Score};
|
|
|
|
fn aux_test_helper() -> crate::Result<(Index, Field)> {
|
|
let mut schema_builder = Schema::builder();
|
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
{
|
|
// writing the segment
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
index_writer.add_document(doc!(text_field => "a b c"))?;
|
|
index_writer.add_document(doc!(text_field => "a c"))?;
|
|
index_writer.add_document(doc!(text_field => "b c"))?;
|
|
index_writer.add_document(doc!(text_field => "a b c d"))?;
|
|
index_writer.add_document(doc!(text_field => "d"))?;
|
|
index_writer.commit()?;
|
|
}
|
|
Ok((index, text_field))
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_boolean_non_all_term_disjunction() -> crate::Result<()> {
|
|
let (index, text_field) = aux_test_helper()?;
|
|
let query_parser = QueryParser::for_index(&index, vec![text_field]);
|
|
let query = query_parser.parse_query("(+a +b) d")?;
|
|
let searcher = index.reader()?.searcher();
|
|
assert_eq!(query.count(&searcher)?, 3);
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_boolean_single_must_clause() -> crate::Result<()> {
|
|
let (index, text_field) = aux_test_helper()?;
|
|
let query_parser = QueryParser::for_index(&index, vec![text_field]);
|
|
let query = query_parser.parse_query("+a")?;
|
|
let searcher = index.reader()?.searcher();
|
|
let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
|
|
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
|
|
assert!(scorer.is::<TermScorer>());
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_boolean_termonly_intersection() -> crate::Result<()> {
|
|
let (index, text_field) = aux_test_helper()?;
|
|
let query_parser = QueryParser::for_index(&index, vec![text_field]);
|
|
let searcher = index.reader()?.searcher();
|
|
{
|
|
let query = query_parser.parse_query("+a +b +c")?;
|
|
let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
|
|
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
|
|
assert!(scorer.is::<Intersection<TermScorer>>());
|
|
}
|
|
{
|
|
let query = query_parser.parse_query("+a +(b c)")?;
|
|
let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
|
|
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
|
|
assert!(scorer.is::<Intersection<Box<dyn Scorer>>>());
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_boolean_reqopt() -> crate::Result<()> {
|
|
let (index, text_field) = aux_test_helper()?;
|
|
let query_parser = QueryParser::for_index(&index, vec![text_field]);
|
|
let searcher = index.reader()?.searcher();
|
|
{
|
|
let query = query_parser.parse_query("+a b")?;
|
|
let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
|
|
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
|
|
assert!(scorer.is::<RequiredOptionalScorer<
|
|
Box<dyn Scorer>,
|
|
Box<dyn Scorer>,
|
|
SumWithCoordsCombiner,
|
|
>>());
|
|
}
|
|
{
|
|
let query = query_parser.parse_query("+a b")?;
|
|
let weight = query.weight(EnableScoring::disabled_from_schema(searcher.schema()))?;
|
|
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
|
|
assert!(scorer.is::<TermScorer>());
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_boolean_query() -> crate::Result<()> {
|
|
let (index, text_field) = aux_test_helper()?;
|
|
|
|
let make_term_query = |text: &str| {
|
|
let term_query = TermQuery::new(
|
|
Term::from_field_text(text_field, text),
|
|
IndexRecordOption::Basic,
|
|
);
|
|
let query: Box<dyn Query> = Box::new(term_query);
|
|
query
|
|
};
|
|
|
|
let reader = index.reader()?;
|
|
|
|
let matching_docs = |boolean_query: &dyn Query| {
|
|
reader
|
|
.searcher()
|
|
.search(boolean_query, &TEST_COLLECTOR_WITH_SCORE)
|
|
.unwrap()
|
|
.docs()
|
|
.iter()
|
|
.cloned()
|
|
.map(|doc| doc.doc_id)
|
|
.collect::<Vec<DocId>>()
|
|
};
|
|
{
|
|
let boolean_query = BooleanQuery::new(vec![(Occur::Must, make_term_query("a"))]);
|
|
assert_eq!(matching_docs(&boolean_query), vec![0, 1, 3]);
|
|
}
|
|
{
|
|
let boolean_query = BooleanQuery::new(vec![(Occur::Should, make_term_query("a"))]);
|
|
assert_eq!(matching_docs(&boolean_query), vec![0, 1, 3]);
|
|
}
|
|
{
|
|
let boolean_query = BooleanQuery::new(vec![
|
|
(Occur::Should, make_term_query("a")),
|
|
(Occur::Should, make_term_query("b")),
|
|
]);
|
|
assert_eq!(matching_docs(&boolean_query), vec![0, 1, 2, 3]);
|
|
}
|
|
{
|
|
let boolean_query = BooleanQuery::new(vec![
|
|
(Occur::Must, make_term_query("a")),
|
|
(Occur::Should, make_term_query("b")),
|
|
]);
|
|
assert_eq!(matching_docs(&boolean_query), vec![0, 1, 3]);
|
|
}
|
|
{
|
|
let boolean_query = BooleanQuery::new(vec![
|
|
(Occur::Must, make_term_query("a")),
|
|
(Occur::Should, make_term_query("b")),
|
|
(Occur::MustNot, make_term_query("d")),
|
|
]);
|
|
assert_eq!(matching_docs(&boolean_query), vec![0, 1]);
|
|
}
|
|
{
|
|
let boolean_query = BooleanQuery::new(vec![(Occur::MustNot, make_term_query("d"))]);
|
|
assert_eq!(matching_docs(&boolean_query), Vec::<u32>::new());
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_boolean_query_two_excluded() -> crate::Result<()> {
|
|
let (index, text_field) = aux_test_helper()?;
|
|
|
|
let make_term_query = |text: &str| {
|
|
let term_query = TermQuery::new(
|
|
Term::from_field_text(text_field, text),
|
|
IndexRecordOption::Basic,
|
|
);
|
|
let query: Box<dyn Query> = Box::new(term_query);
|
|
query
|
|
};
|
|
|
|
let reader = index.reader()?;
|
|
|
|
let matching_topdocs = |query: &dyn Query| {
|
|
reader
|
|
.searcher()
|
|
.search(query, &TopDocs::with_limit(3))
|
|
.unwrap()
|
|
};
|
|
|
|
let score_doc_4: Score; // score of doc 4 should not be influenced by exclusion
|
|
{
|
|
let boolean_query_no_excluded =
|
|
BooleanQuery::new(vec![(Occur::Must, make_term_query("d"))]);
|
|
let topdocs_no_excluded = matching_topdocs(&boolean_query_no_excluded);
|
|
assert_eq!(topdocs_no_excluded.len(), 2);
|
|
let (top_score, top_doc) = topdocs_no_excluded[0];
|
|
assert_eq!(top_doc, DocAddress::new(0, 4));
|
|
assert_eq!(topdocs_no_excluded[1].1, DocAddress::new(0, 3)); // ignore score of doc 3.
|
|
score_doc_4 = top_score;
|
|
}
|
|
|
|
{
|
|
let boolean_query_two_excluded = BooleanQuery::new(vec![
|
|
(Occur::Must, make_term_query("d")),
|
|
(Occur::MustNot, make_term_query("a")),
|
|
(Occur::MustNot, make_term_query("b")),
|
|
]);
|
|
let topdocs_excluded = matching_topdocs(&boolean_query_two_excluded);
|
|
assert_eq!(topdocs_excluded.len(), 1);
|
|
let (top_score, top_doc) = topdocs_excluded[0];
|
|
assert_eq!(top_doc, DocAddress::new(0, 4));
|
|
assert_eq!(top_score, score_doc_4);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_boolean_query_with_weight() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let text_field = schema_builder.add_text_field("text", TEXT);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
{
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
index_writer.add_document(doc!(text_field => "a b c"))?;
|
|
index_writer.add_document(doc!(text_field => "a c"))?;
|
|
index_writer.add_document(doc!(text_field => "b c"))?;
|
|
index_writer.commit()?;
|
|
}
|
|
let term_a: Box<dyn Query> = Box::new(TermQuery::new(
|
|
Term::from_field_text(text_field, "a"),
|
|
IndexRecordOption::WithFreqs,
|
|
));
|
|
let term_b: Box<dyn Query> = Box::new(TermQuery::new(
|
|
Term::from_field_text(text_field, "b"),
|
|
IndexRecordOption::WithFreqs,
|
|
));
|
|
let reader = index.reader().unwrap();
|
|
let searcher = reader.searcher();
|
|
let boolean_query =
|
|
BooleanQuery::new(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
|
|
let boolean_weight = boolean_query
|
|
.weight(EnableScoring::enabled_from_searcher(&searcher))
|
|
.unwrap();
|
|
{
|
|
let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
|
|
assert_eq!(boolean_scorer.doc(), 0u32);
|
|
assert_nearly_equals!(boolean_scorer.score(), 0.84163445);
|
|
}
|
|
{
|
|
let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 2.0)?;
|
|
assert_eq!(boolean_scorer.doc(), 0u32);
|
|
assert_nearly_equals!(boolean_scorer.score(), 1.6832689);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_intersection_score() -> crate::Result<()> {
|
|
let (index, text_field) = aux_test_helper()?;
|
|
|
|
let make_term_query = |text: &str| {
|
|
let term_query = TermQuery::new(
|
|
Term::from_field_text(text_field, text),
|
|
IndexRecordOption::Basic,
|
|
);
|
|
let query: Box<dyn Query> = Box::new(term_query);
|
|
query
|
|
};
|
|
let reader = index.reader()?;
|
|
let score_docs = |boolean_query: &dyn Query| {
|
|
let fruit = reader
|
|
.searcher()
|
|
.search(boolean_query, &TEST_COLLECTOR_WITH_SCORE)
|
|
.unwrap();
|
|
fruit.scores().to_vec()
|
|
};
|
|
|
|
{
|
|
let boolean_query = BooleanQuery::new(vec![
|
|
(Occur::Must, make_term_query("a")),
|
|
(Occur::Must, make_term_query("b")),
|
|
]);
|
|
let scores = score_docs(&boolean_query);
|
|
assert_nearly_equals!(scores[0], 0.977973);
|
|
assert_nearly_equals!(scores[1], 0.84699446);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_explain() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let text = schema_builder.add_text_field("text", STRING);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
index_writer.add_document(doc!(text=>"a"))?;
|
|
index_writer.add_document(doc!(text=>"b"))?;
|
|
index_writer.commit()?;
|
|
let searcher = index.reader()?.searcher();
|
|
let term_a: Box<dyn Query> = Box::new(TermQuery::new(
|
|
Term::from_field_text(text, "a"),
|
|
IndexRecordOption::Basic,
|
|
));
|
|
let term_b: Box<dyn Query> = Box::new(TermQuery::new(
|
|
Term::from_field_text(text, "b"),
|
|
IndexRecordOption::Basic,
|
|
));
|
|
let query = BooleanQuery::from(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
|
|
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32))?;
|
|
assert_nearly_equals!(explanation.value(), std::f32::consts::LN_2);
|
|
Ok(())
|
|
}
|
|
}
|