Using the manual reload policy in IndexWriter. (#1667)

This commit is contained in:
Paul Masurel
2022-11-09 19:20:41 +09:00
committed by GitHub
parent 8ca12a5683
commit 3edf0a2724
28 changed files with 189 additions and 162 deletions

View File

@@ -4,7 +4,7 @@ use std::{fmt, io};
use crate::collector::Collector;
use crate::core::{Executor, SegmentReader};
use crate::query::Query;
use crate::query::{EnableScoring, Query};
use crate::schema::{Document, Schema, Term};
use crate::space_usage::SearcherSpaceUsage;
use crate::store::{CacheStats, StoreReader};
@@ -199,7 +199,12 @@ impl Searcher {
executor: &Executor,
) -> crate::Result<C::Fruit> {
let scoring_enabled = collector.requires_scoring();
let weight = query.weight(self, scoring_enabled)?;
let enabled_scoring = if scoring_enabled {
EnableScoring::Enabled(self)
} else {
EnableScoring::Disabled(self.schema())
};
let weight = query.weight(enabled_scoring)?;
let segment_readers = self.segment_readers();
let fruits = executor.map(
|(segment_ord, segment_reader)| {

View File

@@ -6,7 +6,7 @@ pub use self::writer::BytesFastFieldWriter;
#[cfg(test)]
mod tests {
use crate::query::TermQuery;
use crate::query::{EnableScoring, TermQuery};
use crate::schema::{BytesOptions, IndexRecordOption, Schema, Value, FAST, INDEXED, STORED};
use crate::{DocAddress, DocSet, Index, Searcher, Term};
@@ -82,7 +82,7 @@ mod tests {
let field = searcher.schema().get_field("string_bytes").unwrap();
let term = Term::from_field_bytes(field, b"lucene".as_ref());
let term_query = TermQuery::new(term, IndexRecordOption::Basic);
let term_weight = term_query.specialized_weight(&searcher, true)?;
let term_weight = term_query.specialized_weight(EnableScoring::Enabled(&searcher))?;
let term_scorer = term_weight.specialized_scorer(searcher.segment_reader(0), 1.0)?;
assert_eq!(term_scorer.doc(), 0u32);
Ok(())
@@ -95,7 +95,8 @@ mod tests {
let field = searcher.schema().get_field("string_bytes").unwrap();
let term = Term::from_field_bytes(field, b"lucene".as_ref());
let term_query = TermQuery::new(term, IndexRecordOption::Basic);
let term_weight_err = term_query.specialized_weight(&searcher, false);
let term_weight_err =
term_query.specialized_weight(EnableScoring::Disabled(searcher.schema()));
assert!(matches!(
term_weight_err,
Err(crate::TantivyError::SchemaError(_))

View File

@@ -34,7 +34,7 @@ mod tests {
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::fieldnorm::{FieldNormReader, FieldNormsSerializer, FieldNormsWriter};
use crate::query::{Query, TermQuery};
use crate::query::{EnableScoring, Query, TermQuery};
use crate::schema::{
Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, STORED, TEXT,
};
@@ -112,7 +112,7 @@ mod tests {
Term::from_field_text(text, "hello"),
IndexRecordOption::WithFreqs,
);
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
assert_eq!(scorer.doc(), 0);
assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
@@ -141,7 +141,7 @@ mod tests {
Term::from_field_text(text, "hello"),
IndexRecordOption::WithFreqs,
);
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
assert_eq!(scorer.doc(), 0);
assert!((scorer.score() - 0.22920431).abs() < 0.001f32);

View File

@@ -19,9 +19,9 @@ use crate::indexer::index_writer_status::IndexWriterStatus;
use crate::indexer::operation::DeleteOperation;
use crate::indexer::stamper::Stamper;
use crate::indexer::{MergePolicy, SegmentEntry, SegmentWriter};
use crate::query::{Query, TermQuery};
use crate::query::{EnableScoring, Query, TermQuery};
use crate::schema::{Document, IndexRecordOption, Term};
use crate::{FutureResult, IndexReader, Opstamp};
use crate::{FutureResult, Opstamp};
// Size of the margin for the `memory_arena`. A segment is closed when the remaining memory
// in the `memory_arena` goes below MARGIN_IN_BYTES.
@@ -57,7 +57,6 @@ pub struct IndexWriter {
_directory_lock: Option<DirectoryLock>,
index: Index,
index_reader: IndexReader,
memory_arena_in_bytes_per_thread: usize,
@@ -298,8 +297,6 @@ impl IndexWriter {
memory_arena_in_bytes_per_thread,
index: index.clone(),
index_reader: index.reader()?,
index_writer_status: IndexWriterStatus::from(document_receiver),
operation_sender: document_sender,
@@ -681,8 +678,7 @@ impl IndexWriter {
/// only after calling `commit()`.
#[doc(hidden)]
pub fn delete_query(&self, query: Box<dyn Query>) -> crate::Result<Opstamp> {
let weight = query.weight(&self.index_reader.searcher(), false)?;
let weight = query.weight(EnableScoring::Disabled(&self.index.schema()))?;
let opstamp = self.stamper.stamp();
let delete_operation = DeleteOperation {
opstamp,
@@ -763,8 +759,7 @@ impl IndexWriter {
match user_op {
UserOperation::Delete(term) => {
let query = TermQuery::new(term, IndexRecordOption::Basic);
let weight = query.weight(&self.index_reader.searcher(), false)?;
let weight = query.weight(EnableScoring::Disabled(&self.index.schema()))?;
let delete_operation = DeleteOperation {
opstamp,
target: weight,

View File

@@ -1064,7 +1064,7 @@ mod tests {
};
use crate::collector::{Count, FacetCollector};
use crate::core::Index;
use crate::query::{AllQuery, BooleanQuery, Scorer, TermQuery};
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
use crate::schema::{
Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
TextFieldIndexing, INDEXED, TEXT,
@@ -1977,7 +1977,7 @@ mod tests {
let reader = index.reader()?;
let searcher = reader.searcher();
let mut term_scorer = term_query
.specialized_weight(&searcher, true)?
.specialized_weight(EnableScoring::Enabled(&searcher))?
.specialized_scorer(searcher.segment_reader(0u32), 1.0)?;
assert_eq!(term_scorer.doc(), 0);
assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855);
@@ -1992,7 +1992,7 @@ mod tests {
assert_eq!(searcher.segment_readers().len(), 2);
for segment_reader in searcher.segment_readers() {
let mut term_scorer = term_query
.specialized_weight(&searcher, true)?
.specialized_weight(EnableScoring::Enabled(&searcher))?
.specialized_scorer(segment_reader, 1.0)?;
// the difference compared to before is intrinsic to the bm25 formula. no worries
// there.
@@ -2017,7 +2017,7 @@ mod tests {
let segment_reader = searcher.segment_reader(0u32);
let mut term_scorer = term_query
.specialized_weight(&searcher, true)?
.specialized_weight(EnableScoring::Enabled(&searcher))?
.specialized_scorer(segment_reader, 1.0)?;
// the difference compared to before is intrinsic to the bm25 formula. no worries there.
for doc in segment_reader.doc_ids_alive() {

View File

@@ -277,6 +277,8 @@ pub mod fastfield;
pub mod fieldnorm;
pub mod positions;
pub mod postings;
/// Module containing the different query implementations.
pub mod query;
pub mod schema;
pub mod space_usage;

View File

@@ -1,8 +1,8 @@
use crate::core::{Searcher, SegmentReader};
use crate::core::SegmentReader;
use crate::docset::{DocSet, TERMINATED};
use crate::query::boost_query::BoostScorer;
use crate::query::explanation::does_not_match;
use crate::query::{Explanation, Query, Scorer, Weight};
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, Score};
/// Query that matches all of the documents.
@@ -12,7 +12,7 @@ use crate::{DocId, Score};
pub struct AllQuery;
impl Query for AllQuery {
fn weight(&self, _: &Searcher, _: bool) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, _: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(AllWeight))
}
}
@@ -72,7 +72,7 @@ impl Scorer for AllScorer {
mod tests {
use super::AllQuery;
use crate::docset::TERMINATED;
use crate::query::Query;
use crate::query::{EnableScoring, Query};
use crate::schema::{Schema, TEXT};
use crate::Index;
@@ -95,7 +95,7 @@ mod tests {
let index = create_test_index()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let weight = AllQuery.weight(&searcher, false)?;
let weight = AllQuery.weight(EnableScoring::Disabled(&index.schema()))?;
{
let reader = searcher.segment_reader(0);
let mut scorer = weight.scorer(reader, 1.0)?;
@@ -118,7 +118,7 @@ mod tests {
let index = create_test_index()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let weight = AllQuery.weight(&searcher, false)?;
let weight = AllQuery.weight(EnableScoring::Disabled(searcher.schema()))?;
let reader = searcher.segment_reader(0);
{
let mut scorer = weight.scorer(reader, 2.0)?;

View File

@@ -1,7 +1,6 @@
use super::boolean_weight::BooleanWeight;
use crate::query::{Occur, Query, SumWithCoordsCombiner, TermQuery, Weight};
use crate::query::{EnableScoring, Occur, Query, SumWithCoordsCombiner, TermQuery, Weight};
use crate::schema::{IndexRecordOption, Term};
use crate::Searcher;
/// The boolean query returns a set of documents
/// that matches the Boolean combination of constituent subqueries.
@@ -143,17 +142,15 @@ impl From<Vec<(Occur, Box<dyn Query>)>> for BooleanQuery {
}
impl Query for BooleanQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let sub_weights = self
.subqueries
.iter()
.map(|&(ref occur, ref subquery)| {
Ok((*occur, subquery.weight(searcher, scoring_enabled)?))
})
.map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(enable_scoring)?)))
.collect::<crate::Result<_>>()?;
Ok(Box::new(BooleanWeight::new(
sub_weights,
scoring_enabled,
enable_scoring.is_scoring_enabled(),
Box::new(SumWithCoordsCombiner::default),
)))
}

View File

@@ -15,7 +15,8 @@ mod tests {
use crate::query::score_combiner::SumWithCoordsCombiner;
use crate::query::term_query::TermScorer;
use crate::query::{
Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer, TermQuery,
EnableScoring, Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer,
TermQuery,
};
use crate::schema::*;
use crate::{assert_nearly_equals, DocAddress, DocId, Index, Score};
@@ -54,7 +55,7 @@ mod tests {
let query_parser = QueryParser::for_index(&index, vec![text_field]);
let query = query_parser.parse_query("+a")?;
let searcher = index.reader()?.searcher();
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<TermScorer>());
Ok(())
@@ -67,13 +68,13 @@ mod tests {
let searcher = index.reader()?.searcher();
{
let query = query_parser.parse_query("+a +b +c")?;
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<Intersection<TermScorer>>());
}
{
let query = query_parser.parse_query("+a +(b c)")?;
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<Intersection<Box<dyn Scorer>>>());
}
@@ -87,7 +88,7 @@ mod tests {
let searcher = index.reader()?.searcher();
{
let query = query_parser.parse_query("+a b")?;
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<RequiredOptionalScorer<
Box<dyn Scorer>,
@@ -97,7 +98,7 @@ mod tests {
}
{
let query = query_parser.parse_query("+a b")?;
let weight = query.weight(&searcher, false)?;
let weight = query.weight(EnableScoring::Disabled(searcher.schema()))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<TermScorer>());
}
@@ -241,7 +242,9 @@ mod tests {
let searcher = reader.searcher();
let boolean_query =
BooleanQuery::new(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
let boolean_weight = boolean_query.weight(&searcher, true).unwrap();
let boolean_weight = boolean_query
.weight(EnableScoring::Enabled(&searcher))
.unwrap();
{
let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert_eq!(boolean_scorer.doc(), 0u32);

View File

@@ -2,8 +2,8 @@ use std::fmt;
use crate::fastfield::AliveBitSet;
use crate::query::explanation::does_not_match;
use crate::query::{Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, Searcher, SegmentReader, Term};
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, Term};
/// `BoostQuery` is a wrapper over a query used to boost its score.
///
@@ -38,9 +38,9 @@ impl fmt::Debug for BoostQuery {
}
impl Query for BoostQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
let weight_without_boost = self.query.weight(searcher, scoring_enabled)?;
let boosted_weight = if scoring_enabled {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let weight_without_boost = self.query.weight(enable_scoring)?;
let boosted_weight = if enable_scoring.is_scoring_enabled() {
Box::new(BoostWeight::new(weight_without_boost, self.boost))
} else {
weight_without_boost

View File

@@ -1,7 +1,7 @@
use std::fmt;
use crate::query::{Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, Searcher, SegmentReader, TantivyError, Term};
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};
/// `ConstScoreQuery` is a wrapper over a query to provide a constant score.
/// It can avoid unnecessary score computation on the wrapped query.
@@ -36,9 +36,9 @@ impl fmt::Debug for ConstScoreQuery {
}
impl Query for ConstScoreQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
let inner_weight = self.query.weight(searcher, scoring_enabled)?;
Ok(if scoring_enabled {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let inner_weight = self.query.weight(enable_scoring)?;
Ok(if enable_scoring.is_scoring_enabled() {
Box::new(ConstWeight::new(inner_weight, self.score))
} else {
inner_weight

View File

@@ -1,7 +1,7 @@
use tantivy_query_grammar::Occur;
use crate::query::{BooleanWeight, DisjunctionMaxCombiner, Query, Weight};
use crate::{Score, Searcher, Term};
use crate::query::{BooleanWeight, DisjunctionMaxCombiner, EnableScoring, Query, Weight};
use crate::{Score, Term};
/// The disjunction max query returns documents matching one or more wrapped queries,
/// called query clauses or clauses.
@@ -91,16 +91,16 @@ impl Clone for DisjunctionMaxQuery {
}
impl Query for DisjunctionMaxQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let disjuncts = self
.disjuncts
.iter()
.map(|disjunct| Ok((Occur::Should, disjunct.weight(searcher, scoring_enabled)?)))
.map(|disjunct| Ok((Occur::Should, disjunct.weight(enable_scoring)?)))
.collect::<crate::Result<_>>()?;
let tie_breaker = self.tie_breaker;
Ok(Box::new(BooleanWeight::new(
disjuncts,
scoring_enabled,
enable_scoring.is_scoring_enabled(),
Box::new(move || DisjunctionMaxCombiner::with_tie_breaker(tie_breaker)),
)))
}

View File

@@ -1,7 +1,7 @@
use super::Scorer;
use crate::docset::TERMINATED;
use crate::query::explanation::does_not_match;
use crate::query::{Explanation, Query, Weight};
use crate::query::{EnableScoring, Explanation, Query, Weight};
use crate::{DocId, DocSet, Score, Searcher, SegmentReader};
/// `EmptyQuery` is a dummy `Query` in which no document matches.
@@ -11,11 +11,7 @@ use crate::{DocId, DocSet, Score, Searcher, SegmentReader};
pub struct EmptyQuery;
impl Query for EmptyQuery {
fn weight(
&self,
_searcher: &Searcher,
_scoring_enabled: bool,
) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, _enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(EmptyWeight))
}

View File

@@ -5,9 +5,8 @@ use levenshtein_automata::{Distance, LevenshteinAutomatonBuilder, DFA};
use once_cell::sync::Lazy;
use tantivy_fst::Automaton;
use crate::query::{AutomatonWeight, Query, Weight};
use crate::query::{AutomatonWeight, EnableScoring, Query, Weight};
use crate::schema::Term;
use crate::Searcher;
use crate::TantivyError::InvalidArgument;
pub(crate) struct DfaWrapper(pub DFA);
@@ -158,11 +157,7 @@ impl FuzzyTermQuery {
}
impl Query for FuzzyTermQuery {
fn weight(
&self,
_searcher: &Searcher,
_scoring_enabled: bool,
) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, _enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(self.specialized_weight()?))
}
}

View File

@@ -1,5 +1,3 @@
//! Query Module
mod all_query;
mod automaton_weight;
mod bitset;
@@ -51,7 +49,7 @@ pub use self::fuzzy_query::FuzzyTermQuery;
pub use self::intersection::{intersect_scorers, Intersection};
pub use self::more_like_this::{MoreLikeThisQuery, MoreLikeThisQueryBuilder};
pub use self::phrase_query::PhraseQuery;
pub use self::query::{Query, QueryClone};
pub use self::query::{EnableScoring, Query, QueryClone};
pub use self::query_parser::{QueryParser, QueryParserError};
pub use self::range_query::RangeQuery;
pub use self::regex_query::RegexQuery;

View File

@@ -1,4 +1,6 @@
mod more_like_this;
/// Module containing the different query implementations.
mod query;
pub use self::more_like_this::MoreLikeThis;

View File

@@ -1,7 +1,7 @@
use super::MoreLikeThis;
use crate::query::{Query, Weight};
use crate::query::{EnableScoring, Query, Weight};
use crate::schema::{Field, Value};
use crate::{DocAddress, Result, Searcher};
use crate::DocAddress;
/// A query that matches all of the documents similar to a document
/// or a set of field values provided.
@@ -42,16 +42,23 @@ impl MoreLikeThisQuery {
}
impl Query for MoreLikeThisQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result<Box<dyn Weight>> {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let searcher = match enable_scoring {
EnableScoring::Enabled(searcher) => searcher,
EnableScoring::Disabled(_) => {
let err = "MoreLikeThisQuery requires to enable scoring.".to_string();
return Err(crate::TantivyError::InvalidArgument(err));
}
};
match &self.target {
TargetDocument::DocumentAdress(doc_address) => self
.mlt
.query_with_document(searcher, *doc_address)?
.weight(searcher, scoring_enabled),
.weight(enable_scoring),
TargetDocument::DocumentFields(doc_fields) => self
.mlt
.query_with_document_fields(searcher, doc_fields)?
.weight(searcher, scoring_enabled),
.weight(enable_scoring),
}
}
}

View File

@@ -14,7 +14,7 @@ pub mod tests {
use super::*;
use crate::collector::tests::{TEST_COLLECTOR_WITHOUT_SCORE, TEST_COLLECTOR_WITH_SCORE};
use crate::core::Index;
use crate::query::{QueryParser, Weight};
use crate::query::{EnableScoring, QueryParser, Weight};
use crate::schema::{Schema, Term, TEXT};
use crate::{assert_nearly_equals, DocAddress, DocId, TERMINATED};
@@ -79,7 +79,8 @@ pub mod tests {
.map(|text| Term::from_field_text(text_field, text))
.collect();
let phrase_query = PhraseQuery::new(terms);
let phrase_weight = phrase_query.phrase_weight(&searcher, false)?;
let phrase_weight =
phrase_query.phrase_weight(EnableScoring::Disabled(searcher.schema()))?;
let mut phrase_scorer = phrase_weight.scorer(searcher.segment_reader(0), 1.0)?;
assert_eq!(phrase_scorer.doc(), 1);
assert_eq!(phrase_scorer.advance(), TERMINATED);
@@ -359,7 +360,9 @@ pub mod tests {
let matching_docs = |query: &str| {
let query_parser = QueryParser::for_index(&index, vec![json_field]);
let phrase_query = query_parser.parse_query(query).unwrap();
let phrase_weight = phrase_query.weight(&searcher, false).unwrap();
let phrase_weight = phrase_query
.weight(EnableScoring::Disabled(searcher.schema()))
.unwrap();
let mut phrase_scorer = phrase_weight
.scorer(searcher.segment_reader(0), 1.0f32)
.unwrap();

View File

@@ -1,7 +1,6 @@
use super::PhraseWeight;
use crate::core::searcher::Searcher;
use crate::query::bm25::Bm25Weight;
use crate::query::{Query, Weight};
use crate::query::{EnableScoring, Query, Weight};
use crate::schema::{Field, IndexRecordOption, Term};
/// `PhraseQuery` matches a specific sequence of words.
@@ -67,7 +66,7 @@ impl PhraseQuery {
/// Slop allowed for the phrase.
///
/// The query will match if its terms are separated by `slop` terms at most.
/// By default the slop is 0 meaning query terms need to be adjacent.
/// By default the slop is 0 meaning query terms need to be adjacent.
pub fn set_slop(&mut self, value: u32) {
self.slop = value;
}
@@ -91,10 +90,9 @@ impl PhraseQuery {
/// a specialized type [`PhraseWeight`] instead of a Boxed trait.
pub(crate) fn phrase_weight(
&self,
searcher: &Searcher,
scoring_enabled: bool,
enable_scoring: EnableScoring<'_>,
) -> crate::Result<PhraseWeight> {
let schema = searcher.schema();
let schema = enable_scoring.schema();
let field_entry = schema.get_field_entry(self.field);
let has_positions = field_entry
.field_type()
@@ -109,8 +107,11 @@ impl PhraseQuery {
)));
}
let terms = self.phrase_terms();
let bm25_weight = Bm25Weight::for_terms(searcher, &terms)?;
let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight, scoring_enabled);
let bm25_weight_opt = match enable_scoring {
EnableScoring::Enabled(searcher) => Some(Bm25Weight::for_terms(searcher, &terms)?),
EnableScoring::Disabled(_) => None,
};
let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight_opt);
if self.slop > 0 {
weight.slop(self.slop);
}
@@ -122,8 +123,8 @@ impl Query for PhraseQuery {
/// Create the weight associated with a query.
///
/// See [`Weight`].
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
let phrase_weight = self.phrase_weight(searcher, scoring_enabled)?;
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let phrase_weight = self.phrase_weight(enable_scoring)?;
Ok(Box::new(phrase_weight))
}

View File

@@ -50,8 +50,7 @@ pub struct PhraseScorer<TPostings: Postings> {
right: Vec<u32>,
phrase_count: u32,
fieldnorm_reader: FieldNormReader,
similarity_weight: Bm25Weight,
scoring_enabled: bool,
similarity_weight_opt: Option<Bm25Weight>,
slop: u32,
}
@@ -245,11 +244,11 @@ fn intersection_exists_with_slop(left: &[u32], right: &[u32], slop: u32) -> bool
}
impl<TPostings: Postings> PhraseScorer<TPostings> {
// If `similarity_weight_opt` is `None`, then scoring is disabled.
pub fn new(
term_postings: Vec<(usize, TPostings)>,
similarity_weight: Bm25Weight,
similarity_weight_opt: Option<Bm25Weight>,
fieldnorm_reader: FieldNormReader,
scoring_enabled: bool,
slop: u32,
) -> PhraseScorer<TPostings> {
let max_offset = term_postings
@@ -270,9 +269,8 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
left: Vec::with_capacity(100),
right: Vec::with_capacity(100),
phrase_count: 0u32,
similarity_weight,
similarity_weight_opt,
fieldnorm_reader,
scoring_enabled,
slop,
};
if scorer.doc() != TERMINATED && !scorer.phrase_match() {
@@ -286,7 +284,7 @@ impl<TPostings: Postings> PhraseScorer<TPostings> {
}
fn phrase_match(&mut self) -> bool {
if self.scoring_enabled {
if self.similarity_weight_opt.is_some() {
let count = self.compute_phrase_count();
self.phrase_count = count;
count > 0u32
@@ -388,8 +386,11 @@ impl<TPostings: Postings> Scorer for PhraseScorer<TPostings> {
fn score(&mut self) -> Score {
let doc = self.doc();
let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc);
self.similarity_weight
.score(fieldnorm_id, self.phrase_count)
if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() {
similarity_weight.score(fieldnorm_id, self.phrase_count)
} else {
1.0f32
}
}
}

View File

@@ -10,30 +10,28 @@ use crate::{DocId, DocSet, Score};
pub struct PhraseWeight {
phrase_terms: Vec<(usize, Term)>,
similarity_weight: Bm25Weight,
scoring_enabled: bool,
similarity_weight_opt: Option<Bm25Weight>,
slop: u32,
}
impl PhraseWeight {
/// Creates a new phrase weight.
/// If `similarity_weight_opt` is `None`, then scoring is disabled.
pub fn new(
phrase_terms: Vec<(usize, Term)>,
similarity_weight: Bm25Weight,
scoring_enabled: bool,
similarity_weight_opt: Option<Bm25Weight>,
) -> PhraseWeight {
let slop = 0;
PhraseWeight {
phrase_terms,
similarity_weight,
scoring_enabled,
similarity_weight_opt,
slop,
}
}
fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result<FieldNormReader> {
let field = self.phrase_terms[0].1.field();
if self.scoring_enabled {
if self.similarity_weight_opt.is_some() {
if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(field)? {
return Ok(fieldnorm_reader);
}
@@ -46,7 +44,10 @@ impl PhraseWeight {
reader: &SegmentReader,
boost: Score,
) -> crate::Result<Option<PhraseScorer<SegmentPostings>>> {
let similarity_weight = self.similarity_weight.boost_by(boost);
let similarity_weight_opt = self
.similarity_weight_opt
.as_ref()
.map(|similarity_weight| similarity_weight.boost_by(boost));
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
let mut term_postings_list = Vec::new();
if reader.has_deletes() {
@@ -74,9 +75,8 @@ impl PhraseWeight {
}
Ok(Some(PhraseScorer::new(
term_postings_list,
similarity_weight,
similarity_weight_opt,
fieldnorm_reader,
self.scoring_enabled,
self.slop,
)))
}
@@ -108,7 +108,9 @@ impl Weight for PhraseWeight {
let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc);
let phrase_count = scorer.phrase_count();
let mut explanation = Explanation::new("Phrase Scorer", scorer.score());
explanation.add_detail(self.similarity_weight.explain(fieldnorm_id, phrase_count));
if let Some(similarity_weight) = self.similarity_weight_opt.as_ref() {
explanation.add_detail(similarity_weight.explain(fieldnorm_id, phrase_count));
}
Ok(explanation)
}
}
@@ -117,7 +119,7 @@ impl Weight for PhraseWeight {
mod tests {
use super::super::tests::create_index;
use crate::docset::TERMINATED;
use crate::query::PhraseQuery;
use crate::query::{EnableScoring, PhraseQuery};
use crate::{DocSet, Term};
#[test]
@@ -130,7 +132,8 @@ mod tests {
Term::from_field_text(text_field, "a"),
Term::from_field_text(text_field, "b"),
]);
let phrase_weight = phrase_query.phrase_weight(&searcher, true).unwrap();
let enable_scoring = EnableScoring::Enabled(&searcher);
let phrase_weight = phrase_query.phrase_weight(enable_scoring).unwrap();
let mut phrase_scorer = phrase_weight
.phrase_scorer(searcher.segment_reader(0u32), 1.0)?
.unwrap();

View File

@@ -5,8 +5,37 @@ use downcast_rs::impl_downcast;
use super::Weight;
use crate::core::searcher::Searcher;
use crate::query::Explanation;
use crate::schema::Schema;
use crate::{DocAddress, Term};
/// Argument passed to `Query::weight(..)` to select whether scoring is wanted.
///
/// Both variants give access to a [`Schema`]; only `Enabled` additionally
/// carries a [`Searcher`].
#[derive(Copy, Clone)]
pub enum EnableScoring<'a> {
    /// Scoring is requested; carries the searcher.
    Enabled(&'a Searcher),
    /// Scoring is not requested; carries only the schema.
    /// This can improve performance.
    Disabled(&'a Schema),
}

impl<'a> EnableScoring<'a> {
    /// Returns the schema, taken from the searcher when scoring is enabled
    /// and from the variant payload otherwise.
    pub fn schema(&self) -> &Schema {
        // `EnableScoring` is `Copy`, so matching on `*self` binds the inner
        // references directly instead of references to references.
        match *self {
            Self::Enabled(searcher) => searcher.schema(),
            Self::Disabled(schema) => schema,
        }
    }

    /// Returns `true` for `Enabled`, `false` for `Disabled`.
    pub fn is_scoring_enabled(&self) -> bool {
        matches!(self, Self::Enabled(_))
    }
}
/// The `Query` trait defines a set of documents and a scoring method
/// for those documents.
///
@@ -48,18 +77,18 @@ pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
/// can increase performance.
///
/// See [`Weight`].
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>>;
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>>;
/// Returns an `Explanation` for the score of the document.
fn explain(&self, searcher: &Searcher, doc_address: DocAddress) -> crate::Result<Explanation> {
let weight = self.weight(EnableScoring::Enabled(searcher))?;
let reader = searcher.segment_reader(doc_address.segment_ord);
let weight = self.weight(searcher, true)?;
weight.explain(reader, doc_address.doc_id)
}
/// Returns the number of documents matching the query.
fn count(&self, searcher: &Searcher) -> crate::Result<usize> {
let weight = self.weight(searcher, false)?;
let weight = self.weight(EnableScoring::Disabled(searcher.schema()))?;
let mut result = 0;
for reader in searcher.segment_readers() {
result += weight.count(reader)? as usize;
@@ -93,8 +122,8 @@ where T: 'static + Query + Clone
}
impl Query for Box<dyn Query> {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
self.as_ref().weight(searcher, scoring_enabled)
fn weight(&self, enabled_scoring: EnableScoring) -> crate::Result<Box<dyn Weight>> {
self.as_ref().weight(enabled_scoring)
}
fn count(&self, searcher: &Searcher) -> crate::Result<usize> {

View File

@@ -3,11 +3,11 @@ use std::ops::{Bound, Range};
use common::BitSet;
use crate::core::{Searcher, SegmentReader};
use crate::core::SegmentReader;
use crate::error::TantivyError;
use crate::query::explanation::does_not_match;
use crate::query::range_query_ip_fastfield::IPFastFieldRangeWeight;
use crate::query::{BitSetDocSet, ConstScorer, Explanation, Query, Scorer, Weight};
use crate::query::{BitSetDocSet, ConstScorer, EnableScoring, Explanation, Query, Scorer, Weight};
use crate::schema::{Field, IndexRecordOption, Term, Type};
use crate::termdict::{TermDictionary, TermStreamer};
use crate::{DocId, Score};
@@ -253,12 +253,8 @@ impl RangeQuery {
}
impl Query for RangeQuery {
fn weight(
&self,
searcher: &Searcher,
_scoring_enabled: bool,
) -> crate::Result<Box<dyn Weight>> {
let schema = searcher.schema();
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let schema = enable_scoring.schema();
let field_type = schema.get_field_entry(self.field).field_type();
let value_type = field_type.value_type();
if value_type != self.value_type {

View File

@@ -4,9 +4,8 @@ use std::sync::Arc;
use tantivy_fst::Regex;
use crate::error::TantivyError;
use crate::query::{AutomatonWeight, Query, Weight};
use crate::query::{AutomatonWeight, EnableScoring, Query, Weight};
use crate::schema::Field;
use crate::Searcher;
/// A Regex Query matches all of the documents
/// containing a specific term that matches
@@ -82,11 +81,7 @@ impl RegexQuery {
}
impl Query for RegexQuery {
fn weight(
&self,
_searcher: &Searcher,
_scoring_enabled: bool,
) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, _enabled_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(self.specialized_weight()))
}
}

View File

@@ -4,9 +4,9 @@ use tantivy_fst::raw::CompiledAddr;
use tantivy_fst::{Automaton, Map};
use crate::query::score_combiner::DoNothingCombiner;
use crate::query::{AutomatonWeight, BooleanWeight, Occur, Query, Weight};
use crate::schema::Field;
use crate::{Searcher, Term};
use crate::query::{AutomatonWeight, BooleanWeight, EnableScoring, Occur, Query, Weight};
use crate::schema::{Field, Schema};
use crate::Term;
/// A Term Set Query matches all of the documents containing any of the Term provided
#[derive(Debug, Clone)]
@@ -32,12 +32,12 @@ impl TermSetQuery {
fn specialized_weight(
&self,
searcher: &Searcher,
schema: &Schema,
) -> crate::Result<BooleanWeight<DoNothingCombiner>> {
let mut sub_queries: Vec<(_, Box<dyn Weight>)> = Vec::with_capacity(self.terms_map.len());
for (&field, sorted_terms) in self.terms_map.iter() {
let field_entry = searcher.schema().get_field_entry(field);
let field_entry = schema.get_field_entry(field);
let field_type = field_entry.field_type();
if !field_type.is_indexed() {
let error_msg = format!("Field {:?} is not indexed.", field_entry.name());
@@ -65,12 +65,8 @@ impl TermSetQuery {
}
impl Query for TermSetQuery {
// NOTE(review): this hunk appears to list the removed `weight` (taking a
// `&Searcher` and an unused `_scoring_enabled` flag) immediately followed by
// its replacement, with the diff markers stripped — confirm against the
// applied file.
fn weight(
&self,
searcher: &Searcher,
_scoring_enabled: bool,
) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(self.specialized_weight(searcher)?))
/// Boxes the result of `specialized_weight` as a `dyn Weight`.
///
/// Only the schema obtained from `enable_scoring.schema()` is passed on.
/// An error returned by `specialized_weight` is propagated with `?`.
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(self.specialized_weight(enable_scoring.schema())?))
}
}

View File

@@ -12,7 +12,7 @@ mod tests {
use crate::collector::TopDocs;
use crate::docset::DocSet;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::query::{Query, QueryParser, Scorer, TermQuery};
use crate::query::{EnableScoring, Query, QueryParser, Scorer, TermQuery};
use crate::schema::{Field, IndexRecordOption, Schema, STRING, TEXT};
use crate::{assert_nearly_equals, DocAddress, Index, Term, TERMINATED};
@@ -34,7 +34,7 @@ mod tests {
Term::from_field_text(text_field, "a"),
IndexRecordOption::Basic,
);
let term_weight = term_query.weight(&searcher, true)?;
let term_weight = term_query.weight(EnableScoring::Enabled(&searcher))?;
let segment_reader = searcher.segment_reader(0);
let mut term_scorer = term_weight.scorer(segment_reader, 1.0)?;
assert_eq!(term_scorer.doc(), 0);
@@ -62,7 +62,7 @@ mod tests {
Term::from_field_text(text_field, "a"),
IndexRecordOption::Basic,
);
let term_weight = term_query.weight(&searcher, true)?;
let term_weight = term_query.weight(EnableScoring::Enabled(&searcher))?;
let segment_reader = searcher.segment_reader(0);
let mut term_scorer = term_weight.scorer(segment_reader, 1.0)?;
for i in 0u32..COMPRESSION_BLOCK_SIZE as u32 {
@@ -158,7 +158,7 @@ mod tests {
let term_a = Term::from_field_text(text_field, "a");
let term_query = TermQuery::new(term_a, IndexRecordOption::Basic);
let searcher = index.reader()?.searcher();
let term_weight = term_query.weight(&searcher, false)?;
let term_weight = term_query.weight(EnableScoring::Disabled(searcher.schema()))?;
let mut term_scorer = term_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert_eq!(term_scorer.doc(), 0u32);
term_scorer.seek(1u32);

View File

@@ -2,9 +2,9 @@ use std::fmt;
use super::term_weight::TermWeight;
use crate::query::bm25::Bm25Weight;
use crate::query::{Explanation, Query, Weight};
use crate::query::{EnableScoring, Explanation, Query, Weight};
use crate::schema::IndexRecordOption;
use crate::{Searcher, Term};
use crate::Term;
/// A Term query matches all of the documents
/// containing a specific term.
@@ -87,19 +87,23 @@ impl TermQuery {
/// This is useful for optimization purposes.
pub fn specialized_weight(
&self,
searcher: &Searcher,
scoring_enabled: bool,
enable_scoring: EnableScoring<'_>,
) -> crate::Result<TermWeight> {
let field_entry = searcher.schema().get_field_entry(self.term.field());
let schema = enable_scoring.schema();
let field_entry = schema.get_field_entry(self.term.field());
if !field_entry.is_indexed() {
let error_msg = format!("Field {:?} is not indexed.", field_entry.name());
return Err(crate::TantivyError::SchemaError(error_msg));
}
let bm25_weight = if scoring_enabled {
Bm25Weight::for_terms(searcher, &[self.term.clone()])?
} else {
Bm25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32)
let bm25_weight = match enable_scoring {
EnableScoring::Enabled(searcher) => {
Bm25Weight::for_terms(searcher, &[self.term.clone()])?
}
EnableScoring::Disabled(_schema) => {
Bm25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32)
}
};
let scoring_enabled = enable_scoring.is_scoring_enabled();
let index_record_option = if scoring_enabled {
self.index_record_option
} else {
@@ -115,10 +119,8 @@ impl TermQuery {
}
impl Query for TermQuery {
// NOTE(review): this hunk appears to list the removed `weight` (taking
// `searcher` and `scoring_enabled` separately) immediately followed by its
// replacement, with the diff markers stripped — confirm against the applied file.
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(
self.specialized_weight(searcher, scoring_enabled)?,
))
/// Forwards `enable_scoring` unchanged to `specialized_weight` and boxes the
/// result as a `dyn Weight`.
///
/// An error returned by `specialized_weight` is propagated with `?`.
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(self.specialized_weight(enable_scoring)?))
}
fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
visitor(&self.term, false);

View File

@@ -130,7 +130,7 @@ mod tests {
use crate::merge_policy::NoMergePolicy;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
use crate::query::term_query::TermScorer;
use crate::query::{Bm25Weight, Scorer, TermQuery};
use crate::query::{Bm25Weight, EnableScoring, Scorer, TermQuery};
use crate::schema::{IndexRecordOption, Schema, TEXT};
use crate::{
assert_nearly_equals, DocId, DocSet, Index, Score, Searcher, SegmentId, Term, TERMINATED,
@@ -250,7 +250,7 @@ mod tests {
}
fn test_block_wand_aux(term_query: &TermQuery, searcher: &Searcher) -> crate::Result<()> {
let term_weight = term_query.specialized_weight(searcher, true)?;
let term_weight = term_query.specialized_weight(EnableScoring::Enabled(searcher))?;
for reader in searcher.segment_readers() {
let mut block_max_scores = vec![];
let mut block_max_scores_b = vec![];