mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 17:22:54 +00:00
Compare commits
3 Commits
warming
...
audunhalla
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
548129fc6d | ||
|
|
9f04f42b64 | ||
|
|
aeb8ae3ef0 |
@@ -15,6 +15,7 @@ use tantivy_fst::Automaton;
|
|||||||
pub struct AutomatonWeight<A> {
|
pub struct AutomatonWeight<A> {
|
||||||
field: Field,
|
field: Field,
|
||||||
automaton: Arc<A>,
|
automaton: Arc<A>,
|
||||||
|
boost: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<A> AutomatonWeight<A>
|
impl<A> AutomatonWeight<A>
|
||||||
@@ -26,9 +27,15 @@ where
|
|||||||
AutomatonWeight {
|
AutomatonWeight {
|
||||||
field,
|
field,
|
||||||
automaton: automaton.into(),
|
automaton: automaton.into(),
|
||||||
|
boost: 1.0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Boost the scorer by the given factor.
|
||||||
|
pub fn boost_by(self, boost: f32) -> Self {
|
||||||
|
Self { boost, ..self }
|
||||||
|
}
|
||||||
|
|
||||||
fn automaton_stream<'a>(&'a self, term_dict: &'a TermDictionary) -> TermStreamer<'a, &'a A> {
|
fn automaton_stream<'a>(&'a self, term_dict: &'a TermDictionary) -> TermStreamer<'a, &'a A> {
|
||||||
let automaton: &A = &*self.automaton;
|
let automaton: &A = &*self.automaton;
|
||||||
let term_stream_builder = term_dict.search(automaton);
|
let term_stream_builder = term_dict.search(automaton);
|
||||||
@@ -58,7 +65,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
let doc_bitset = BitSetDocSet::from(doc_bitset);
|
let doc_bitset = BitSetDocSet::from(doc_bitset);
|
||||||
Ok(Box::new(ConstScorer::new(doc_bitset)))
|
Ok(Box::new(ConstScorer::with_score(doc_bitset, self.boost)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
|
fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ pub struct BM25Weight {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl BM25Weight {
|
impl BM25Weight {
|
||||||
pub fn for_terms(searcher: &Searcher, terms: &[Term]) -> BM25Weight {
|
pub fn for_terms(searcher: &Searcher, terms: &[Term], boost: f32) -> BM25Weight {
|
||||||
assert!(!terms.is_empty(), "BM25 requires at least one term");
|
assert!(!terms.is_empty(), "BM25 requires at least one term");
|
||||||
let field = terms[0].field();
|
let field = terms[0].field();
|
||||||
for term in &terms[1..] {
|
for term in &terms[1..] {
|
||||||
@@ -75,11 +75,11 @@ impl BM25Weight {
|
|||||||
.sum::<f32>();
|
.sum::<f32>();
|
||||||
idf_explain = Explanation::new("idf", idf);
|
idf_explain = Explanation::new("idf", idf);
|
||||||
}
|
}
|
||||||
BM25Weight::new(idf_explain, average_fieldnorm)
|
BM25Weight::new(idf_explain, average_fieldnorm, boost)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new(idf_explain: Explanation, average_fieldnorm: f32) -> BM25Weight {
|
fn new(idf_explain: Explanation, average_fieldnorm: f32, boost: f32) -> BM25Weight {
|
||||||
let weight = idf_explain.value() * (1f32 + K1);
|
let weight = idf_explain.value() * (1f32 + K1) * boost;
|
||||||
BM25Weight {
|
BM25Weight {
|
||||||
idf_explain,
|
idf_explain,
|
||||||
weight,
|
weight,
|
||||||
|
|||||||
@@ -79,6 +79,7 @@ pub struct FuzzyTermQuery {
|
|||||||
transposition_cost_one: bool,
|
transposition_cost_one: bool,
|
||||||
///
|
///
|
||||||
prefix: bool,
|
prefix: bool,
|
||||||
|
boost: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FuzzyTermQuery {
|
impl FuzzyTermQuery {
|
||||||
@@ -89,6 +90,7 @@ impl FuzzyTermQuery {
|
|||||||
distance,
|
distance,
|
||||||
transposition_cost_one,
|
transposition_cost_one,
|
||||||
prefix: false,
|
prefix: false,
|
||||||
|
boost: 1.0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -99,16 +101,22 @@ impl FuzzyTermQuery {
|
|||||||
distance,
|
distance,
|
||||||
transposition_cost_one,
|
transposition_cost_one,
|
||||||
prefix: true,
|
prefix: true,
|
||||||
|
boost: 1.0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Boost the query score by the given factor.
|
||||||
|
pub fn boost_by(self, boost: f32) -> Self {
|
||||||
|
Self { boost, ..self }
|
||||||
|
}
|
||||||
|
|
||||||
fn specialized_weight(&self) -> Result<AutomatonWeight<DFA>> {
|
fn specialized_weight(&self) -> Result<AutomatonWeight<DFA>> {
|
||||||
// LEV_BUILDER is a HashMap, whose `get` method returns an Option
|
// LEV_BUILDER is a HashMap, whose `get` method returns an Option
|
||||||
match LEV_BUILDER.get(&(self.distance, false)) {
|
match LEV_BUILDER.get(&(self.distance, false)) {
|
||||||
// Unwrap the option and build the Ok(AutomatonWeight)
|
// Unwrap the option and build the Ok(AutomatonWeight)
|
||||||
Some(automaton_builder) => {
|
Some(automaton_builder) => {
|
||||||
let automaton = automaton_builder.build_dfa(self.term.text());
|
let automaton = automaton_builder.build_dfa(self.term.text());
|
||||||
Ok(AutomatonWeight::new(self.term.field(), automaton))
|
Ok(AutomatonWeight::new(self.term.field(), automaton).boost_by(self.boost))
|
||||||
}
|
}
|
||||||
None => Err(InvalidArgument(format!(
|
None => Err(InvalidArgument(format!(
|
||||||
"Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
|
"Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ use std::collections::BTreeSet;
|
|||||||
pub struct PhraseQuery {
|
pub struct PhraseQuery {
|
||||||
field: Field,
|
field: Field,
|
||||||
phrase_terms: Vec<(usize, Term)>,
|
phrase_terms: Vec<(usize, Term)>,
|
||||||
|
boost: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PhraseQuery {
|
impl PhraseQuery {
|
||||||
@@ -57,9 +58,15 @@ impl PhraseQuery {
|
|||||||
PhraseQuery {
|
PhraseQuery {
|
||||||
field,
|
field,
|
||||||
phrase_terms: terms,
|
phrase_terms: terms,
|
||||||
|
boost: 1.0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Boost the query score by the given factor.
|
||||||
|
pub fn boost_by(self, boost: f32) -> Self {
|
||||||
|
Self { boost, ..self }
|
||||||
|
}
|
||||||
|
|
||||||
/// The `Field` this `PhraseQuery` is targeting.
|
/// The `Field` this `PhraseQuery` is targeting.
|
||||||
pub fn field(&self) -> Field {
|
pub fn field(&self) -> Field {
|
||||||
self.field
|
self.field
|
||||||
@@ -97,7 +104,7 @@ impl PhraseQuery {
|
|||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
let terms = self.phrase_terms();
|
let terms = self.phrase_terms();
|
||||||
let bm25_weight = BM25Weight::for_terms(searcher, &terms);
|
let bm25_weight = BM25Weight::for_terms(searcher, &terms, self.boost);
|
||||||
Ok(PhraseWeight::new(
|
Ok(PhraseWeight::new(
|
||||||
self.phrase_terms.clone(),
|
self.phrase_terms.clone(),
|
||||||
bm25_weight,
|
bm25_weight,
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ use tantivy_fst::Regex;
|
|||||||
pub struct RegexQuery {
|
pub struct RegexQuery {
|
||||||
regex: Arc<Regex>,
|
regex: Arc<Regex>,
|
||||||
field: Field,
|
field: Field,
|
||||||
|
boost: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RegexQuery {
|
impl RegexQuery {
|
||||||
@@ -69,11 +70,17 @@ impl RegexQuery {
|
|||||||
RegexQuery {
|
RegexQuery {
|
||||||
regex: regex.into(),
|
regex: regex.into(),
|
||||||
field,
|
field,
|
||||||
|
boost: 1.0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Boost the query score by the given factor.
|
||||||
|
pub fn boost_by(self, boost: f32) -> Self {
|
||||||
|
Self { boost, ..self }
|
||||||
|
}
|
||||||
|
|
||||||
fn specialized_weight(&self) -> AutomatonWeight<Regex> {
|
fn specialized_weight(&self) -> AutomatonWeight<Regex> {
|
||||||
AutomatonWeight::new(self.field, self.regex.clone())
|
AutomatonWeight::new(self.field, self.regex.clone()).boost_by(self.boost)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -56,6 +56,11 @@ impl<TDocSet: DocSet> ConstScorer<TDocSet> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a new `ConstScorer` with a custom score value
|
||||||
|
pub fn with_score(docset: TDocSet, score: f32) -> ConstScorer<TDocSet> {
|
||||||
|
ConstScorer { docset, score }
|
||||||
|
}
|
||||||
|
|
||||||
/// Sets the constant score to a different value.
|
/// Sets the constant score to a different value.
|
||||||
pub fn set_score(&mut self, score: Score) {
|
pub fn set_score(&mut self, score: Score) {
|
||||||
self.score = score;
|
self.score = score;
|
||||||
|
|||||||
@@ -45,6 +45,35 @@ mod tests {
|
|||||||
assert_eq!(term_scorer.score(), 0.28768212);
|
assert_eq!(term_scorer.score(), 0.28768212);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_term_query_boost_by() {
|
||||||
|
let mut schema_builder = Schema::builder();
|
||||||
|
let text_field = schema_builder.add_text_field("text", STRING);
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
let index = Index::create_in_ram(schema);
|
||||||
|
{
|
||||||
|
// writing the segment
|
||||||
|
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||||
|
{
|
||||||
|
let doc = doc!(text_field => "a");
|
||||||
|
index_writer.add_document(doc);
|
||||||
|
}
|
||||||
|
assert!(index_writer.commit().is_ok());
|
||||||
|
}
|
||||||
|
let searcher = index.reader().unwrap().searcher();
|
||||||
|
let term_query = TermQuery::new(
|
||||||
|
Term::from_field_text(text_field, "a"),
|
||||||
|
IndexRecordOption::Basic,
|
||||||
|
)
|
||||||
|
.boost_by(42.0);
|
||||||
|
let term_weight = term_query.weight(&searcher, true).unwrap();
|
||||||
|
let segment_reader = searcher.segment_reader(0);
|
||||||
|
let mut term_scorer = term_weight.scorer(segment_reader).unwrap();
|
||||||
|
assert!(term_scorer.advance());
|
||||||
|
assert_eq!(term_scorer.doc(), 0);
|
||||||
|
assert_nearly_equals(0.28768212 * 42.0, term_scorer.score());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
pub fn test_term_weight() {
|
pub fn test_term_weight() {
|
||||||
let mut schema_builder = Schema::builder();
|
let mut schema_builder = Schema::builder();
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ use std::fmt;
|
|||||||
pub struct TermQuery {
|
pub struct TermQuery {
|
||||||
term: Term,
|
term: Term,
|
||||||
index_record_option: IndexRecordOption,
|
index_record_option: IndexRecordOption,
|
||||||
|
boost: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for TermQuery {
|
impl fmt::Debug for TermQuery {
|
||||||
@@ -75,9 +76,15 @@ impl TermQuery {
|
|||||||
TermQuery {
|
TermQuery {
|
||||||
term,
|
term,
|
||||||
index_record_option: segment_postings_options,
|
index_record_option: segment_postings_options,
|
||||||
|
boost: 1.0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Boost the query score by the given factor.
|
||||||
|
pub fn boost_by(self, boost: f32) -> Self {
|
||||||
|
Self { boost, ..self }
|
||||||
|
}
|
||||||
|
|
||||||
/// The `Term` this query is built out of.
|
/// The `Term` this query is built out of.
|
||||||
pub fn term(&self) -> &Term {
|
pub fn term(&self) -> &Term {
|
||||||
&self.term
|
&self.term
|
||||||
@@ -90,7 +97,7 @@ impl TermQuery {
|
|||||||
/// This is useful for optimization purpose.
|
/// This is useful for optimization purpose.
|
||||||
pub fn specialized_weight(&self, searcher: &Searcher, scoring_enabled: bool) -> TermWeight {
|
pub fn specialized_weight(&self, searcher: &Searcher, scoring_enabled: bool) -> TermWeight {
|
||||||
let term = self.term.clone();
|
let term = self.term.clone();
|
||||||
let bm25_weight = BM25Weight::for_terms(searcher, &[term]);
|
let bm25_weight = BM25Weight::for_terms(searcher, &[term], self.boost);
|
||||||
let index_record_option = if scoring_enabled {
|
let index_record_option = if scoring_enabled {
|
||||||
self.index_record_option
|
self.index_record_option
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user