mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
137 lines
4.6 KiB
Rust
137 lines
4.6 KiB
Rust
use super::PhraseWeight;
|
|
use crate::query::bm25::Bm25Weight;
|
|
use crate::query::{EnableScoring, Query, Weight};
|
|
use crate::schema::{Field, IndexRecordOption, Term};
|
|
|
|
/// `PhraseQuery` matches a specific sequence of words.
|
|
///
|
|
/// For instance the phrase query for `"part time"` will match
|
|
/// the sentence
|
|
///
|
|
/// **Alan just got a part time job.**
|
|
///
|
|
/// On the other hand it will not match the sentence.
|
|
///
|
|
/// **This is my favorite part of the job.**
|
|
///
|
|
/// [Slop](PhraseQuery::set_slop) allows leniency in term proximity
|
|
/// for some performance tradeof.
|
|
///
|
|
/// Using a `PhraseQuery` on a field requires positions
|
|
/// to be indexed for this field.
|
|
#[derive(Clone, Debug)]
|
|
pub struct PhraseQuery {
|
|
field: Field,
|
|
phrase_terms: Vec<(usize, Term)>,
|
|
slop: u32,
|
|
}
|
|
|
|
impl PhraseQuery {
|
|
/// Creates a new `PhraseQuery` given a list of terms.
|
|
///
|
|
/// There must be at least two terms, and all terms
|
|
/// must belong to the same field.
|
|
/// Offset for each term will be same as index in the Vector
|
|
pub fn new(terms: Vec<Term>) -> PhraseQuery {
|
|
let terms_with_offset = terms.into_iter().enumerate().collect();
|
|
PhraseQuery::new_with_offset(terms_with_offset)
|
|
}
|
|
|
|
/// Creates a new `PhraseQuery` given a list of terms and their offsets.
|
|
///
|
|
/// Can be used to provide custom offset for each term.
|
|
pub fn new_with_offset(terms: Vec<(usize, Term)>) -> PhraseQuery {
|
|
PhraseQuery::new_with_offset_and_slop(terms, 0)
|
|
}
|
|
|
|
/// Creates a new `PhraseQuery` given a list of terms, their offsets and a slop
|
|
pub fn new_with_offset_and_slop(mut terms: Vec<(usize, Term)>, slop: u32) -> PhraseQuery {
|
|
assert!(
|
|
terms.len() > 1,
|
|
"A phrase query is required to have strictly more than one term."
|
|
);
|
|
terms.sort_by_key(|&(offset, _)| offset);
|
|
let field = terms[0].1.field();
|
|
assert!(
|
|
terms[1..].iter().all(|term| term.1.field() == field),
|
|
"All terms from a phrase query must belong to the same field"
|
|
);
|
|
PhraseQuery {
|
|
field,
|
|
phrase_terms: terms,
|
|
slop,
|
|
}
|
|
}
|
|
|
|
/// Slop allowed for the phrase.
|
|
///
|
|
/// The query will match if its terms are separated by `slop` terms at most.
|
|
/// By default the slop is 0 meaning query terms need to be adjacent.
|
|
pub fn set_slop(&mut self, value: u32) {
|
|
self.slop = value;
|
|
}
|
|
|
|
/// The [`Field`] this `PhraseQuery` is targeting.
|
|
pub fn field(&self) -> Field {
|
|
self.field
|
|
}
|
|
|
|
/// `Term`s in the phrase without the associated offsets.
|
|
pub fn phrase_terms(&self) -> Vec<Term> {
|
|
self.phrase_terms
|
|
.iter()
|
|
.map(|(_, term)| term.clone())
|
|
.collect::<Vec<Term>>()
|
|
}
|
|
|
|
/// Returns the [`PhraseWeight`] for the given phrase query given a specific `searcher`.
|
|
///
|
|
/// This function is the same as [`Query::weight()`] except it returns
|
|
/// a specialized type [`PhraseWeight`] instead of a Boxed trait.
|
|
pub(crate) fn phrase_weight(
|
|
&self,
|
|
enable_scoring: EnableScoring<'_>,
|
|
) -> crate::Result<PhraseWeight> {
|
|
let schema = enable_scoring.schema();
|
|
let field_entry = schema.get_field_entry(self.field);
|
|
let has_positions = field_entry
|
|
.field_type()
|
|
.get_index_record_option()
|
|
.map(IndexRecordOption::has_positions)
|
|
.unwrap_or(false);
|
|
if !has_positions {
|
|
let field_name = field_entry.name();
|
|
return Err(crate::TantivyError::SchemaError(format!(
|
|
"Applied phrase query on field {:?}, which does not have positions indexed",
|
|
field_name
|
|
)));
|
|
}
|
|
let terms = self.phrase_terms();
|
|
let bm25_weight_opt = match enable_scoring {
|
|
EnableScoring::Enabled(searcher) => Some(Bm25Weight::for_terms(searcher, &terms)?),
|
|
EnableScoring::Disabled(_) => None,
|
|
};
|
|
let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight_opt);
|
|
if self.slop > 0 {
|
|
weight.slop(self.slop);
|
|
}
|
|
Ok(weight)
|
|
}
|
|
}
|
|
|
|
impl Query for PhraseQuery {
|
|
/// Create the weight associated with a query.
|
|
///
|
|
/// See [`Weight`].
|
|
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
|
|
let phrase_weight = self.phrase_weight(enable_scoring)?;
|
|
Ok(Box::new(phrase_weight))
|
|
}
|
|
|
|
fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
|
|
for (_, term) in &self.phrase_terms {
|
|
visitor(term, true);
|
|
}
|
|
}
|
|
}
|