mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
reduce number of allocations (#2257)
* reduce number of allocations

  Explanation makes up around 50% of all allocations (measured by number of allocations, not by performance impact). It is created during serialization but never used there.

  - Make Explanation optional in BM25
  - Avoid allocations when using Explanation

* use Cow
This commit is contained in:
@@ -355,7 +355,7 @@ impl<W: Write> PostingsSerializer<W> {
|
||||
return;
|
||||
}
|
||||
|
||||
self.bm25_weight = Some(Bm25Weight::for_one_term(
|
||||
self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
|
||||
term_doc_freq as u64,
|
||||
num_docs_in_segment,
|
||||
self.avg_fieldnorm,
|
||||
|
||||
@@ -77,7 +77,7 @@ pub struct Bm25Params {
|
||||
/// A struct used for computing BM25 scores.
|
||||
#[derive(Clone)]
|
||||
pub struct Bm25Weight {
|
||||
idf_explain: Explanation,
|
||||
idf_explain: Option<Explanation>,
|
||||
weight: Score,
|
||||
cache: [Score; 256],
|
||||
average_fieldnorm: Score,
|
||||
@@ -147,11 +147,30 @@ impl Bm25Weight {
|
||||
idf_explain.add_const("N, total number of docs", total_num_docs as Score);
|
||||
Bm25Weight::new(idf_explain, avg_fieldnorm)
|
||||
}
|
||||
/// Construct a [Bm25Weight] for a single term.
|
||||
/// This method does not carry the [Explanation] for the idf.
|
||||
pub fn for_one_term_without_explain(
|
||||
term_doc_freq: u64,
|
||||
total_num_docs: u64,
|
||||
avg_fieldnorm: Score,
|
||||
) -> Bm25Weight {
|
||||
let idf = idf(term_doc_freq, total_num_docs);
|
||||
Bm25Weight::new_without_explain(idf, avg_fieldnorm)
|
||||
}
|
||||
|
||||
pub(crate) fn new(idf_explain: Explanation, average_fieldnorm: Score) -> Bm25Weight {
|
||||
let weight = idf_explain.value() * (1.0 + K1);
|
||||
Bm25Weight {
|
||||
idf_explain,
|
||||
idf_explain: Some(idf_explain),
|
||||
weight,
|
||||
cache: compute_tf_cache(average_fieldnorm),
|
||||
average_fieldnorm,
|
||||
}
|
||||
}
|
||||
pub(crate) fn new_without_explain(idf: f32, average_fieldnorm: Score) -> Bm25Weight {
|
||||
let weight = idf * (1.0 + K1);
|
||||
Bm25Weight {
|
||||
idf_explain: None,
|
||||
weight,
|
||||
cache: compute_tf_cache(average_fieldnorm),
|
||||
average_fieldnorm,
|
||||
@@ -202,7 +221,9 @@ impl Bm25Weight {
|
||||
|
||||
let mut explanation = Explanation::new("TermQuery, product of...", score);
|
||||
explanation.add_detail(Explanation::new("(K1+1)", K1 + 1.0));
|
||||
explanation.add_detail(self.idf_explain.clone());
|
||||
if let Some(idf_explain) = &self.idf_explain {
|
||||
explanation.add_detail(idf_explain.clone());
|
||||
}
|
||||
explanation.add_detail(tf_explanation);
|
||||
explanation
|
||||
}
|
||||
|
||||
@@ -74,7 +74,8 @@ impl Weight for BoostWeight {
|
||||
fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
|
||||
let underlying_explanation = self.weight.explain(reader, doc)?;
|
||||
let score = underlying_explanation.value() * self.boost;
|
||||
let mut explanation = Explanation::new(format!("Boost x{} of ...", self.boost), score);
|
||||
let mut explanation =
|
||||
Explanation::new_with_string(format!("Boost x{} of ...", self.boost), score);
|
||||
explanation.add_detail(underlying_explanation);
|
||||
Ok(explanation)
|
||||
}
|
||||
@@ -151,7 +152,7 @@ mod tests {
|
||||
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap();
|
||||
assert_eq!(
|
||||
explanation.to_pretty_json(),
|
||||
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\",\n \"context\": []\n }\n ],\n \"context\": []\n}"
|
||||
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\"\n }\n ]\n}"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -164,11 +164,9 @@ mod tests {
|
||||
"details": [
|
||||
{
|
||||
"value": 1.0,
|
||||
"description": "AllQuery",
|
||||
"context": []
|
||||
"description": "AllQuery"
|
||||
}
|
||||
],
|
||||
"context": []
|
||||
]
|
||||
}"#
|
||||
);
|
||||
Ok(())
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
|
||||
use serde::Serialize;
|
||||
@@ -16,12 +17,12 @@ pub(crate) fn does_not_match(doc: DocId) -> TantivyError {
|
||||
#[derive(Clone, Serialize)]
|
||||
pub struct Explanation {
|
||||
value: Score,
|
||||
description: String,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
details: Vec<Explanation>,
|
||||
context: Vec<String>,
|
||||
description: Cow<'static, str>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
details: Option<Vec<Explanation>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
context: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for Explanation {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "Explanation({})", self.to_pretty_json())
|
||||
@@ -30,12 +31,21 @@ impl fmt::Debug for Explanation {
|
||||
|
||||
impl Explanation {
|
||||
/// Creates a new explanation object.
|
||||
pub fn new<T: ToString>(description: T, value: Score) -> Explanation {
|
||||
pub fn new_with_string(description: String, value: Score) -> Explanation {
|
||||
Explanation {
|
||||
value,
|
||||
description: description.to_string(),
|
||||
details: vec![],
|
||||
context: vec![],
|
||||
description: Cow::Owned(description),
|
||||
details: None,
|
||||
context: None,
|
||||
}
|
||||
}
|
||||
/// Creates a new explanation object.
|
||||
pub fn new(description: &'static str, value: Score) -> Explanation {
|
||||
Explanation {
|
||||
value,
|
||||
description: Cow::Borrowed(description),
|
||||
details: None,
|
||||
context: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,17 +58,21 @@ impl Explanation {
|
||||
///
|
||||
/// Details are treated as child of the current node.
|
||||
pub fn add_detail(&mut self, child_explanation: Explanation) {
|
||||
self.details.push(child_explanation);
|
||||
self.details
|
||||
.get_or_insert_with(Vec::new)
|
||||
.push(child_explanation);
|
||||
}
|
||||
|
||||
/// Adds some extra context to the explanation.
|
||||
pub fn add_context(&mut self, context: String) {
|
||||
self.context.push(context);
|
||||
self.context.get_or_insert_with(Vec::new).push(context);
|
||||
}
|
||||
|
||||
/// Shortcut for `self.details.push(Explanation::new(name, value));`
|
||||
pub fn add_const<T: ToString>(&mut self, name: T, value: Score) {
|
||||
self.details.push(Explanation::new(name, value));
|
||||
pub fn add_const(&mut self, name: &'static str, value: Score) {
|
||||
self.details
|
||||
.get_or_insert_with(Vec::new)
|
||||
.push(Explanation::new(name, value));
|
||||
}
|
||||
|
||||
/// Returns an indented json representation of the explanation tree for debug usage.
|
||||
|
||||
@@ -101,7 +101,7 @@ impl TermQuery {
|
||||
..
|
||||
} => Bm25Weight::for_terms(statistics_provider, &[self.term.clone()])?,
|
||||
EnableScoring::Disabled { .. } => {
|
||||
Bm25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32)
|
||||
Bm25Weight::new(Explanation::new("<no score>", 1.0f32), 1.0f32)
|
||||
}
|
||||
};
|
||||
let scoring_enabled = enable_scoring.is_scoring_enabled();
|
||||
|
||||
Reference in New Issue
Block a user