reduce number of allocations (#2257)

* reduce number of allocations

Explanation makes up around 50% of all allocations (numbers not perf).
It's created during serialization but not called.

- Make Explanation optional in BM25
- Avoid allocations when using Explanation

* use Cow
This commit is contained in:
PSeitz
2023-11-16 13:47:36 +01:00
committed by GitHub
parent 9caab45136
commit 0aae31d7d7
6 changed files with 58 additions and 24 deletions

View File

@@ -355,7 +355,7 @@ impl<W: Write> PostingsSerializer<W> {
return;
}
self.bm25_weight = Some(Bm25Weight::for_one_term(
self.bm25_weight = Some(Bm25Weight::for_one_term_without_explain(
term_doc_freq as u64,
num_docs_in_segment,
self.avg_fieldnorm,

View File

@@ -77,7 +77,7 @@ pub struct Bm25Params {
/// A struct used for computing BM25 scores.
#[derive(Clone)]
pub struct Bm25Weight {
idf_explain: Explanation,
idf_explain: Option<Explanation>,
weight: Score,
cache: [Score; 256],
average_fieldnorm: Score,
@@ -147,11 +147,30 @@ impl Bm25Weight {
idf_explain.add_const("N, total number of docs", total_num_docs as Score);
Bm25Weight::new(idf_explain, avg_fieldnorm)
}
/// Construct a [Bm25Weight] for a single term.
/// This method does not carry the [Explanation] for the idf.
pub fn for_one_term_without_explain(
term_doc_freq: u64,
total_num_docs: u64,
avg_fieldnorm: Score,
) -> Bm25Weight {
let idf = idf(term_doc_freq, total_num_docs);
Bm25Weight::new_without_explain(idf, avg_fieldnorm)
}
pub(crate) fn new(idf_explain: Explanation, average_fieldnorm: Score) -> Bm25Weight {
let weight = idf_explain.value() * (1.0 + K1);
Bm25Weight {
idf_explain,
idf_explain: Some(idf_explain),
weight,
cache: compute_tf_cache(average_fieldnorm),
average_fieldnorm,
}
}
pub(crate) fn new_without_explain(idf: f32, average_fieldnorm: Score) -> Bm25Weight {
let weight = idf * (1.0 + K1);
Bm25Weight {
idf_explain: None,
weight,
cache: compute_tf_cache(average_fieldnorm),
average_fieldnorm,
@@ -202,7 +221,9 @@ impl Bm25Weight {
let mut explanation = Explanation::new("TermQuery, product of...", score);
explanation.add_detail(Explanation::new("(K1+1)", K1 + 1.0));
explanation.add_detail(self.idf_explain.clone());
if let Some(idf_explain) = &self.idf_explain {
explanation.add_detail(idf_explain.clone());
}
explanation.add_detail(tf_explanation);
explanation
}

View File

@@ -74,7 +74,8 @@ impl Weight for BoostWeight {
fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result<Explanation> {
let underlying_explanation = self.weight.explain(reader, doc)?;
let score = underlying_explanation.value() * self.boost;
let mut explanation = Explanation::new(format!("Boost x{} of ...", self.boost), score);
let mut explanation =
Explanation::new_with_string(format!("Boost x{} of ...", self.boost), score);
explanation.add_detail(underlying_explanation);
Ok(explanation)
}
@@ -151,7 +152,7 @@ mod tests {
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap();
assert_eq!(
explanation.to_pretty_json(),
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\",\n \"context\": []\n }\n ],\n \"context\": []\n}"
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\"\n }\n ]\n}"
);
Ok(())
}

View File

@@ -164,11 +164,9 @@ mod tests {
"details": [
{
"value": 1.0,
"description": "AllQuery",
"context": []
"description": "AllQuery"
}
],
"context": []
]
}"#
);
Ok(())

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fmt;
use serde::Serialize;
@@ -16,12 +17,12 @@ pub(crate) fn does_not_match(doc: DocId) -> TantivyError {
#[derive(Clone, Serialize)]
pub struct Explanation {
value: Score,
description: String,
#[serde(skip_serializing_if = "Vec::is_empty")]
details: Vec<Explanation>,
context: Vec<String>,
description: Cow<'static, str>,
#[serde(skip_serializing_if = "Option::is_none")]
details: Option<Vec<Explanation>>,
#[serde(skip_serializing_if = "Option::is_none")]
context: Option<Vec<String>>,
}
impl fmt::Debug for Explanation {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Explanation({})", self.to_pretty_json())
@@ -30,12 +31,21 @@ impl fmt::Debug for Explanation {
impl Explanation {
/// Creates a new explanation object.
pub fn new<T: ToString>(description: T, value: Score) -> Explanation {
pub fn new_with_string(description: String, value: Score) -> Explanation {
Explanation {
value,
description: description.to_string(),
details: vec![],
context: vec![],
description: Cow::Owned(description),
details: None,
context: None,
}
}
/// Creates a new explanation object.
pub fn new(description: &'static str, value: Score) -> Explanation {
Explanation {
value,
description: Cow::Borrowed(description),
details: None,
context: None,
}
}
@@ -48,17 +58,21 @@ impl Explanation {
///
/// Details are treated as child of the current node.
pub fn add_detail(&mut self, child_explanation: Explanation) {
self.details.push(child_explanation);
self.details
.get_or_insert_with(Vec::new)
.push(child_explanation);
}
/// Adds some extra context to the explanation.
pub fn add_context(&mut self, context: String) {
self.context.push(context);
self.context.get_or_insert_with(Vec::new).push(context);
}
/// Shortcut for `self.details.push(Explanation::new(name, value));`
pub fn add_const<T: ToString>(&mut self, name: T, value: Score) {
self.details.push(Explanation::new(name, value));
pub fn add_const(&mut self, name: &'static str, value: Score) {
self.details
.get_or_insert_with(Vec::new)
.push(Explanation::new(name, value));
}
/// Returns an indented json representation of the explanation tree for debug usage.

View File

@@ -101,7 +101,7 @@ impl TermQuery {
..
} => Bm25Weight::for_terms(statistics_provider, &[self.term.clone()])?,
EnableScoring::Disabled { .. } => {
Bm25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32)
Bm25Weight::new(Explanation::new("<no score>", 1.0f32), 1.0f32)
}
};
let scoring_enabled = enable_scoring.is_scoring_enabled();