From f745dbc054ce291b016dee23f02fe4af8f33be33 Mon Sep 17 00:00:00 2001 From: PSeitz Date: Tue, 20 Feb 2024 03:22:00 +0100 Subject: [PATCH] fix Clone for TopNComputer, add top_hits bench (#2315) * fix Clone for TopNComputer, add top_hits bench add top_hits agg bench test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg ... bench: 123,475,175 ns/iter (+/- 30,608,889) test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg_multi ... bench: 194,170,414 ns/iter (+/- 36,495,516) test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg_opt ... bench: 179,742,809 ns/iter (+/- 29,976,507) test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_sub_agg_sparse ... bench: 27,592,534 ns/iter (+/- 2,672,370) test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg ... bench: 552,851,227 ns/iter (+/- 71,975,886) test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg_multi ... bench: 558,616,384 ns/iter (+/- 100,890,124) test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg_opt ... bench: 554,031,368 ns/iter (+/- 165,452,650) test aggregation::agg_bench::bench::bench_aggregation_terms_many_with_top_hits_agg_sparse ... bench: 46,435,919 ns/iter (+/- 13,681,935) * add comment --- src/aggregation/agg_bench.rs | 35 ++++++++++++++++++++++++++++ src/collector/top_score_collector.rs | 24 +++++++++++++++++-- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/src/aggregation/agg_bench.rs b/src/aggregation/agg_bench.rs index ec534d994..84c0bb382 100644 --- a/src/aggregation/agg_bench.rs +++ b/src/aggregation/agg_bench.rs @@ -290,6 +290,41 @@ mod bench { }); } + bench_all_cardinalities!(bench_aggregation_terms_many_with_top_hits_agg); + + fn bench_aggregation_terms_many_with_top_hits_agg_card( + b: &mut Bencher, + cardinality: Cardinality, + ) { + let index = get_test_index_bench(cardinality).unwrap(); + let reader = index.reader().unwrap(); + + b.iter(|| { + let agg_req: Aggregations = serde_json::from_value(json!({ + "my_texts": { + "terms": { "field": "text_many_terms" }, + "aggs": { + "top_hits": { "top_hits": + { + "sort": [ + { "score": "desc" } + ], + "size": 2, + "doc_value_fields": ["score_f64"] + } + } + } + }, + })) + .unwrap(); + + let collector = get_collector(agg_req); + + let searcher = reader.searcher(); + searcher.search(&AllQuery, &collector).unwrap() + }); + } + bench_all_cardinalities!(bench_aggregation_terms_many_with_sub_agg); fn bench_aggregation_terms_many_with_sub_agg_card(b: &mut Bencher, cardinality: Cardinality) { diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 834428bdb..917b2c3f7 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -719,8 +719,12 @@ impl SegmentCollector for TopScoreSegmentCollector { /// Fast TopN Computation /// +/// Capacity of the vec is 2 * top_n. +/// The buffer is truncated to the top_n elements when it reaches the capacity of the Vec. +/// That means capacity has special meaning and should be carried over when cloning or serializing. +/// /// For TopN == 0, it will be relative expensive. -#[derive(Clone, Serialize, Deserialize)] +#[derive(Serialize, Deserialize)] #[serde(from = "TopNComputerDeser")] pub struct TopNComputer { /// The buffer reverses sort order to get top-semantics instead of bottom-semantics @@ -728,7 +732,7 @@ pub struct TopNComputer { top_n: usize, pub(crate) threshold: Option, } -// Intermediate struct for TopNComputer for deserialization, to fix vec capacity +// Intermediate struct for TopNComputer for deserialization, to keep vec capacity #[derive(Deserialize)] struct TopNComputerDeser { buffer: Vec>, @@ -736,6 +740,22 @@ struct TopNComputerDeser { threshold: Option, } +// Custom clone to keep capacity +impl Clone + for TopNComputer +{ + fn clone(&self) -> Self { + let mut buffer_clone = Vec::with_capacity(self.buffer.capacity()); + buffer_clone.extend(self.buffer.iter().cloned()); + + TopNComputer { + buffer: buffer_clone, + top_n: self.top_n, + threshold: self.threshold.clone(), + } + } +} + impl From> for TopNComputer { fn from(mut value: TopNComputerDeser) -> Self { let expected_cap = value.top_n.max(1) * 2;