Compare commits

...

2 Commits

Author SHA1 Message Date
Pascal Seitz
e07f1970ea fix count type 2023-01-13 20:10:23 +08:00
Pascal Seitz
78273bfb0d reuse stats for average 2023-01-13 17:43:25 +08:00
4 changed files with 40 additions and 79 deletions

View File

@@ -204,21 +204,23 @@ pub enum IntermediateAggregationResult {
/// Holds the intermediate, mergeable data produced by a metric aggregation.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum IntermediateMetricResult {
/// Intermediate result of an average aggregation.
Average(IntermediateAverage),
/// Intermediate result of a stats aggregation.
Stats(IntermediateStats),
}
/// Converts a segment-level metric collector into its intermediate result.
impl From<SegmentMetricResultCollector> for IntermediateMetricResult {
fn from(tree: SegmentMetricResultCollector) -> Self {
match tree {
SegmentMetricResultCollector::Average(collector) => {
IntermediateMetricResult::Average(IntermediateAverage::from_collector(collector))
}
// NOTE(review): this listing carries two arms for `Stats`. As written, the
// arm directly below matches every `Stats` collector, so the
// `collecting_for` dispatch in the following arm would never run.
// Presumably this first arm is the pre-change version — confirm.
SegmentMetricResultCollector::Stats(collector) => {
IntermediateMetricResult::Stats(collector.stats)
}
// The stats collector is shared by both aggregation kinds; `collecting_for`
// decides which intermediate result is produced from it.
SegmentMetricResultCollector::Stats(collector) => match collector.collecting_for {
super::metric::SegmentStatsType::Stats => {
IntermediateMetricResult::Stats(collector.stats)
}
super::metric::SegmentStatsType::Avg => IntermediateMetricResult::Average(
IntermediateAverage::from_collector(collector),
),
},
}
}
}

View File

@@ -1,11 +1,8 @@
use std::fmt::Debug;
use fastfield_codecs::Column;
use serde::{Deserialize, Serialize};
use crate::aggregation::f64_from_fastfield_u64;
use crate::schema::Type;
use crate::DocId;
use super::SegmentStatsCollector;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
/// A single-value metric aggregation that computes the average of numeric values that are
@@ -36,51 +33,6 @@ impl AverageAggregation {
}
}
/// Collector that accumulates the data needed for an average aggregation.
#[derive(Clone, PartialEq)]
pub(crate) struct SegmentAverageCollector {
/// Average data accumulated so far; every collected value is fed into it.
pub data: IntermediateAverage,
/// Field type handed to `f64_from_fastfield_u64` when converting raw column values.
field_type: Type,
}
/// Debug output shows only the accumulated `data`; `field_type` is left out.
impl Debug for SegmentAverageCollector {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("AverageCollector");
        builder.field("data", &self.data);
        builder.finish()
    }
}
impl SegmentAverageCollector {
    /// Creates an empty collector for a field of the given type.
    pub fn from_req(field_type: Type) -> Self {
        let data = Default::default();
        Self { data, field_type }
    }

    /// Feeds the value of every document in `doc` into the average data.
    ///
    /// Documents are processed four at a time; whatever is left over after the
    /// last full group of four is handled one document at a time.
    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
        let mut quads = doc.chunks_exact(4);
        for quad in quads.by_ref() {
            // Fetch all four raw values first, then convert, then accumulate.
            let raw = [
                field.get_val(quad[0]),
                field.get_val(quad[1]),
                field.get_val(quad[2]),
                field.get_val(quad[3]),
            ];
            let converted = raw.map(|value| f64_from_fastfield_u64(value, &self.field_type));
            for value in converted {
                self.data.collect(value);
            }
        }
        for &doc_id in quads.remainder() {
            let raw = field.get_val(doc_id);
            self.data
                .collect(f64_from_fastfield_u64(raw, &self.field_type));
        }
    }
}
/// Contains mergeable version of average data.
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateAverage {
@@ -89,8 +41,11 @@ pub struct IntermediateAverage {
}
impl IntermediateAverage {
pub(crate) fn from_collector(collector: SegmentAverageCollector) -> Self {
collector.data
pub(crate) fn from_collector(collector: SegmentStatsCollector) -> Self {
Self {
sum: collector.stats.sum,
doc_count: collector.stats.count,
}
}
/// Merge average data into this instance.
@@ -106,9 +61,4 @@ impl IntermediateAverage {
Some(self.sum / self.doc_count as f64)
}
}
/// Adds a single value to the running sum and counts one more document.
#[inline]
fn collect(&mut self, val: f64) {
    self.sum += val;
    self.doc_count += 1;
}
}

View File

@@ -40,7 +40,7 @@ impl StatsAggregation {
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Stats {
/// The number of documents.
pub count: usize,
pub count: u64,
/// The sum of the fast field values.
pub sum: f64,
/// The standard deviation of the fast field values. `None` for count == 0.
@@ -73,11 +73,16 @@ impl Stats {
/// `IntermediateStats` contains the mergeable version for stats.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateStats {
// NOTE(review): the five unprefixed fields below share their names with the
// `pub` fields that follow; this listing appears to show both the previous
// private fields and their public replacements — confirm which set is current.
count: usize,
sum: f64,
squared_sum: f64,
min: f64,
max: f64,
/// The number of values.
pub count: u64,
/// The sum of the values.
pub sum: f64,
/// The squared sum of the values.
pub squared_sum: f64,
/// The minimum of the values.
pub min: f64,
/// The maximum of the values.
pub max: f64,
}
impl Default for IntermediateStats {
fn default() -> Self {
@@ -150,17 +155,25 @@ impl IntermediateStats {
}
}
/// Tells a `SegmentStatsCollector` which aggregation its collected stats are for.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum SegmentStatsType {
/// The stats are collected for a stats aggregation.
Stats,
/// The stats are collected for an average aggregation.
Avg,
}
/// Collector that accumulates `IntermediateStats` for either a stats or an
/// average aggregation.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct SegmentStatsCollector {
/// The statistics accumulated so far.
pub(crate) stats: IntermediateStats,
/// Field type supplied when the collector is created from a request.
field_type: Type,
/// Which aggregation the accumulated stats will be turned into.
pub(crate) collecting_for: SegmentStatsType,
}
impl SegmentStatsCollector {
pub fn from_req(field_type: Type) -> Self {
pub fn from_req(field_type: Type, collecting_for: SegmentStatsType) -> Self {
Self {
field_type,
stats: IntermediateStats::default(),
collecting_for,
}
}
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {

View File

@@ -15,7 +15,7 @@ use super::bucket::{SegmentHistogramCollector, SegmentRangeCollector, SegmentTer
use super::collector::MAX_BUCKET_COUNT;
use super::intermediate_agg_result::{IntermediateAggregationResults, IntermediateBucketResult};
use super::metric::{
AverageAggregation, SegmentAverageCollector, SegmentStatsCollector, StatsAggregation,
AverageAggregation, SegmentStatsCollector, SegmentStatsType, StatsAggregation,
};
use super::VecWithNames;
use crate::aggregation::agg_req::BucketAggregationType;
@@ -163,7 +163,6 @@ impl SegmentAggregationResultsCollector {
/// The collector used for a metric aggregation, one variant per collector type.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum SegmentMetricResultCollector {
/// Dedicated collector for average aggregations.
Average(SegmentAverageCollector),
/// Stats collector.
Stats(SegmentStatsCollector),
}
@@ -171,22 +170,19 @@ impl SegmentMetricResultCollector {
pub fn from_req_and_validate(req: &MetricAggregationWithAccessor) -> crate::Result<Self> {
match &req.metric {
MetricAggregation::Average(AverageAggregation { field: _ }) => {
Ok(SegmentMetricResultCollector::Average(
SegmentAverageCollector::from_req(req.field_type),
Ok(SegmentMetricResultCollector::Stats(
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Avg),
))
}
MetricAggregation::Stats(StatsAggregation { field: _ }) => {
Ok(SegmentMetricResultCollector::Stats(
SegmentStatsCollector::from_req(req.field_type),
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Stats),
))
}
}
}
pub(crate) fn collect_block(&mut self, doc: &[DocId], metric: &MetricAggregationWithAccessor) {
match self {
SegmentMetricResultCollector::Average(avg_collector) => {
avg_collector.collect_block(doc, &*metric.accessor);
}
SegmentMetricResultCollector::Stats(stats_collector) => {
stats_collector.collect_block(doc, &*metric.accessor);
}