Files
tantivy/src/aggregation/metric/stats.rs
2023-11-19 15:15:53 +01:00

1862 lines
65 KiB
Rust

use std::fmt::Debug;
use columnar::ColumnType;
use serde::{Deserialize, Serialize};
use super::*;
use crate::aggregation::agg_req_with_accessor::{
AggregationWithAccessor, AggregationsWithAccessor,
};
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults, IntermediateMetricResult,
};
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64};
use crate::{DocId, TantivyError};
/// A multi-value metric aggregation that computes a collection of statistics on numeric values that
/// are extracted from the aggregated documents.
/// See [`Stats`] for returned statistics.
///
/// # JSON Format
/// ```json
/// {
///     "stats": {
///         "field": "score"
///     }
/// }
/// ```
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct StatsAggregation {
/// The field name to compute the stats on.
pub field: String,
/// The missing parameter defines how documents that are missing a value should be treated.
/// By default they are ignored, but it is also possible to treat them as if they had a
/// value. The field is optional and deserialized as a number. Example in JSON format:
/// `{ "field": "my_numbers", "missing": 10.0 }`
#[serde(default)]
pub missing: Option<f64>,
}
impl StatsAggregation {
/// Creates a new [`StatsAggregation`] instance from a field name.
pub fn from_field_name(field_name: String) -> Self {
StatsAggregation {
field: field_name,
missing: None,
}
}
/// Returns the field name the aggregation is computed on.
pub fn field_name(&self) -> &str {
&self.field
}
}
/// A multi-value metric aggregation that computes a collection of extended statistics
/// on numeric values that are extracted
/// from the aggregated documents.
/// See [`ExtendedStats`] for returned statistics.
///
/// # JSON Format
/// ```json
/// {
///     "extended_stats": {
///         "field": "score"
///     }
/// }
/// ```
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ExtendedStatsAggregation {
/// The field name to compute the stats on.
pub field: String,
/// The missing parameter defines how documents that are missing a value should be treated.
/// By default they are ignored, but it is also possible to treat them as if they had a
/// value. The field is optional and deserialized as a number. Example in JSON format:
/// `{ "field": "my_numbers", "missing": 10.0 }`
#[serde(default)]
pub missing: Option<f64>,
/// The sigma parameter defines how the standard deviation bounds are calculated:
/// the bounds lie `sigma` standard deviations away from the average.
/// This can be a useful way to visualize the variance of your data.
/// The default value is 2. Example in JSON format:
/// `{ "field": "my_numbers", "sigma": 3.0 }`
#[serde(default)]
pub sigma: Option<f64>,
}
impl ExtendedStatsAggregation {
/// Creates a new [`ExtendedStatsAggregation`] instance from a field name.
pub fn from_field_name(field_name: String) -> Self {
ExtendedStatsAggregation {
field: field_name,
missing: None,
sigma: None,
}
}
/// Returns the field name the aggregation is computed on.
pub fn field_name(&self) -> &str {
&self.field
}
}
/// Final result of a stats aggregation: a collection of basic statistics.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Stats {
/// The number of values that were collected.
pub count: u64,
/// The sum of the fast field values.
pub sum: f64,
/// The min value of the fast field values. `None` if count equals zero.
pub min: Option<f64>,
/// The max value of the fast field values. `None` if count equals zero.
pub max: Option<f64>,
/// The average of the fast field values. `None` if count equals zero.
pub avg: Option<f64>,
}
impl Stats {
    /// Looks up a single statistic by its property name.
    ///
    /// Supported names are `count`, `sum`, `min`, `max` and `avg`. The inner `Option` is
    /// `None` when the statistic itself is absent.
    ///
    /// # Errors
    /// Returns [`TantivyError::InvalidArgument`] for any other property name.
    pub(crate) fn get_value(&self, agg_property: &str) -> crate::Result<Option<f64>> {
        let value = match agg_property {
            "avg" => self.avg,
            "count" => Some(self.count as f64),
            "max" => self.max,
            "min" => self.min,
            "sum" => Some(self.sum),
            _ => {
                return Err(TantivyError::InvalidArgument(format!(
                    "Unknown property {agg_property} on stats metric aggregation"
                )));
            }
        };
        Ok(value)
    }
}
/// Final result of an extended stats aggregation.
/// It extends [`Stats`] with the sum of squares, variance, standard deviation
/// and standard deviation bounds.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ExtendedStats {
/// The number of values that were collected.
pub count: u64,
/// The sum of the fast field values.
pub sum: f64,
/// The min value of the fast field values. `None` if count equals zero.
pub min: Option<f64>,
/// The max value of the fast field values. `None` if count equals zero.
pub max: Option<f64>,
/// The average of the fast field values. `None` if count equals zero.
pub avg: Option<f64>,
/// The sum of squares of the fast field values. `None` if count equals zero.
pub sum_of_squares: Option<f64>,
/// The population variance of the fast field values (the divisor is the count).
/// `None` if count is less than 2.
pub variance: Option<f64>,
/// The population variance of the fast field values, always equal to `variance`. `None` if
/// count is less than 2.
pub variance_population: Option<f64>,
/// The sample variance of the fast field values (the divisor is count - 1). `None` if count
/// is less than 2.
pub variance_sampling: Option<f64>,
/// The standard deviation of the fast field values, i.e. the square root of `variance`.
/// `None` if count is less than 2.
pub std_deviation: Option<f64>,
/// The population standard deviation of the fast field values, always equal to
/// `std_deviation`. `None` if count is less than 2.
pub std_deviation_population: Option<f64>,
/// The sample standard deviation of the fast field values, i.e. the square root of
/// `variance_sampling`. `None` if count is less than 2.
pub std_deviation_sampling: Option<f64>,
/// The bounds located sigma standard deviations below and above the average. `None`
/// if count is less than 2.
pub std_deviation_bounds: Option<StandardDeviationBounds>,
}
/// Part of [`ExtendedStats`] holding the standard deviation bounds.
/// The values depend on sigma and represent the bounds around the average
/// at a distance of `std_deviation * sigma`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct StandardDeviationBounds {
/// Upper bound: `avg + std_deviation * sigma`.
pub upper: f64,
/// Lower bound: `avg - std_deviation * sigma`.
pub lower: f64,
/// Upper bound based on the sample standard deviation: `avg + std_deviation_sampling * sigma`.
pub upper_sampling: f64,
/// Lower bound based on the sample standard deviation: `avg - std_deviation_sampling * sigma`.
pub lower_sampling: f64,
/// Same value as `upper`.
pub upper_population: f64,
/// Same value as `lower`.
pub lower_population: f64,
}
impl ExtendedStats {
    /// Looks up a single statistic by its property name.
    ///
    /// Besides the flat property names (`count`, `sum`, `variance`, ...), the individual
    /// bounds are addressed as `std_deviation_bounds.<bound>`. The inner `Option` is `None`
    /// when the statistic itself is absent.
    ///
    /// # Errors
    /// Returns [`TantivyError::InvalidArgument`] for an unknown property name.
    pub(crate) fn get_value(&self, agg_property: &str) -> crate::Result<Option<f64>> {
        // Borrowed once; every bound lookup maps over the same optional reference.
        let bounds = self.std_deviation_bounds.as_ref();
        let value = match agg_property {
            "count" => Some(self.count as f64),
            "sum" => Some(self.sum),
            "min" => self.min,
            "max" => self.max,
            "avg" => self.avg,
            "sum_of_squares" => self.sum_of_squares,
            "variance" => self.variance,
            "variance_population" => self.variance_population,
            "variance_sampling" => self.variance_sampling,
            "std_deviation" => self.std_deviation,
            "std_deviation_population" => self.std_deviation_population,
            "std_deviation_sampling" => self.std_deviation_sampling,
            "std_deviation_bounds.upper" => bounds.map(|b| b.upper),
            "std_deviation_bounds.upper_population" => bounds.map(|b| b.upper_population),
            "std_deviation_bounds.upper_sampling" => bounds.map(|b| b.upper_sampling),
            "std_deviation_bounds.lower" => bounds.map(|b| b.lower),
            "std_deviation_bounds.lower_population" => bounds.map(|b| b.lower_population),
            "std_deviation_bounds.lower_sampling" => bounds.map(|b| b.lower_sampling),
            _ => {
                return Err(TantivyError::InvalidArgument(format!(
                    "Unknown property {agg_property} on stats metric aggregation"
                )));
            }
        };
        Ok(value)
    }
}
/// Intermediate result of the stats aggregation that can be combined with other intermediate
/// results.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateStats {
/// The number of extracted values.
count: u64,
/// The sum of the extracted values.
sum: f64,
/// Running compensation term for `sum`, needed for the
/// [Kahan algorithm for summation](https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
delta: f64,
/// The min value. Holds `f64::MAX` in the default (empty) state.
min: f64,
/// The max value. Holds `f64::MIN` in the default (empty) state.
max: f64,
}
impl Default for IntermediateStats {
fn default() -> Self {
Self {
count: 0,
sum: 0.0,
delta: 0.0,
min: f64::MAX,
max: f64::MIN,
}
}
}
impl IntermediateStats {
/// Merges the other stats intermediate result into self.
pub fn merge_fruits(&mut self, other: IntermediateStats) {
self.count += other.count;
self.sum += other.sum;
self.delta += other.delta;
self.min = self.min.min(other.min);
self.max = self.max.max(other.max);
}
/// Computes the final stats value.
pub fn finalize(&self) -> Stats {
let min = if self.count == 0 {
None
} else {
Some(self.min)
};
let max = if self.count == 0 {
None
} else {
Some(self.max)
};
let avg = if self.count == 0 {
None
} else {
Some(self.sum / (self.count as f64))
};
Stats {
count: self.count,
sum: self.sum,
min,
max,
avg,
}
}
#[inline]
fn collect(&mut self, value: f64) {
self.count += 1;
// kahan algorithm for sum
let y = value - self.delta;
let t = self.sum + y;
self.delta = (t - self.sum) - y;
self.sum = t;
self.min = self.min.min(value);
self.max = self.max.max(value);
}
}
/// Intermediate result of the extended stats aggregation that can be combined with other
/// intermediate results.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateExtendedStats {
/// The basic accumulator (count, sum, min, max) shared with the plain stats aggregation.
intermediate_stats: IntermediateStats,
/// The sum of squared differences from the mean, referred to as M2 in Welford's online
/// algorithm. The variance is derived from it.
sum_of_squares: f64,
/// The plain sum of the squared values, as computed by Elasticsearch. This is the value
/// reported as `sum_of_squares` in the final result.
sum_of_squares_elastic: f64,
/// Compensation term for `sum_of_squares_elastic`, needed for the Kahan algorithm.
delta_sum_for_squares_elastic: f64,
/// The running mean, an intermediate value needed for calculating the variance
/// as per [Welford's online algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm)
mean: f64,
/// The multiplier applied to the standard deviation when computing the standard
/// deviation bounds.
sigma: f64,
}
impl Default for IntermediateExtendedStats {
fn default() -> Self {
Self {
intermediate_stats: IntermediateStats::default(),
sum_of_squares: 0.0,
sum_of_squares_elastic: 0.0,
delta_sum_for_squares_elastic: 0.0,
mean: 0.0,
sigma: 2.0,
}
}
}
impl IntermediateExtendedStats {
    /// Creates a new, empty `IntermediateExtendedStats`.
    ///
    /// `sigma` is the multiplier used for the standard deviation bounds; it defaults
    /// to 2 when `None` is passed.
    pub fn with_sigma(sigma: Option<f64>) -> Self {
        Self {
            intermediate_stats: IntermediateStats::default(),
            sum_of_squares: 0.0,
            sum_of_squares_elastic: 0.0,
            delta_sum_for_squares_elastic: 0.0,
            mean: 0.0,
            sigma: sigma.unwrap_or(2.0),
        }
    }

    /// Merges the other extended stats intermediate result into self.
    ///
    /// The `sigma` of `self` is always kept; the `sigma` of `other` is ignored.
    pub fn merge_fruits(&mut self, other: IntermediateExtendedStats) {
        if other.intermediate_stats.count == 0 {
            // Nothing to merge.
            return;
        }
        if self.intermediate_stats.count == 0 {
            // `self` is empty: adopt every accumulator of `other`.
            self.sum_of_squares = other.sum_of_squares;
            self.sum_of_squares_elastic = other.sum_of_squares_elastic;
            self.intermediate_stats = other.intermediate_stats;
            self.mean = other.mean;
            self.delta_sum_for_squares_elastic = other.delta_sum_for_squares_elastic;
            return;
        }
        if other.intermediate_stats.count == 1 {
            // `other` holds a single value, which is its sum: collect it directly.
            self.collect(other.intermediate_stats.sum);
            return;
        }
        if self.intermediate_stats.count == 1 {
            // `self` holds a single value: start from `other` and collect that value.
            let single_value = self.intermediate_stats.sum;
            self.intermediate_stats = other.intermediate_stats;
            self.sum_of_squares = other.sum_of_squares;
            self.sum_of_squares_elastic = other.sum_of_squares_elastic;
            self.mean = other.mean;
            self.delta_sum_for_squares_elastic = other.delta_sum_for_squares_elastic;
            self.collect(single_value);
            return;
        }

        // Parallel version of Welford's online algorithm.
        let self_count = self.intermediate_stats.count as f64;
        let other_count = other.intermediate_stats.count as f64;
        let new_count = self.intermediate_stats.count + other.intermediate_stats.count;
        let delta = other.mean - self.mean;
        self.sum_of_squares +=
            other.sum_of_squares + delta * delta * self_count * other_count / new_count as f64;
        self.intermediate_stats.count = new_count;
        // The mean is computed from sum and count because it is more precise
        // (and the sum is already available) than `mean + delta * other_count / new_count`.
        self.mean =
            (self.intermediate_stats.sum + other.intermediate_stats.sum) / new_count as f64;
        self.intermediate_stats.sum += other.intermediate_stats.sum;
        self.intermediate_stats.delta += other.intermediate_stats.delta;
        // The extrema of both sides must be combined as well, otherwise the merged
        // result would only report the min/max seen by `self`.
        self.intermediate_stats.min = self
            .intermediate_stats
            .min
            .min(other.intermediate_stats.min);
        self.intermediate_stats.max = self
            .intermediate_stats
            .max
            .max(other.intermediate_stats.max);
        self.sum_of_squares_elastic += other.sum_of_squares_elastic;
        self.delta_sum_for_squares_elastic += other.delta_sum_for_squares_elastic;
    }

    /// Computes the final extended stats value.
    ///
    /// `min`, `max`, `avg` and `sum_of_squares` are `None` when no value was collected.
    /// The variance, the standard deviation and the bounds are `None` when fewer than
    /// two values were collected.
    pub fn finalize(&self) -> ExtendedStats {
        let count = self.intermediate_stats.count;
        let if_non_empty = |value: f64| if count == 0 { None } else { Some(value) };
        let if_at_least_two = |value: f64| if count <= 1 { None } else { Some(value) };

        let min = if_non_empty(self.intermediate_stats.min);
        let max = if_non_empty(self.intermediate_stats.max);
        let avg = if_non_empty(self.mean);
        let sum_of_squares = if_non_empty(self.sum_of_squares_elastic);

        // Guard the divisions so that `count - 1` is never evaluated for count == 0.
        let variance = if count <= 1 {
            None
        } else {
            if_at_least_two(self.sum_of_squares / count as f64)
        };
        let variance_sampling = if count <= 1 {
            None
        } else {
            if_at_least_two(self.sum_of_squares / (count - 1) as f64)
        };
        let std_deviation = variance.map(f64::sqrt);
        let std_deviation_sampling = variance_sampling.map(f64::sqrt);

        // Both deviations are present under the same condition (count >= 2).
        let std_deviation_bounds = match (std_deviation, std_deviation_sampling) {
            (Some(std_dev), Some(std_dev_sampling)) => {
                let upper = self.mean + std_dev * self.sigma;
                let lower = self.mean - std_dev * self.sigma;
                Some(StandardDeviationBounds {
                    upper,
                    lower,
                    upper_sampling: self.mean + std_dev_sampling * self.sigma,
                    lower_sampling: self.mean - std_dev_sampling * self.sigma,
                    upper_population: upper,
                    lower_population: lower,
                })
            }
            _ => None,
        };

        ExtendedStats {
            count,
            sum: self.intermediate_stats.sum,
            min,
            max,
            avg,
            sum_of_squares,
            variance,
            variance_population: variance,
            variance_sampling,
            std_deviation,
            std_deviation_population: std_deviation,
            std_deviation_sampling,
            std_deviation_bounds,
        }
    }

    /// Adds a single value to every accumulator.
    fn collect(&mut self, value: f64) {
        self.intermediate_stats.collect(value);
        // Kahan algorithm for sum_of_squares_elastic.
        let y = value * value - self.delta_sum_for_squares_elastic;
        let t = self.sum_of_squares_elastic + y;
        self.delta_sum_for_squares_elastic = (t - self.sum_of_squares_elastic) - y;
        self.sum_of_squares_elastic = t;
        // Must run after `intermediate_stats.collect`: it reads the updated sum and count.
        self.update_variance(value);
    }

    /// Updates `mean` and `sum_of_squares` (M2) for a newly collected value.
    ///
    /// Expects `intermediate_stats` to already include `value`.
    fn update_variance(&mut self, value: f64) {
        let delta = value - self.mean;
        // This is not what Welford's online algorithm prescribes
        // (`mean += delta / count`), but using the pseudo code from wikipedia
        // there was a small rounding error (in the 15th decimal place) that caused
        // a test (test_aggregation_level1 in agg_test.rs) failure.
        self.mean = self.intermediate_stats.sum / self.intermediate_stats.count as f64;
        let delta2 = value - self.mean;
        self.sum_of_squares += delta * delta2;
    }
}
/// Abstraction over the accumulator used inside a [SegmentStatsCollector].
///
/// It is implemented by the collectors wrapping `IntermediateStats` and
/// `IntermediateExtendedStats`, which allows a single [SegmentStatsCollector]
/// to serve both kinds of aggregation.
pub trait IntermediateInnerCollector {
/// Collects a single field value into the statistic.
fn collect(&mut self, val: f64);
/// Consumes the collector and transforms the accumulated statistic
/// into an [IntermediateMetricResult].
fn into_intermediate_metric_result(self) -> IntermediateMetricResult;
}
/// Intermediate struct used by [SegmentStatsCollector]
/// for calculating Stats and creating the matching [IntermediateMetricResult].
#[derive(Clone, Debug, PartialEq)]
pub struct IntermediateInnerStatsCollector {
// The accumulated basic statistics.
stats: IntermediateStats,
// The metric the statistics are collected for; selects the produced result variant.
collecting_for: SegmentStatsType,
}
impl IntermediateInnerStatsCollector {
    /// Creates an empty collector that will produce the result matching `collecting_for`.
    pub(crate) fn from(collecting_for: SegmentStatsType) -> Self {
        let stats = IntermediateStats::default();
        Self {
            stats,
            collecting_for,
        }
    }
}
impl IntermediateInnerCollector for IntermediateInnerStatsCollector {
    /// Adds `value` to the underlying basic statistics.
    #[inline]
    fn collect(&mut self, value: f64) {
        self.stats.collect(value);
    }

    /// Wraps the accumulated statistics into the result variant selected by
    /// `collecting_for`.
    ///
    /// # Panics
    /// Panics when the collector was created for `SegmentStatsType::ExtendedStats`,
    /// which cannot be derived from the basic statistics.
    fn into_intermediate_metric_result(self) -> IntermediateMetricResult {
        let Self {
            stats,
            collecting_for,
        } = self;
        match collecting_for {
            SegmentStatsType::Stats => IntermediateMetricResult::Stats(stats),
            SegmentStatsType::Sum => {
                IntermediateMetricResult::Sum(IntermediateSum::from_stats(stats))
            }
            SegmentStatsType::Min => {
                IntermediateMetricResult::Min(IntermediateMin::from_stats(stats))
            }
            SegmentStatsType::Max => {
                IntermediateMetricResult::Max(IntermediateMax::from_stats(stats))
            }
            SegmentStatsType::Count => {
                IntermediateMetricResult::Count(IntermediateCount::from_stats(stats))
            }
            SegmentStatsType::Average => {
                IntermediateMetricResult::Average(IntermediateAverage::from_stats(stats))
            }
            SegmentStatsType::ExtendedStats => {
                panic!("cannot create IntermediateMetricResult for ExtendStats from Stats");
            }
        }
    }
}
/// Intermediate struct used by [SegmentStatsCollector]
/// for calculating ExtendedStats and creating the matching [IntermediateMetricResult].
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct IntermediateInnerExtendedStatsCollector {
// The accumulated extended statistics.
stats: IntermediateExtendedStats,
// The metric the statistics are collected for; only `ExtendedStats` can be converted
// into a result.
collecting_for: SegmentStatsType,
}
impl IntermediateInnerExtendedStatsCollector {
    /// Creates an empty collector for `collecting_for`, using `sigma` (or its default
    /// when `None`) for the standard deviation bounds.
    pub fn from(collecting_for: SegmentStatsType, sigma: Option<f64>) -> Self {
        let stats = IntermediateExtendedStats::with_sigma(sigma);
        Self {
            stats,
            collecting_for,
        }
    }
}
impl IntermediateInnerCollector for IntermediateInnerExtendedStatsCollector {
    /// Adds `value` to the underlying extended statistics.
    #[inline]
    fn collect(&mut self, value: f64) {
        self.stats.collect(value);
    }

    /// Wraps the accumulated statistics into `IntermediateMetricResult::ExtendedStats`.
    ///
    /// # Panics
    /// Panics when the collector was created for anything other than
    /// `SegmentStatsType::ExtendedStats`.
    fn into_intermediate_metric_result(self) -> IntermediateMetricResult {
        if !matches!(self.collecting_for, SegmentStatsType::ExtendedStats) {
            panic!(
                "cannot create IntermediateMetricResult for Stats/Min/Avg/Count/Min/Max/Sum \
                 from ExtendedStats"
            );
        }
        IntermediateMetricResult::ExtendedStats(self.stats)
    }
}
// The metric a stats collector is collecting for. It selects which
// `IntermediateMetricResult` variant is produced once collection is done.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum SegmentStatsType {
// Produces `IntermediateMetricResult::Average`.
Average,
// Produces `IntermediateMetricResult::Count`.
Count,
// Produces `IntermediateMetricResult::Max`.
Max,
// Produces `IntermediateMetricResult::Min`.
Min,
// Produces `IntermediateMetricResult::Stats`.
Stats,
// Produces `IntermediateMetricResult::ExtendedStats`; requires the extended collector.
ExtendedStats,
// Produces `IntermediateMetricResult::Sum`.
Sum,
}
// Segment-level collector for the stats-like metric aggregations. The actual
// accumulation is delegated to the inner collector `T`.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct SegmentStatsCollector<T: IntermediateInnerCollector> {
// Replacement value for documents without a value, already converted to the
// fast field u64 representation. `None` means such documents are ignored.
missing: Option<u64>,
// Column type used to convert the raw u64 fast field values to f64.
field_type: ColumnType,
// Accumulator receiving every collected value.
inner_intermediate_collector: T,
// Index of this aggregation in the aggregations-with-accessor collection.
pub(crate) accessor_idx: usize,
// NOTE(review): initialized empty and not read anywhere in the visible code —
// confirm whether this field is still needed.
val_cache: Vec<u64>,
}
impl<T: IntermediateInnerCollector> SegmentStatsCollector<T> {
    /// Creates a collector for the aggregation at `accessor_idx`.
    ///
    /// `missing`, when given, is converted to the fast field u64 representation of
    /// `field_type`; it stays `None` if that conversion yields no value.
    pub fn from_req(
        field_type: ColumnType,
        inner_intermediate_collector: T,
        accessor_idx: usize,
        missing: Option<f64>,
    ) -> Self {
        let missing = match missing {
            Some(value) => f64_to_fastfield_u64(value, &field_type),
            None => None,
        };
        Self {
            missing,
            field_type,
            inner_intermediate_collector,
            accessor_idx,
            val_cache: Vec::new(),
        }
    }

    /// Fetches the values of `docs` as one block and feeds each of them to the
    /// inner collector. When a `missing` value is configured, the block is fetched
    /// with that replacement value.
    #[inline]
    pub(crate) fn collect_block_with_field(
        &mut self,
        docs: &[DocId],
        agg_accessor: &mut AggregationWithAccessor,
    ) {
        match self.missing {
            Some(missing) => {
                agg_accessor.column_block_accessor.fetch_block_with_missing(
                    docs,
                    &agg_accessor.accessor,
                    missing,
                );
            }
            None => {
                agg_accessor
                    .column_block_accessor
                    .fetch_block(docs, &agg_accessor.accessor);
            }
        }

        let field_type = &self.field_type;
        let collector = &mut self.inner_intermediate_collector;
        for raw in agg_accessor.column_block_accessor.iter_vals() {
            collector.collect(f64_from_fastfield_u64(raw, field_type));
        }
    }
}
impl<T: IntermediateInnerCollector + Debug + Clone + 'static> SegmentAggregationCollector
    for SegmentStatsCollector<T>
{
    /// Converts the collected statistics into an intermediate metric result and
    /// pushes it into `results` under the name of this aggregation.
    #[inline]
    fn add_intermediate_aggregation_result(
        self: Box<Self>,
        agg_with_accessor: &AggregationsWithAccessor,
        results: &mut IntermediateAggregationResults,
    ) -> crate::Result<()> {
        let agg_name = agg_with_accessor.aggs.keys[self.accessor_idx].to_string();
        let metric = self
            .inner_intermediate_collector
            .into_intermediate_metric_result();
        results.push(agg_name, IntermediateAggregationResult::Metric(metric))?;
        Ok(())
    }

    /// Collects every value of a single document. If the document has no value and
    /// a `missing` value is configured, that value is collected instead.
    #[inline]
    fn collect(
        &mut self,
        doc: crate::DocId,
        agg_with_accessor: &mut AggregationsWithAccessor,
    ) -> crate::Result<()> {
        let column = &agg_with_accessor.aggs.values[self.accessor_idx].accessor;
        let mut saw_value = false;
        for raw in column.values_for_doc(doc) {
            self.inner_intermediate_collector
                .collect(f64_from_fastfield_u64(raw, &self.field_type));
            saw_value = true;
        }
        if !saw_value {
            if let Some(missing) = self.missing {
                self.inner_intermediate_collector
                    .collect(f64_from_fastfield_u64(missing, &self.field_type));
            }
        }
        Ok(())
    }

    /// Collects a whole block of documents through `collect_block_with_field`.
    #[inline]
    fn collect_block(
        &mut self,
        docs: &[crate::DocId],
        agg_with_accessor: &mut AggregationsWithAccessor,
    ) -> crate::Result<()> {
        let agg_accessor = &mut agg_with_accessor.aggs.values[self.accessor_idx];
        self.collect_block_with_field(docs, agg_accessor);
        Ok(())
    }
}
#[cfg(test)]
mod tests {
use serde_json::Value;
use crate::aggregation::agg_req::{Aggregation, Aggregations};
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::metric::IntermediateExtendedStats;
use crate::aggregation::tests::{
exec_request_with_query, get_test_index_2_segments, get_test_index_from_values,
};
use crate::aggregation::AggregationCollector;
use crate::query::{AllQuery, TermQuery};
use crate::schema::{IndexRecordOption, Schema, FAST};
use crate::{assert_nearly_equals, Index, IndexWriter, Term};
const EPSILON_FOR_TEST: f64 = 0.00000000000002;
#[test]
fn test_aggregation_stats_empty_index() -> crate::Result<()> {
// test index without segments
let values = vec![];
let index = get_test_index_from_values(false, &values)?;
let agg_req_1: Aggregations = serde_json::from_value(json!({
"stats": {
"stats": {
"field": "score",
},
}
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(
res["stats"],
json!({
"avg": Value::Null,
"count": 0,
"max": Value::Null,
"min": Value::Null,
"sum": 0.0
})
);
Ok(())
}
#[test]
fn test_aggregation_stats_simple() -> crate::Result<()> {
let values = vec![10.0];
let index = get_test_index_from_values(false, &values)?;
let agg_req_1: Aggregations = serde_json::from_value(json!({
"stats": {
"stats": {
"field": "score",
},
}
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(
res["stats"],
json!({
"avg": 10.0,
"count": 1,
"max": 10.0,
"min": 10.0,
"sum": 10.0
})
);
Ok(())
}
#[test]
fn test_aggregation_stats() -> crate::Result<()> {
let index = get_test_index_2_segments(false)?;
let reader = index.reader()?;
let text_field = reader.searcher().schema().get_field("text").unwrap();
let term_query = TermQuery::new(
Term::from_field_text(text_field, "cool"),
IndexRecordOption::Basic,
);
let range_agg: Aggregation = {
serde_json::from_value(json!({
"range": {
"field": "score",
"ranges": [ { "from": 3.0f64, "to": 7.0f64 }, { "from": 7.0f64, "to": 19.0f64 }, { "from": 19.0f64, "to": 20.0f64 } ]
},
"aggs": {
"stats": {
"stats": {
"field": "score"
}
}
}
}))
.unwrap()
};
let agg_req_1: Aggregations = serde_json::from_value(json!({
"stats_i64": {
"stats": {
"field": "score_i64",
},
},
"stats_f64": {
"stats": {
"field": "score_f64",
},
},
"stats": {
"stats": {
"field": "score",
},
},
"range": range_agg
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
assert_eq!(
res["stats"],
json!({
"avg": 12.142857142857142,
"count": 7,
"max": 44.0,
"min": 1.0,
"sum": 85.0
})
);
assert_eq!(
res["stats_i64"],
json!({
"avg": 12.142857142857142,
"count": 7,
"max": 44.0,
"min": 1.0,
"sum": 85.0
})
);
assert_eq!(
res["stats_f64"],
json!({
"avg": 12.214285714285714,
"count": 7,
"max": 44.5,
"min": 1.0,
"sum": 85.5
})
);
assert_eq!(
res["range"]["buckets"][2]["stats"],
json!({
"avg": 10.666666666666666,
"count": 3,
"max": 14.0,
"min": 7.0,
"sum": 32.0
})
);
assert_eq!(
res["range"]["buckets"][3]["stats"],
json!({
"avg": serde_json::Value::Null,
"count": 0,
"max": serde_json::Value::Null,
"min": serde_json::Value::Null,
"sum": 0.0,
})
);
Ok(())
}
#[test]
fn test_stats_json() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with empty json
index_writer.add_document(doc!()).unwrap();
index_writer.commit().unwrap();
// => Segment with json, but no field partially_empty
index_writer
.add_document(doc!(json => json!({"different_field": "blue"})))
.unwrap();
index_writer.commit().unwrap();
//// => Segment with field partially_empty
index_writer
.add_document(doc!(json => json!({"partially_empty": 10.0})))
.unwrap();
index_writer.add_document(doc!())?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"my_stats": {
"stats": {
"field": "json.partially_empty"
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(
res["my_stats"],
json!({
"avg": 10.0,
"count": 1,
"max": 10.0,
"min": 10.0,
"sum": 10.0
})
);
Ok(())
}
#[test]
fn test_stats_json_missing() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
// => Segment with empty json
index_writer.add_document(doc!()).unwrap();
index_writer.commit().unwrap();
// => Segment with json, but no field partially_empty
index_writer
.add_document(doc!(json => json!({"different_field": "blue"})))
.unwrap();
index_writer.commit().unwrap();
//// => Segment with field partially_empty
index_writer
.add_document(doc!(json => json!({"partially_empty": 10.0})))
.unwrap();
index_writer.add_document(doc!())?;
index_writer.commit().unwrap();
let agg_req: Aggregations = serde_json::from_value(json!({
"my_stats": {
"stats": {
"field": "json.partially_empty",
"missing": 0.0
},
}
}))
.unwrap();
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(
res["my_stats"],
json!({
"avg": 2.5,
"count": 4,
"max": 10.0,
"min": 0.0,
"sum": 10.0
})
);
Ok(())
}
#[test]
fn test_stats_json_missing_sub_agg() -> crate::Result<()> {
// This test verifies the `collect` method (in contrast to `collect_block`), which is
// called when the sub-aggregations are flushed.
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("texts", FAST);
let score_field_f64 = schema_builder.add_f64_field("score", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
{
let mut index_writer = index.writer_for_tests()?;
// writing the segment
index_writer.add_document(doc!(
score_field_f64 => 10.0f64,
text_field => "a"
))?;
index_writer.add_document(doc!(text_field => "a"))?;
index_writer.commit()?;
}
let agg_req: Aggregations = {
serde_json::from_value(json!({
"range_with_stats": {
"terms": {
"field": "texts"
},
"aggs": {
"my_stats": {
"stats": {
"field": "score",
"missing": 0.0
}
}
}
}
}))
.unwrap()
};
let res = exec_request_with_query(agg_req, &index, None)?;
assert_eq!(
res["range_with_stats"]["buckets"][0]["my_stats"]["count"],
2
);
assert_eq!(
res["range_with_stats"]["buckets"][0]["my_stats"]["min"],
0.0
);
assert_eq!(
res["range_with_stats"]["buckets"][0]["my_stats"]["avg"],
5.0
);
Ok(())
}
#[test]
fn test_aggregation_extended_stats_no_variance() -> crate::Result<()> {
let values = vec![1.0];
let index = get_test_index_from_values(false, &values)?;
let agg_req_1: Aggregations = serde_json::from_value(json!({
"my_stats": {
"extended_stats": {
"field": "score_f64",
},
}
}))
.unwrap();
let collector = AggregationCollector::from_aggs(agg_req_1, Default::default());
let reader = index.reader()?;
let searcher = reader.searcher();
let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
assert_eq!(
agg_res
.get_value_from_aggregation("my_stats", "count")?
.unwrap(),
1.0
);
assert_eq!(
agg_res
.get_value_from_aggregation("my_stats", "min")?
.unwrap(),
1.0
);
assert_eq!(
agg_res
.get_value_from_aggregation("my_stats", "max")?
.unwrap(),
1.0
);
assert_eq!(
agg_res
.get_value_from_aggregation("my_stats", "sum")?
.unwrap(),
1.0
);
assert_eq!(
agg_res
.get_value_from_aggregation("my_stats", "avg")?
.unwrap(),
1.0
);
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_population")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_sampling")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_bounds.lower")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_bounds.lower_population")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_bounds.lower_sampling")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_bounds.upper")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_bounds.upper_population")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "std_deviation_bounds.upper_sampling")?
.is_none());
assert_eq!(
agg_res
.get_value_from_aggregation("my_stats", "sum_of_squares")?
.unwrap(),
1.0
);
assert!(agg_res
.get_value_from_aggregation("my_stats", "variance_population")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "variance")?
.is_none());
assert!(agg_res
.get_value_from_aggregation("my_stats", "variance_sampling")?
.is_none());
Ok(())
}
#[test]
fn test_aggregation_extended_stats() -> crate::Result<()> {
    // Population variance expected for the indexed scores; the test expects the
    // plain `variance` / `std_deviation` metrics to report the same value.
    const EXPECTED_VARIANCE: f64 = 9.138888888888888;

    let scores = vec![1.0, 3.0, 4.0, 5.0, 8.0, 10.0];
    let index = get_test_index_from_values(false, &scores)?;

    // The request carries no `sigma`, so the deviation bounds checked below are
    // whatever the aggregation produces without an explicit setting.
    let request = json!({
        "my_stats": {
            "extended_stats": {
                "field": "score_f64"
            }
        }
    });
    let aggregations: Aggregations = serde_json::from_value(request).unwrap();
    let collector = AggregationCollector::from_aggs(aggregations, Default::default());

    let reader = index.reader()?;
    let searcher = reader.searcher();
    let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();

    // One row per metric: (property path, expected value, tolerance).
    // A tolerance of `None` means the value has to match exactly.
    let expectations = [
        ("count", 6.0, None),
        ("min", 1.0, None),
        ("max", 10.0, None),
        ("sum", 31.0, None),
        ("avg", 5.166666666666667, Some(EPSILON_FOR_TEST)),
        (
            "std_deviation",
            EXPECTED_VARIANCE.sqrt(),
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_population",
            EXPECTED_VARIANCE.sqrt(),
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_sampling",
            3.311595788538611,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.lower",
            -0.8794523824056837,
            Some(EPSILON_FOR_TEST),
        ),
        // This metric is checked with its own, hard-coded tolerance.
        (
            "std_deviation_bounds.lower_population",
            -0.8794523824056837,
            Some(0.00000000000001),
        ),
        (
            "std_deviation_bounds.lower_sampling",
            -1.4565249104105549,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper",
            11.212785715739017,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper_population",
            11.212785715739017,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper_sampling",
            11.78985824374389,
            Some(EPSILON_FOR_TEST),
        ),
        ("sum_of_squares", 215.0, None),
        (
            "variance_population",
            EXPECTED_VARIANCE,
            Some(EPSILON_FOR_TEST),
        ),
        ("variance", EXPECTED_VARIANCE, Some(EPSILON_FOR_TEST)),
        (
            "variance_sampling",
            10.966666666666663,
            Some(EPSILON_FOR_TEST),
        ),
    ];

    for (property, expected, tolerance) in expectations {
        let actual = agg_res
            .get_value_from_aggregation("my_stats", property)?
            .unwrap();
        if let Some(epsilon) = tolerance {
            assert_nearly_equals!(actual, expected, epsilon);
        } else {
            assert_eq!(actual, expected);
        }
    }
    Ok(())
}
#[test]
fn test_aggregation_extended_stats_with_sigma() -> crate::Result<()> {
    // Population variance expected for the scores 1..=6.
    const EXPECTED_VARIANCE: f64 = 2.9166666666666665;

    let scores = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    let index = get_test_index_from_values(false, &scores)?;

    // Same request shape as the plain extended-stats test, but with an explicit
    // `sigma`; the expected bounds below sit 1.5 standard deviations from the mean.
    let request = json!({
        "my_stats": {
            "extended_stats": {
                "field": "score_f64",
                "sigma": 1.5
            }
        }
    });
    let aggregations: Aggregations = serde_json::from_value(request).unwrap();
    let collector = AggregationCollector::from_aggs(aggregations, Default::default());

    let reader = index.reader()?;
    let searcher = reader.searcher();
    let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();

    // One row per metric: (property path, expected value, tolerance).
    // A tolerance of `None` means the value has to match exactly.
    let expectations = [
        ("count", 6.0, None),
        ("min", 1.0, None),
        ("max", 6.0, None),
        ("sum", 21.0, None),
        ("avg", 3.5, None),
        (
            "std_deviation",
            EXPECTED_VARIANCE.sqrt(),
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_population",
            EXPECTED_VARIANCE.sqrt(),
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_sampling",
            1.8708286933869709,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.lower",
            0.9382623085101005,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.lower_population",
            0.9382623085101005,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.lower_sampling",
            0.6937569599195434,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper",
            6.061737691489899,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper_population",
            6.061737691489899,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper_sampling",
            6.3062430400804566,
            Some(EPSILON_FOR_TEST),
        ),
        ("sum_of_squares", 91.0, None),
        (
            "variance_population",
            EXPECTED_VARIANCE,
            Some(EPSILON_FOR_TEST),
        ),
        ("variance", EXPECTED_VARIANCE, Some(EPSILON_FOR_TEST)),
        ("variance_sampling", 3.5, Some(EPSILON_FOR_TEST)),
    ];

    for (property, expected, tolerance) in expectations {
        let actual = agg_res
            .get_value_from_aggregation("my_stats", property)?
            .unwrap();
        if let Some(epsilon) = tolerance {
            assert_nearly_equals!(actual, expected, epsilon);
        } else {
            assert_eq!(actual, expected);
        }
    }
    Ok(())
}
#[test]
fn test_aggregation_extended_stats_with_variance_similar_to_mean() -> crate::Result<()> {
    // The scores are tightly clustered around 50, so the expected population
    // variance is several orders of magnitude smaller than the mean.
    const EXPECTED_VARIANCE: f64 = 5.5555555555608854e-5;

    let scores = vec![50.01, 50.02, 50.01, 50.03, 50.01, 50.02];
    let index = get_test_index_from_values(false, &scores)?;

    let request = json!({
        "my_stats": {
            "extended_stats": {
                "field": "score_f64",
                "sigma": 1.5
            }
        }
    });
    let aggregations: Aggregations = serde_json::from_value(request).unwrap();
    let collector = AggregationCollector::from_aggs(aggregations, Default::default());

    let reader = index.reader()?;
    let searcher = reader.searcher();
    let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();

    // One row per metric: (property path, expected value, tolerance).
    // A tolerance of `None` means the value has to match exactly.
    let expectations = [
        ("count", 6.0, None),
        ("min", 50.01, None),
        ("max", 50.03, None),
        ("sum", 300.1, None),
        ("avg", 50.01666666666667, Some(EPSILON_FOR_TEST)),
        (
            "std_deviation",
            EXPECTED_VARIANCE.sqrt(),
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_population",
            EXPECTED_VARIANCE.sqrt(),
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_sampling",
            0.008164965809279263,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.lower",
            50.00548632677917,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.lower_population",
            50.00548632677917,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.lower_sampling",
            50.00441921795275,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper",
            50.027847006554175,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper_population",
            50.027847006554175,
            Some(EPSILON_FOR_TEST),
        ),
        (
            "std_deviation_bounds.upper_sampling",
            50.028914115380594,
            Some(EPSILON_FOR_TEST),
        ),
        ("sum_of_squares", 15010.002, None),
        (
            "variance_population",
            EXPECTED_VARIANCE,
            Some(EPSILON_FOR_TEST),
        ),
        ("variance", EXPECTED_VARIANCE, Some(EPSILON_FOR_TEST)),
        (
            "variance_sampling",
            6.666666666670718e-5,
            Some(EPSILON_FOR_TEST),
        ),
    ];

    for (property, expected, tolerance) in expectations {
        let actual = agg_res
            .get_value_from_aggregation("my_stats", property)?
            .unwrap();
        if let Some(epsilon) = tolerance {
            assert_nearly_equals!(actual, expected, epsilon);
        } else {
            assert_eq!(actual, expected);
        }
    }
    Ok(())
}
#[test]
fn extended_stat_zero_value() {
    // Finalizing an accumulator that never collected a value must leave every
    // variance-related statistic (and the sum of squares) unset.
    let empty = IntermediateExtendedStats::default();
    let stats = empty.finalize();

    let unset_checks = [
        ("variance", stats.variance.is_none()),
        ("variance_population", stats.variance_population.is_none()),
        ("variance_sampling", stats.variance_sampling.is_none()),
        ("sum_of_squares", stats.sum_of_squares.is_none()),
        ("std_deviation", stats.std_deviation.is_none()),
        (
            "std_deviation_population",
            stats.std_deviation_population.is_none(),
        ),
        (
            "std_deviation_sampling",
            stats.std_deviation_sampling.is_none(),
        ),
        ("std_deviation_bounds", stats.std_deviation_bounds.is_none()),
    ];
    for (field, is_unset) in unset_checks {
        assert!(is_unset, "{} should be None without any value", field);
    }
}
#[test]
fn extended_stat_one_value() {
    // With a single collected value the spread statistics must stay unset,
    // while the sum of squares is already available.
    let mut accumulator = IntermediateExtendedStats::default();
    accumulator.collect(1.0f64);
    let stats = accumulator.finalize();

    let unset_checks = [
        ("variance", stats.variance.is_none()),
        ("variance_population", stats.variance_population.is_none()),
        ("variance_sampling", stats.variance_sampling.is_none()),
        ("std_deviation", stats.std_deviation.is_none()),
        (
            "std_deviation_population",
            stats.std_deviation_population.is_none(),
        ),
        (
            "std_deviation_sampling",
            stats.std_deviation_sampling.is_none(),
        ),
        ("std_deviation_bounds", stats.std_deviation_bounds.is_none()),
    ];
    for (field, is_unset) in unset_checks {
        assert!(is_unset, "{} should be None for a single value", field);
    }

    assert_eq!(Some(1.0f64), stats.sum_of_squares);
}
#[test]
fn extended_stat_multiple_values() {
    // Expected results for the six values collected below; these are compared
    // bit-for-bit, not with a tolerance.
    const EXPECTED_VARIANCE: f64 = 9.138888888888888;
    const EXPECTED_SAMPLING_VARIANCE: f64 = 10.966666666666665;

    let mut accumulator = IntermediateExtendedStats::default();
    for value in [1.0f64, 3.0, 4.0, 5.0, 8.0, 10.0] {
        accumulator.collect(value);
    }
    let stats = accumulator.finalize();

    // The unqualified metrics are expected to equal the population flavour.
    assert_eq!(Some(EXPECTED_VARIANCE), stats.variance);
    assert_eq!(Some(EXPECTED_VARIANCE), stats.variance_population);
    assert_eq!(Some(EXPECTED_SAMPLING_VARIANCE), stats.variance_sampling);

    assert_eq!(Some(EXPECTED_VARIANCE.sqrt()), stats.std_deviation);
    assert_eq!(
        Some(EXPECTED_VARIANCE.sqrt()),
        stats.std_deviation_population
    );
    assert_eq!(
        Some(EXPECTED_SAMPLING_VARIANCE.sqrt()),
        stats.std_deviation_sampling
    );

    assert_eq!(Some(215.0), stats.sum_of_squares);
    assert_eq!(Some(5.166666666666667), stats.avg);
}
#[test]
fn merge_empty_with_one_value() {
    // Merge an accumulator holding a single value into an empty one; the
    // assertions match those made for a lone collected value.
    let mut single_value = IntermediateExtendedStats::default();
    single_value.collect(1.0f64);

    let mut merged = IntermediateExtendedStats::default();
    merged.merge_fruits(single_value);
    let stats = merged.finalize();

    let unset_checks = [
        ("variance", stats.variance.is_none()),
        ("variance_population", stats.variance_population.is_none()),
        ("variance_sampling", stats.variance_sampling.is_none()),
        ("std_deviation", stats.std_deviation.is_none()),
        (
            "std_deviation_population",
            stats.std_deviation_population.is_none(),
        ),
        (
            "std_deviation_sampling",
            stats.std_deviation_sampling.is_none(),
        ),
    ];
    for (field, is_unset) in unset_checks {
        assert!(is_unset, "{} should be None after the merge", field);
    }

    assert_eq!(Some(1.0f64), stats.sum_of_squares);
}
#[test]
fn merge_empty_with_multiple_values() {
    // Expected results for the values 1..=5, compared bit-for-bit.
    const EXPECTED_VARIANCE: f64 = 2.0;
    const EXPECTED_SAMPLING_VARIANCE: f64 = 2.5;

    let mut populated = IntermediateExtendedStats::default();
    for value in [1.0f64, 2.0, 3.0, 4.0, 5.0] {
        populated.collect(value);
    }

    // Merge the populated accumulator into an empty one, then check the
    // finalized statistics.
    let mut merged = IntermediateExtendedStats::default();
    merged.merge_fruits(populated);
    let stats = merged.finalize();

    assert_eq!(Some(EXPECTED_VARIANCE), stats.variance);
    assert_eq!(Some(EXPECTED_VARIANCE), stats.variance_population);
    assert_eq!(Some(EXPECTED_SAMPLING_VARIANCE), stats.variance_sampling);

    assert_eq!(Some(EXPECTED_VARIANCE.sqrt()), stats.std_deviation);
    assert_eq!(
        Some(EXPECTED_VARIANCE.sqrt()),
        stats.std_deviation_population
    );
    assert_eq!(
        Some(EXPECTED_SAMPLING_VARIANCE.sqrt()),
        stats.std_deviation_sampling
    );

    assert_eq!(Some(55f64), stats.sum_of_squares);
}
#[test]
fn merge_non_empty_extended_stats() {
    // Builds an accumulator that has collected `values` in the given order.
    let accumulate = |values: &[f64]| {
        let mut accumulator = IntermediateExtendedStats::default();
        for &value in values {
            accumulator.collect(value);
        }
        accumulator
    };

    // Scenario 1: [1, 2] merged with [3, 4, 5]; expectations are those of 1..=5.
    {
        let incoming = accumulate(&[3.0, 4.0, 5.0]);
        let mut merged = accumulate(&[1.0, 2.0]);
        merged.merge_fruits(incoming);
        let stats = merged.finalize();

        assert_eq!(Some(2.0f64), stats.variance);
        assert_eq!(Some(2.0f64), stats.variance_population);
        assert_eq!(Some(2.5f64), stats.variance_sampling);
        assert_eq!(Some(2.0f64.sqrt()), stats.std_deviation);
        assert_eq!(Some(2.0f64.sqrt()), stats.std_deviation_population);
        assert_eq!(Some(2.5f64.sqrt()), stats.std_deviation_sampling);
        assert_eq!(Some(55f64), stats.sum_of_squares);
    }

    // Scenario 2: [1, 3, 4] merged with [5, 8, 10]; all comparisons are exact.
    {
        const EXPECTED_VARIANCE: f64 = 9.138888888888888;
        const EXPECTED_SAMPLING_VARIANCE: f64 = 10.966666666666665;

        let mut merged = accumulate(&[1.0, 3.0, 4.0]);
        let incoming = accumulate(&[5.0, 8.0, 10.0]);
        merged.merge_fruits(incoming);
        let stats = merged.finalize();

        assert_eq!(Some(EXPECTED_VARIANCE), stats.variance);
        assert_eq!(Some(EXPECTED_VARIANCE), stats.variance_population);
        assert_eq!(Some(EXPECTED_SAMPLING_VARIANCE), stats.variance_sampling);
        assert_eq!(Some(EXPECTED_VARIANCE.sqrt()), stats.std_deviation);
        assert_eq!(
            Some(EXPECTED_VARIANCE.sqrt()),
            stats.std_deviation_population
        );
        assert_eq!(
            Some(EXPECTED_SAMPLING_VARIANCE.sqrt()),
            stats.std_deviation_sampling
        );
        assert_eq!(Some(215f64), stats.sum_of_squares);
        assert_eq!(Some(5.166666666666667), stats.avg);
    }
}
#[test]
fn merge_and_then_collect_non_empty_extended_stats() {
    const EXPECTED_VARIANCE: f64 = 9.138888888888888;
    const EXPECTED_SAMPLING_VARIANCE: f64 = 10.966666666666665;

    // Collect, merge another accumulator in, then keep collecting afterwards.
    let mut merged = IntermediateExtendedStats::default();
    for value in [1.0f64, 3.0] {
        merged.collect(value);
    }
    let mut incoming = IntermediateExtendedStats::default();
    for value in [5.0f64, 8.0, 10.0] {
        incoming.collect(value);
    }
    merged.merge_fruits(incoming);
    merged.collect(4.0f64);
    let stats = merged.finalize();

    // Spread statistics are compared with a tolerance: (expected, reported).
    let approximate = [
        (EXPECTED_VARIANCE, stats.variance),
        (EXPECTED_VARIANCE, stats.variance_population),
        (EXPECTED_SAMPLING_VARIANCE, stats.variance_sampling),
        (EXPECTED_VARIANCE.sqrt(), stats.std_deviation),
        (EXPECTED_VARIANCE.sqrt(), stats.std_deviation_population),
        (
            EXPECTED_SAMPLING_VARIANCE.sqrt(),
            stats.std_deviation_sampling,
        ),
    ];
    for (expected, reported) in approximate {
        let actual = reported.unwrap();
        assert_nearly_equals!(expected, actual, EPSILON_FOR_TEST);
    }

    // Sum of squares and average must match exactly.
    assert_eq!(Some(215.0), stats.sum_of_squares);
    assert_eq!(Some(5.166666666666667), stats.avg);
}
}