mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 08:12:54 +00:00
Compare commits
2 Commits
agg_format
...
use_stats
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e07f1970ea | ||
|
|
78273bfb0d |
@@ -204,21 +204,23 @@ pub enum IntermediateAggregationResult {
|
|||||||
/// Holds the intermediate data for metric results
|
/// Holds the intermediate data for metric results
|
||||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
pub enum IntermediateMetricResult {
|
pub enum IntermediateMetricResult {
|
||||||
/// Average containing intermediate average data result
|
/// Intermediate average result
|
||||||
Average(IntermediateAverage),
|
Average(IntermediateAverage),
|
||||||
/// AverageData variant
|
/// Intermediate stats result
|
||||||
Stats(IntermediateStats),
|
Stats(IntermediateStats),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<SegmentMetricResultCollector> for IntermediateMetricResult {
|
impl From<SegmentMetricResultCollector> for IntermediateMetricResult {
|
||||||
fn from(tree: SegmentMetricResultCollector) -> Self {
|
fn from(tree: SegmentMetricResultCollector) -> Self {
|
||||||
match tree {
|
match tree {
|
||||||
SegmentMetricResultCollector::Average(collector) => {
|
SegmentMetricResultCollector::Stats(collector) => match collector.collecting_for {
|
||||||
IntermediateMetricResult::Average(IntermediateAverage::from_collector(collector))
|
super::metric::SegmentStatsType::Stats => {
|
||||||
}
|
IntermediateMetricResult::Stats(collector.stats)
|
||||||
SegmentMetricResultCollector::Stats(collector) => {
|
}
|
||||||
IntermediateMetricResult::Stats(collector.stats)
|
super::metric::SegmentStatsType::Avg => IntermediateMetricResult::Average(
|
||||||
}
|
IntermediateAverage::from_collector(collector),
|
||||||
|
),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,8 @@
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
use fastfield_codecs::Column;
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::aggregation::f64_from_fastfield_u64;
|
use super::SegmentStatsCollector;
|
||||||
use crate::schema::Type;
|
|
||||||
use crate::DocId;
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
/// A single-value metric aggregation that computes the average of numeric values that are
|
/// A single-value metric aggregation that computes the average of numeric values that are
|
||||||
@@ -36,51 +33,6 @@ impl AverageAggregation {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq)]
|
|
||||||
pub(crate) struct SegmentAverageCollector {
|
|
||||||
pub data: IntermediateAverage,
|
|
||||||
field_type: Type,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Debug for SegmentAverageCollector {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
f.debug_struct("AverageCollector")
|
|
||||||
.field("data", &self.data)
|
|
||||||
.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SegmentAverageCollector {
|
|
||||||
pub fn from_req(field_type: Type) -> Self {
|
|
||||||
Self {
|
|
||||||
field_type,
|
|
||||||
data: Default::default(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
|
|
||||||
let mut iter = doc.chunks_exact(4);
|
|
||||||
for docs in iter.by_ref() {
|
|
||||||
let val1 = field.get_val(docs[0]);
|
|
||||||
let val2 = field.get_val(docs[1]);
|
|
||||||
let val3 = field.get_val(docs[2]);
|
|
||||||
let val4 = field.get_val(docs[3]);
|
|
||||||
let val1 = f64_from_fastfield_u64(val1, &self.field_type);
|
|
||||||
let val2 = f64_from_fastfield_u64(val2, &self.field_type);
|
|
||||||
let val3 = f64_from_fastfield_u64(val3, &self.field_type);
|
|
||||||
let val4 = f64_from_fastfield_u64(val4, &self.field_type);
|
|
||||||
self.data.collect(val1);
|
|
||||||
self.data.collect(val2);
|
|
||||||
self.data.collect(val3);
|
|
||||||
self.data.collect(val4);
|
|
||||||
}
|
|
||||||
for &doc in iter.remainder() {
|
|
||||||
let val = field.get_val(doc);
|
|
||||||
let val = f64_from_fastfield_u64(val, &self.field_type);
|
|
||||||
self.data.collect(val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Contains mergeable version of average data.
|
/// Contains mergeable version of average data.
|
||||||
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
|
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct IntermediateAverage {
|
pub struct IntermediateAverage {
|
||||||
@@ -89,8 +41,11 @@ pub struct IntermediateAverage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl IntermediateAverage {
|
impl IntermediateAverage {
|
||||||
pub(crate) fn from_collector(collector: SegmentAverageCollector) -> Self {
|
pub(crate) fn from_collector(collector: SegmentStatsCollector) -> Self {
|
||||||
collector.data
|
Self {
|
||||||
|
sum: collector.stats.sum,
|
||||||
|
doc_count: collector.stats.count,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Merge average data into this instance.
|
/// Merge average data into this instance.
|
||||||
@@ -106,9 +61,4 @@ impl IntermediateAverage {
|
|||||||
Some(self.sum / self.doc_count as f64)
|
Some(self.sum / self.doc_count as f64)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[inline]
|
|
||||||
fn collect(&mut self, val: f64) {
|
|
||||||
self.doc_count += 1;
|
|
||||||
self.sum += val;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ impl StatsAggregation {
|
|||||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct Stats {
|
pub struct Stats {
|
||||||
/// The number of documents.
|
/// The number of documents.
|
||||||
pub count: usize,
|
pub count: u64,
|
||||||
/// The sum of the fast field values.
|
/// The sum of the fast field values.
|
||||||
pub sum: f64,
|
pub sum: f64,
|
||||||
/// The standard deviation of the fast field values. `None` for count == 0.
|
/// The standard deviation of the fast field values. `None` for count == 0.
|
||||||
@@ -73,11 +73,16 @@ impl Stats {
|
|||||||
/// `IntermediateStats` contains the mergeable version for stats.
|
/// `IntermediateStats` contains the mergeable version for stats.
|
||||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct IntermediateStats {
|
pub struct IntermediateStats {
|
||||||
count: usize,
|
/// the number of values
|
||||||
sum: f64,
|
pub count: u64,
|
||||||
squared_sum: f64,
|
/// the sum of the values
|
||||||
min: f64,
|
pub sum: f64,
|
||||||
max: f64,
|
/// the squared sum of the values
|
||||||
|
pub squared_sum: f64,
|
||||||
|
/// the min value of the values
|
||||||
|
pub min: f64,
|
||||||
|
/// the max value of the values
|
||||||
|
pub max: f64,
|
||||||
}
|
}
|
||||||
impl Default for IntermediateStats {
|
impl Default for IntermediateStats {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
@@ -150,17 +155,25 @@ impl IntermediateStats {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub(crate) enum SegmentStatsType {
|
||||||
|
Stats,
|
||||||
|
Avg,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub(crate) struct SegmentStatsCollector {
|
pub(crate) struct SegmentStatsCollector {
|
||||||
pub(crate) stats: IntermediateStats,
|
pub(crate) stats: IntermediateStats,
|
||||||
field_type: Type,
|
field_type: Type,
|
||||||
|
pub(crate) collecting_for: SegmentStatsType,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SegmentStatsCollector {
|
impl SegmentStatsCollector {
|
||||||
pub fn from_req(field_type: Type) -> Self {
|
pub fn from_req(field_type: Type, collecting_for: SegmentStatsType) -> Self {
|
||||||
Self {
|
Self {
|
||||||
field_type,
|
field_type,
|
||||||
stats: IntermediateStats::default(),
|
stats: IntermediateStats::default(),
|
||||||
|
collecting_for,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
|
pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ use super::bucket::{SegmentHistogramCollector, SegmentRangeCollector, SegmentTer
|
|||||||
use super::collector::MAX_BUCKET_COUNT;
|
use super::collector::MAX_BUCKET_COUNT;
|
||||||
use super::intermediate_agg_result::{IntermediateAggregationResults, IntermediateBucketResult};
|
use super::intermediate_agg_result::{IntermediateAggregationResults, IntermediateBucketResult};
|
||||||
use super::metric::{
|
use super::metric::{
|
||||||
AverageAggregation, SegmentAverageCollector, SegmentStatsCollector, StatsAggregation,
|
AverageAggregation, SegmentStatsCollector, SegmentStatsType, StatsAggregation,
|
||||||
};
|
};
|
||||||
use super::VecWithNames;
|
use super::VecWithNames;
|
||||||
use crate::aggregation::agg_req::BucketAggregationType;
|
use crate::aggregation::agg_req::BucketAggregationType;
|
||||||
@@ -163,7 +163,6 @@ impl SegmentAggregationResultsCollector {
|
|||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub(crate) enum SegmentMetricResultCollector {
|
pub(crate) enum SegmentMetricResultCollector {
|
||||||
Average(SegmentAverageCollector),
|
|
||||||
Stats(SegmentStatsCollector),
|
Stats(SegmentStatsCollector),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -171,22 +170,19 @@ impl SegmentMetricResultCollector {
|
|||||||
pub fn from_req_and_validate(req: &MetricAggregationWithAccessor) -> crate::Result<Self> {
|
pub fn from_req_and_validate(req: &MetricAggregationWithAccessor) -> crate::Result<Self> {
|
||||||
match &req.metric {
|
match &req.metric {
|
||||||
MetricAggregation::Average(AverageAggregation { field: _ }) => {
|
MetricAggregation::Average(AverageAggregation { field: _ }) => {
|
||||||
Ok(SegmentMetricResultCollector::Average(
|
Ok(SegmentMetricResultCollector::Stats(
|
||||||
SegmentAverageCollector::from_req(req.field_type),
|
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Avg),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
MetricAggregation::Stats(StatsAggregation { field: _ }) => {
|
MetricAggregation::Stats(StatsAggregation { field: _ }) => {
|
||||||
Ok(SegmentMetricResultCollector::Stats(
|
Ok(SegmentMetricResultCollector::Stats(
|
||||||
SegmentStatsCollector::from_req(req.field_type),
|
SegmentStatsCollector::from_req(req.field_type, SegmentStatsType::Stats),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub(crate) fn collect_block(&mut self, doc: &[DocId], metric: &MetricAggregationWithAccessor) {
|
pub(crate) fn collect_block(&mut self, doc: &[DocId], metric: &MetricAggregationWithAccessor) {
|
||||||
match self {
|
match self {
|
||||||
SegmentMetricResultCollector::Average(avg_collector) => {
|
|
||||||
avg_collector.collect_block(doc, &*metric.accessor);
|
|
||||||
}
|
|
||||||
SegmentMetricResultCollector::Stats(stats_collector) => {
|
SegmentMetricResultCollector::Stats(stats_collector) => {
|
||||||
stats_collector.collect_block(doc, &*metric.accessor);
|
stats_collector.collect_block(doc, &*metric.accessor);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user