From fdd5ef85e5abc3207a239bdbcbf6493f6ad5db63 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Tue, 22 Feb 2022 10:37:54 +0100 Subject: [PATCH] improve aggregation docs --- src/aggregation/agg_req.rs | 3 ++- src/aggregation/agg_result.rs | 28 ++++++++++++++++++++++++++++ src/aggregation/bucket/mod.rs | 5 ++++- src/aggregation/bucket/range.rs | 30 +++++++++++++++++++++++++----- src/aggregation/metric/average.rs | 9 +++++++++ src/aggregation/metric/mod.rs | 4 +++- src/aggregation/metric/stats.rs | 10 ++++++++++ src/aggregation/mod.rs | 17 +++++++++++++++-- 8 files changed, 96 insertions(+), 10 deletions(-) diff --git a/src/aggregation/agg_req.rs b/src/aggregation/agg_req.rs index 534c738d2..d686954ad 100644 --- a/src/aggregation/agg_req.rs +++ b/src/aggregation/agg_req.rs @@ -3,6 +3,7 @@ //! //! [Aggregations] is the top level entry point to create a request, which is a `HashMap`. +//! //! Requests are compatible with the json format of elasticsearch. //! //! # Example @@ -51,7 +52,7 @@ pub use super::bucket::RangeAggregation; use super::metric::{AverageAggregation, StatsAggregation}; /// The top-level aggregation request structure, which contains [Aggregation] and their user defined -/// names. +/// names. It is also used in [buckets](BucketAggregation) to define sub-aggregations. /// /// The key is the user defined name of the aggregation. pub type Aggregations = HashMap; diff --git a/src/aggregation/agg_result.rs b/src/aggregation/agg_result.rs index 584ec7f21..3d0ed20b2 100644 --- a/src/aggregation/agg_result.rs +++ b/src/aggregation/agg_result.rs @@ -112,6 +112,34 @@ impl From for BucketResult { /// This is the range entry for a bucket, which contains a key, count, and optionally /// sub_aggregations. +/// +/// # JSON Format +/// ```json +/// { +/// ... 
+/// "my_ranges": { +/// "buckets": [ +/// { +/// "key": "*-10", +/// "to": 10, +/// "doc_count": 5 +/// }, +/// { +/// "key": "10-20", +/// "from": 10, +/// "to": 20, +/// "doc_count": 2 +/// }, +/// { +/// "key": "20-*", +/// "from": 20, +/// "doc_count": 3 +/// } +/// ] +/// } +/// ... +/// } +/// ``` #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct RangeBucketEntry { /// The identifier of the bucket. diff --git a/src/aggregation/bucket/mod.rs b/src/aggregation/bucket/mod.rs index ac97393f9..e69d95be9 100644 --- a/src/aggregation/bucket/mod.rs +++ b/src/aggregation/bucket/mod.rs @@ -1,10 +1,13 @@ //! Module for all bucket aggregations. //! +//! Bucket aggregations create buckets of documents, see +//! [BucketAggregation](super::agg_req::BucketAggregation) for details. +//! //! Results of final buckets are [BucketResult](super::agg_result::BucketResult). //! Results of intermediate buckets are //! [IntermediateBucketResult](super::intermediate_agg_result::IntermediateBucketResult) mod range; -pub use range::RangeAggregation; pub(crate) use range::SegmentRangeCollector; +pub use range::*; diff --git a/src/aggregation/bucket/range.rs b/src/aggregation/bucket/range.rs index 4bceb70f4..8554d5fb8 100644 --- a/src/aggregation/bucket/range.rs +++ b/src/aggregation/bucket/range.rs @@ -18,18 +18,33 @@ use crate::{DocId, TantivyError}; /// Provide user-defined buckets to aggregate on. /// Two special buckets will automatically be created to cover the whole range of values. /// The provided buckets have to be continous. -/// During the aggregation, the values extracted from the fast_field `field_name` will be checked +/// During the aggregation, the values extracted from the fast_field `field` will be checked /// against each bucket range. Note that this aggregation includes the from value and excludes the /// to value for each range. 
+/// /// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with -/// [BucketEntryKeyCount](crate::aggregation::agg_result::RangeBucketEntry) on the +/// [RangeBucketEntry](crate::aggregation::agg_result::RangeBucketEntry) on the /// AggregationCollector. /// /// Result type is /// [crate::aggregation::intermediate_agg_result::IntermediateBucketResult] with /// [crate::aggregation::intermediate_agg_result::IntermediateRangeBucketEntry] on the /// DistributedAggregationCollector. +/// +/// # Request JSON Format +/// ```json +/// { +/// "range": { +/// "field": "score", +/// "ranges": [ +/// { "to": 3.0 }, +/// { "from": 3.0, "to": 7.0 }, +/// { "from": 7.0, "to": 20.0 }, +/// { "from": 20.0 } +/// ] +/// } +/// } +/// ``` #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct RangeAggregation { /// The field to aggregate on. @@ -40,9 +55,14 @@ pub struct RangeAggregation { } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +/// The range for one range bucket. pub struct RangeAggregationRange { + /// The from range value, which is inclusive in the range. + /// `None` denotes an open-ended interval. #[serde(skip_serializing_if = "Option::is_none", default)] pub from: Option, + /// The to range value, which is not inclusive in the range. + /// `None` denotes an open-ended interval. #[serde(skip_serializing_if = "Option::is_none", default)] pub to: Option, } @@ -64,7 +84,7 @@ impl From> for RangeAggregationRange { } #[derive(Clone, Debug, PartialEq)] -pub struct SegmentRangeAndBucketEntry { +pub(crate) struct SegmentRangeAndBucketEntry { range: Range, bucket: SegmentRangeBucketEntry, } @@ -274,7 +294,7 @@ fn extend_validate_ranges( Ok(converted_buckets) } -pub fn range_to_string(range: &Range, field_type: &Type) -> String { +pub(crate) fn range_to_string(range: &Range, field_type: &Type) -> String { // is_start is there for malformed requests, e.g. 
ig the user passes the range u64::MIN..0.0, // it should be rendererd as "*-0" and not "*-*" let to_str = |val: u64, is_start: bool| { @@ -288,7 +308,7 @@ pub fn range_to_string(range: &Range, field_type: &Type) -> String { format!("{}-{}", to_str(range.start, true), to_str(range.end, false)) } -pub fn range_to_key(range: &Range, field_type: &Type) -> Key { +pub(crate) fn range_to_key(range: &Range, field_type: &Type) -> Key { Key::Str(range_to_string(range, field_type)) } diff --git a/src/aggregation/metric/average.rs b/src/aggregation/metric/average.rs index a83ae6530..2237e8b77 100644 --- a/src/aggregation/metric/average.rs +++ b/src/aggregation/metric/average.rs @@ -12,6 +12,15 @@ use crate::DocId; /// extracted from the aggregated documents. /// Supported field types are u64, i64, and f64. /// See [super::SingleMetricResult] for return value. +/// +/// # JSON Format +/// ```json +/// { +/// "avg": { +/// "field": "score" +/// } +/// } +/// ``` pub struct AverageAggregation { /// The field name to compute the stats on. pub field: String, diff --git a/src/aggregation/metric/mod.rs b/src/aggregation/metric/mod.rs index e7260ac30..4eaffcd5d 100644 --- a/src/aggregation/metric/mod.rs +++ b/src/aggregation/metric/mod.rs @@ -1,5 +1,7 @@ //! Module for all metric aggregations. - +//! +//! The aggregations in this family compute metrics, see [super::agg_req::MetricAggregation] for +//! details. mod average; mod stats; pub use average::*; diff --git a/src/aggregation/metric/stats.rs b/src/aggregation/metric/stats.rs index 90633510d..b6778e0cf 100644 --- a/src/aggregation/metric/stats.rs +++ b/src/aggregation/metric/stats.rs @@ -9,6 +9,16 @@ use crate::DocId; /// extracted from the aggregated documents. /// Supported field types are u64, i64, and f64. /// See [Stats] for returned statistics. 
+/// +/// # JSON Format +/// ```json +/// { +/// "stats": { +/// "field": "score" +/// } +/// } +/// ``` + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct StatsAggregation { /// The field name to compute the stats on. diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 493c0e2e8..ec8a6c1b0 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -1,7 +1,7 @@ //! # Aggregations //! //! -//! Aggregation summarizes your data as statistics on buckets or metrics. +//! An aggregation summarizes your data as statistics on buckets or metrics. //! //! Aggregations can provide answer to questions like: //! - What is the average price of all sold articles? @@ -13,10 +13,23 @@ //! # Usage //! //! -//! To use aggregations, build an aggregation request by constructing [agg_req::Aggregations]. +//! To use aggregations, build an aggregation request by constructing +//! [Aggregations](agg_req::Aggregations). //! Create an [AggregationCollector] from this request. AggregationCollector implements the //! `Collector` trait and can be passed as collector into `searcher.search()`. //! +//! # JSON Format +//! Aggregation request and result structures de/serialize into Elasticsearch-compatible JSON. +//! +//! ```verbatim +//! let agg_req: Aggregations = serde_json::from_str(json_request_string).unwrap(); +//! let collector = AggregationCollector::from_aggs(agg_req); +//! let searcher = reader.searcher(); +//! let agg_res = searcher.search(&term_query, &collector).unwrap(); +//! let json_response_string: String = serde_json::to_string(&agg_res)?; +//! ``` +//! +//! //! # Example //! Compute the average metric, by building [agg_req::Aggregations], which is built from an (String, //! [agg_req::Aggregation]) iterator.