improve aggregation docs

2026-01-05 16:52:55 +00:00 · 2022-02-22 10:37:54 +01:00
parent 1232af7928
commit fdd5ef85e5
8 changed files with 96 additions and 10 deletions
--- a/src/aggregation/agg_req.rs
+++ b/src/aggregation/agg_req.rs
@@ -3,6 +3,7 @@
 //!
 //! [Aggregations] is the top level entry point to create a request, which is a `HashMap<String,
 //! Aggregation>`.
+//!
 //! Requests are compatible with the json format of elasticsearch.
 //!
 //! # Example
@@ -51,7 +52,7 @@ pub use super::bucket::RangeAggregation;
 use super::metric::{AverageAggregation, StatsAggregation};

 /// The top-level aggregation request structure, which contains [Aggregation] and their user defined
-/// names.
+/// names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
 ///
 /// The key is the user defined name of the aggregation.
 pub type Aggregations = HashMap<String, Aggregation>;
--- a/src/aggregation/agg_result.rs
+++ b/src/aggregation/agg_result.rs
@@ -112,6 +112,34 @@ impl From<IntermediateBucketResult> for BucketResult {

 /// This is the range entry for a bucket, which contains a key, count, and optionally
 /// sub_aggregations.
+///
+/// # JSON Format
+/// ```json
+/// {
+///   ...
+///     "my_ranges": {
+///       "buckets": [
+///         {
+///           "key": "*-10",
+///           "to": 10,
+///           "doc_count": 5
+///         },
+///         {
+///           "key": "10-20",
+///           "from": 10,
+///           "to": 20,
+///           "doc_count": 2
+///         },
+///         {
+///           "key": "20-*",
+///           "from": 20,
+///           "doc_count": 3
+///         }
+///       ]
+///    }
+///    ...
+/// }
+///  ```
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct RangeBucketEntry {
    /// The identifier of the bucket.
--- a/src/aggregation/bucket/mod.rs
+++ b/src/aggregation/bucket/mod.rs
@@ -1,10 +1,13 @@
 //! Module for all bucket aggregations.
 //!
+//! BucketAggregations create buckets of documents, see
+//! [BucketAggregation](super::agg_req::BucketAggregation) for details.
+//!
 //! Results of final buckets are [BucketResult](super::agg_result::BucketResult).
 //! Results of intermediate buckets are
 //! [IntermediateBucketResult](super::intermediate_agg_result::IntermediateBucketResult)

 mod range;

-pub use range::RangeAggregation;
 pub(crate) use range::SegmentRangeCollector;
+pub use range::*;
--- a/src/aggregation/bucket/range.rs
+++ b/src/aggregation/bucket/range.rs
@@ -18,18 +18,33 @@ use crate::{DocId, TantivyError};
 /// Provide user-defined buckets to aggregate on.
 /// Two special buckets will automatically be created to cover the whole range of values.
 /// The provided buckets have to be continuous.
-/// During the aggregation, the values extracted from the fast_field `field_name` will be checked
+/// During the aggregation, the values extracted from the fast_field `field` will be checked
 /// against each bucket range. Note that this aggregation includes the from value and excludes the
 /// to value for each range.
 ///
 /// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
-/// [BucketEntryKeyCount](crate::aggregation::agg_result::RangeBucketEntry) on the
+/// [RangeBucketEntry](crate::aggregation::agg_result::RangeBucketEntry) on the
 /// AggregationCollector.
 ///
 /// Result type is
 /// [crate::aggregation::intermediate_agg_result::IntermediateBucketResult] with
 /// [crate::aggregation::intermediate_agg_result::IntermediateRangeBucketEntry] on the
 /// DistributedAggregationCollector.
+///
+/// # Request JSON Format
+/// ```json
+/// {
+///     "range": {
+///         "field": "score",
+///         "ranges": [
+///             { "to": 3.0 },
+///             { "from": 3.0, "to": 7.0 },
+///             { "from": 7.0, "to": 20.0 },
+///             { "from": 20.0 }
+///         ]
+///     }
+///  }
+///  ```
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct RangeAggregation {
    /// The field to aggregate on.
@@ -40,9 +55,14 @@ pub struct RangeAggregation {
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+/// The range for one range bucket.
 pub struct RangeAggregationRange {
+    /// The from range value, which is inclusive in the range.
+    /// `None` means the range has no lower bound (open-ended interval).
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub from: Option<f64>,
+    /// The to range value, which is not inclusive in the range.
+    /// `None` means the range has no upper bound (open-ended interval).
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub to: Option<f64>,
 }
@@ -64,7 +84,7 @@ impl From<Range<f64>> for RangeAggregationRange {
 }

 #[derive(Clone, Debug, PartialEq)]
-pub struct SegmentRangeAndBucketEntry {
+pub(crate) struct SegmentRangeAndBucketEntry {
    range: Range<u64>,
    bucket: SegmentRangeBucketEntry,
 }
@@ -274,7 +294,7 @@ fn extend_validate_ranges(
    Ok(converted_buckets)
 }

-pub fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
+pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
    // is_start is there for malformed requests, e.g. if the user passes the range u64::MIN..0.0,
    // it should be rendered as "*-0" and not "*-*"
    let to_str = |val: u64, is_start: bool| {
@@ -288,7 +308,7 @@ pub fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
    format!("{}-{}", to_str(range.start, true), to_str(range.end, false))
 }

-pub fn range_to_key(range: &Range<u64>, field_type: &Type) -> Key {
+pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> Key {
    Key::Str(range_to_string(range, field_type))
 }

--- a/src/aggregation/metric/average.rs
+++ b/src/aggregation/metric/average.rs
@@ -12,6 +12,15 @@ use crate::DocId;
 /// extracted from the aggregated documents.
 /// Supported field types are u64, i64, and f64.
 /// See [super::SingleMetricResult] for return value.
+///
+/// # JSON Format
+/// ```json
+/// {
+///     "avg": {
+///         "field": "score"
+///     }
+///  }
+///  ```
 pub struct AverageAggregation {
    /// The field name to compute the stats on.
    pub field: String,
--- a/src/aggregation/metric/mod.rs
+++ b/src/aggregation/metric/mod.rs
@@ -1,5 +1,7 @@
 //! Module for all metric aggregations.
-
+//!
+//! The aggregations in this family compute metrics, see [super::agg_req::MetricAggregation] for
+//! details.
 mod average;
 mod stats;
 pub use average::*;
--- a/src/aggregation/metric/stats.rs
+++ b/src/aggregation/metric/stats.rs
@@ -9,6 +9,16 @@ use crate::DocId;
 /// extracted from the aggregated documents.
 /// Supported field types are u64, i64, and f64.
 /// See [Stats] for returned statistics.
+///
+/// # JSON Format
+/// ```json
+/// {
+///     "stats": {
+///         "field": "score"
+///     }
+///  }
+///  ```
+
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct StatsAggregation {
    /// The field name to compute the stats on.
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -1,7 +1,7 @@
 //! # Aggregations
 //!
 //!
-//! Aggregation summarizes your data as statistics on buckets or metrics.
+//! An aggregation summarizes your data as statistics on buckets or metrics.
 //!
 //! Aggregations can provide answers to questions like:
 //! - What is the average price of all sold articles?
@@ -13,10 +13,23 @@
 //! # Usage
 //!
 //!
-//! To use aggregations, build an aggregation request by constructing [agg_req::Aggregations].
+//! To use aggregations, build an aggregation request by constructing
+//! [Aggregations](agg_req::Aggregations).
 //! Create an [AggregationCollector] from this request. AggregationCollector implements the
 //! `Collector` trait and can be passed as collector into `searcher.search()`.
 //!
+//! # JSON Format
+//! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.
+//!
+//! ```verbatim
+//! let agg_req: Aggregations = serde_json::from_str(json_request_string).unwrap();
+//! let collector = AggregationCollector::from_aggs(agg_req);
+//! let searcher = reader.searcher();
+//! let agg_res = searcher.search(&term_query, &collector).unwrap();
+//! let json_response_string: String = serde_json::to_string(&agg_res)?;
+//! ```
+//!
+//!
 //! # Example
 //! Compute the average metric, by building [agg_req::Aggregations], which is built from an (String,
 //! [agg_req::Aggregation]) iterator.