mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-05 16:52:55 +00:00
improve aggregation docs
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
//!
|
||||
//! [Aggregations] is the top level entry point to create a request, which is a `HashMap<String,
|
||||
//! Aggregation>`.
|
||||
//!
|
||||
//! Requests are compatible with the json format of elasticsearch.
|
||||
//!
|
||||
//! # Example
|
||||
@@ -51,7 +52,7 @@ pub use super::bucket::RangeAggregation;
|
||||
use super::metric::{AverageAggregation, StatsAggregation};
|
||||
|
||||
/// The top-level aggregation request structure, which contains [Aggregation] and their user defined
|
||||
/// names.
|
||||
/// names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
|
||||
///
|
||||
/// The key is the user defined name of the aggregation.
|
||||
pub type Aggregations = HashMap<String, Aggregation>;
|
||||
|
||||
@@ -112,6 +112,34 @@ impl From<IntermediateBucketResult> for BucketResult {
|
||||
|
||||
/// This is the range entry for a bucket, which contains a key, count, and optionally
|
||||
/// sub_aggregations.
|
||||
///
|
||||
/// # JSON Format
|
||||
/// ```json
|
||||
/// {
|
||||
/// ...
|
||||
/// "my_ranges": {
|
||||
/// "buckets": [
|
||||
/// {
|
||||
/// "key": "*-10",
|
||||
/// "to": 10,
|
||||
/// "doc_count": 5
|
||||
/// },
|
||||
/// {
|
||||
/// "key": "10-20",
|
||||
/// "from": 10,
|
||||
/// "to": 20,
|
||||
/// "doc_count": 2
|
||||
/// },
|
||||
/// {
|
||||
/// "key": "20-*",
|
||||
/// "from": 20,
|
||||
/// "doc_count": 3
|
||||
/// }
|
||||
/// ]
|
||||
/// }
|
||||
/// ...
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RangeBucketEntry {
|
||||
/// The identifier of the bucket.
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
//! Module for all bucket aggregations.
|
||||
//!
|
||||
//! BucketAggregations create buckets of documents
|
||||
//! [BucketAggregation](super::agg_req::BucketAggregation).
|
||||
//!
|
||||
//! Results of final buckets are [BucketResult](super::agg_result::BucketResult).
|
||||
//! Results of intermediate buckets are
|
||||
//! [IntermediateBucketResult](super::intermediate_agg_result::IntermediateBucketResult)
|
||||
|
||||
mod range;
|
||||
|
||||
pub use range::RangeAggregation;
|
||||
pub(crate) use range::SegmentRangeCollector;
|
||||
pub use range::*;
|
||||
|
||||
@@ -18,18 +18,33 @@ use crate::{DocId, TantivyError};
|
||||
/// Provide user-defined buckets to aggregate on.
|
||||
/// Two special buckets will automatically be created to cover the whole range of values.
|
||||
/// The provided buckets have to be continuous.
|
||||
/// During the aggregation, the values extracted from the fast_field `field_name` will be checked
|
||||
/// During the aggregation, the values extracted from the fast_field `field` will be checked
|
||||
/// against each bucket range. Note that this aggregation includes the from value and excludes the
|
||||
/// to value for each range.
|
||||
///
|
||||
/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
|
||||
/// [BucketEntryKeyCount](crate::aggregation::agg_result::RangeBucketEntry) on the
|
||||
/// [RangeBucketEntry](crate::aggregation::agg_result::RangeBucketEntry) on the
|
||||
/// AggregationCollector.
|
||||
///
|
||||
/// Result type is
|
||||
/// [crate::aggregation::intermediate_agg_result::IntermediateBucketResult] with
|
||||
/// [crate::aggregation::intermediate_agg_result::IntermediateRangeBucketEntry] on the
|
||||
/// DistributedAggregationCollector.
|
||||
///
|
||||
/// # Request JSON Format
|
||||
/// ```json
|
||||
/// {
|
||||
/// "range": {
|
||||
/// "field": "score",
|
||||
/// "ranges": [
|
||||
/// { "to": 3.0 },
|
||||
/// { "from": 3.0, "to": 7.0 },
|
||||
/// { "from": 7.0, "to": 20.0 },
|
||||
/// { "from": 20.0 }
|
||||
/// ]
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RangeAggregation {
|
||||
/// The field to aggregate on.
|
||||
@@ -40,9 +55,14 @@ pub struct RangeAggregation {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
/// The range for one range bucket.
|
||||
pub struct RangeAggregationRange {
|
||||
/// The from range value, which is inclusive in the range.
|
||||
/// `None` means the interval is open-ended on this side.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub from: Option<f64>,
|
||||
/// The to range value, which is not inclusive in the range.
|
||||
/// `None` means the interval is open-ended on this side.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub to: Option<f64>,
|
||||
}
|
||||
@@ -64,7 +84,7 @@ impl From<Range<f64>> for RangeAggregationRange {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct SegmentRangeAndBucketEntry {
|
||||
pub(crate) struct SegmentRangeAndBucketEntry {
|
||||
range: Range<u64>,
|
||||
bucket: SegmentRangeBucketEntry,
|
||||
}
|
||||
@@ -274,7 +294,7 @@ fn extend_validate_ranges(
|
||||
Ok(converted_buckets)
|
||||
}
|
||||
|
||||
pub fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
|
||||
pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
|
||||
// is_start is there for malformed requests, e.g. if the user passes the range u64::MIN..0.0,
|
||||
// it should be rendered as "*-0" and not "*-*"
|
||||
let to_str = |val: u64, is_start: bool| {
|
||||
@@ -288,7 +308,7 @@ pub fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
|
||||
format!("{}-{}", to_str(range.start, true), to_str(range.end, false))
|
||||
}
|
||||
|
||||
pub fn range_to_key(range: &Range<u64>, field_type: &Type) -> Key {
|
||||
pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> Key {
|
||||
Key::Str(range_to_string(range, field_type))
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,15 @@ use crate::DocId;
|
||||
/// extracted from the aggregated documents.
|
||||
/// Supported field types are u64, i64, and f64.
|
||||
/// See [super::SingleMetricResult] for return value.
|
||||
///
|
||||
/// # JSON Format
|
||||
/// ```json
|
||||
/// {
|
||||
/// "avg": {
|
||||
/// "field": "score"
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
pub struct AverageAggregation {
|
||||
/// The field name to compute the average on.
|
||||
pub field: String,
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
//! Module for all metric aggregations.
|
||||
|
||||
//!
|
||||
//! The aggregations in this family compute metrics, see [super::agg_req::MetricAggregation] for
|
||||
//! details.
|
||||
mod average;
|
||||
mod stats;
|
||||
pub use average::*;
|
||||
|
||||
@@ -9,6 +9,16 @@ use crate::DocId;
|
||||
/// extracted from the aggregated documents.
|
||||
/// Supported field types are u64, i64, and f64.
|
||||
/// See [Stats] for returned statistics.
|
||||
///
|
||||
/// # JSON Format
|
||||
/// ```json
|
||||
/// {
|
||||
/// "stats": {
|
||||
/// "field": "score"
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct StatsAggregation {
|
||||
/// The field name to compute the stats on.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! # Aggregations
|
||||
//!
|
||||
//!
|
||||
//! Aggregation summarizes your data as statistics on buckets or metrics.
|
||||
//! An aggregation summarizes your data as statistics on buckets or metrics.
|
||||
//!
|
||||
//! Aggregations can provide answers to questions like:
|
||||
//! - What is the average price of all sold articles?
|
||||
@@ -13,10 +13,23 @@
|
||||
//! # Usage
|
||||
//!
|
||||
//!
|
||||
//! To use aggregations, build an aggregation request by constructing [agg_req::Aggregations].
|
||||
//! To use aggregations, build an aggregation request by constructing
|
||||
//! [Aggregations](agg_req::Aggregations).
|
||||
//! Create an [AggregationCollector] from this request. AggregationCollector implements the
|
||||
//! `Collector` trait and can be passed as collector into `searcher.search()`.
|
||||
//!
|
||||
//! # JSON Format
|
||||
//! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.
|
||||
//!
|
||||
//! ```verbatim
|
||||
//! let agg_req: Aggregations = serde_json::from_str(json_request_string).unwrap();
|
||||
//! let collector = AggregationCollector::from_aggs(agg_req);
|
||||
//! let searcher = reader.searcher();
|
||||
//! let agg_res = searcher.search(&term_query, &collector).unwrap();
|
||||
//! let json_response_string: String = serde_json::to_string(&agg_res)?;
|
||||
//! ```
|
||||
//!
|
||||
//!
|
||||
//! # Example
|
||||
//! Compute the average metric, by building [agg_req::Aggregations], which is built from an (String,
|
||||
//! [agg_req::Aggregation]) iterator.
|
||||
|
||||
Reference in New Issue
Block a user