diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs index 79015072e..a2a4a87e5 100644 --- a/src/aggregation/bucket/histogram/histogram.rs +++ b/src/aggregation/bucket/histogram/histogram.rs @@ -1364,4 +1364,29 @@ mod tests { Ok(()) } + + #[test] + fn histogram_invalid_request() -> crate::Result<()> { + let index = get_test_index_2_segments(true)?; + + let agg_req: Aggregations = vec![( + "histogram".to_string(), + Aggregation::Bucket(BucketAggregation { + bucket_agg: BucketAggregationType::Histogram(HistogramAggregation { + field: "score_f64".to_string(), + interval: 0.0, + ..Default::default() + }), + sub_aggregation: Default::default(), + }), + )] + .into_iter() + .collect(); + + let agg_res = exec_request(agg_req, &index); + + assert!(agg_res.is_err()); + + Ok(()) + } } diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs index 3323e09bc..c9833c885 100644 --- a/src/aggregation/bucket/term_agg.rs +++ b/src/aggregation/bucket/term_agg.rs @@ -81,7 +81,8 @@ pub struct TermsAggregation { /// /// Should never be smaller than size. #[serde(skip_serializing_if = "Option::is_none", default)] - pub shard_size: Option, + #[serde(alias = "shard_size")] + pub split_size: Option, /// The get more accurate results, we fetch more than `size` from each segment. /// @@ -96,11 +97,11 @@ pub struct TermsAggregation { /// doc_count returned by each shard. It’s the sum of the size of the largest bucket on /// each segment that didn’t fit into `shard_size`. /// - /// Defaults to true when ordering by counts desc. + /// Defaults to true when ordering by count desc. #[serde(skip_serializing_if = "Option::is_none", default)] pub show_term_doc_count_error: Option, - /// Filter all terms than are lower `min_doc_count`. Defaults to 1. + /// Filter all terms that are lower than `min_doc_count`. Defaults to 1. /// /// **Expensive**: When set to 0, this will return all terms in the field. #[serde(skip_serializing_if = "Option::is_none", default)] @@ -143,7 +144,7 @@ pub(crate) struct TermsAggregationInternal { /// Increasing this value is will increase the cost for more accuracy. pub segment_size: u32, - /// Filter all terms than are lower `min_doc_count`. Defaults to 1. + /// Filter all terms that are lower than `min_doc_count`. Defaults to 1. /// /// *Expensive*: When set to 0, this will return all terms in the field. pub min_doc_count: u64, @@ -572,7 +573,7 @@ mod tests { bucket_agg: BucketAggregationType::Terms(TermsAggregation { field: "string_id".to_string(), size: Some(2), - shard_size: Some(2), + split_size: Some(2), ..Default::default() }), sub_aggregation: Default::default(), @@ -1210,6 +1211,51 @@ mod tests { .unwrap(); assert_eq!(agg_req, agg_req_deser); + let elasticsearch_compatible_json = json!( + { + "term_agg_test":{ + "terms": { + "field": "string_id", + "split_size": 2u64, + } + } + }); + + // test alias shard_size, split_size + let agg_req: Aggregations = vec![( + "term_agg_test".to_string(), + Aggregation::Bucket(BucketAggregation { + bucket_agg: BucketAggregationType::Terms(TermsAggregation { + field: "string_id".to_string(), + split_size: Some(2), + ..Default::default() + }), + sub_aggregation: Default::default(), + }), + )] + .into_iter() + .collect(); + + let agg_req_deser: Aggregations = + serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap()) + .unwrap(); + assert_eq!(agg_req, agg_req_deser); + + let elasticsearch_compatible_json = json!( + { + "term_agg_test":{ + "terms": { + "field": "string_id", + "shard_size": 2u64, + } + } + }); + + let agg_req_deser: Aggregations = + serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap()) + .unwrap(); + assert_eq!(agg_req, agg_req_deser); + Ok(()) } } diff --git a/src/aggregation/intermediate_agg_result.rs b/src/aggregation/intermediate_agg_result.rs index 936caf38a..9bde00707 100644 --- a/src/aggregation/intermediate_agg_result.rs +++ b/src/aggregation/intermediate_agg_result.rs @@ -24,7 +24,9 @@ use crate::aggregation::bucket::TermsAggregationInternal; /// intermediate results. #[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct IntermediateAggregationResults { + #[serde(skip_serializing_if = "Option::is_none")] pub(crate) metrics: Option>, + #[serde(skip_serializing_if = "Option::is_none")] pub(crate) buckets: Option>, } diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 193a94d04..37fa05c0f 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -20,7 +20,8 @@ //! //! #### Limitations //! -//! Currently aggregations work only on single value fast fields of type u64, f64 and i64. +//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and +//! fast fields on text fields. //! //! # JSON Format //! Aggregations request and result structures de/serialize into elasticsearch compatible JSON. diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index a28bf732c..90f1916e6 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -300,7 +300,7 @@ impl IntFastFieldWriter { /// If the document has more than one value for the given field, /// only the first one is taken in account. /// - /// Values for string fast fields are skipped. + /// Values on text fast fields are skipped. pub fn add_document(&mut self, doc: &Document) { match doc.get_first(self.field) { Some(v) => { diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs index b164ada31..14728154a 100644 --- a/src/schema/text_options.rs +++ b/src/schema/text_options.rs @@ -42,6 +42,11 @@ impl TextOptions { /// Text fast fields will have the term ids stored in the fast field. /// The fast field will be a multivalued fast field. /// + /// The effective cardinality depends on the tokenizer. When creating fast fields on text + /// fields it is recommended to use the "raw" tokenizer, since it will store the original text + /// unchanged. The "default" tokenizer will store the terms as lower case and this will be + /// reflected in the dictionary. + /// /// The original text can be retrieved via `ord_to_term` from the dictionary. #[must_use] pub fn set_fast(mut self) -> TextOptions {