From bc607a921ba0a326f5ac4239adb288321d61e97e Mon Sep 17 00:00:00 2001
From: Pascal Seitz <pascal.seitz@gmail.com>
Date: Wed, 4 May 2022 18:51:18 +0800
Subject: [PATCH 1/3] add split_size alias for shard_size for quickwit

improve some docs
---
 src/aggregation/bucket/histogram/histogram.rs | 25 ++++++++++
 src/aggregation/bucket/term_agg.rs            | 46 +++++++++++++++++++
 src/aggregation/mod.rs                        |  3 +-
 src/schema/text_options.rs                    |  5 ++
 4 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs
index 79015072e..a2a4a87e5 100644
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -1364,4 +1364,29 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn histogram_invalid_request() -> crate::Result<()> {
+        let index = get_test_index_2_segments(true)?;
+        // Histogram request on "score_f64" with an interval of 0.0.
+        let agg_req: Aggregations = vec![(
+            "histogram".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
+                    field: "score_f64".to_string(),
+                    interval: 0.0,
+                    ..Default::default()
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let agg_res = exec_request(agg_req, &index);
+        // The request above is expected to be rejected with an error, not executed.
+        assert!(agg_res.is_err());
+
+        Ok(())
+    }
 }
diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs
index 3323e09bc..af23352f1 100644
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -81,6 +81,7 @@ pub struct TermsAggregation {
     ///
     /// Should never be smaller than size.
     #[serde(skip_serializing_if = "Option::is_none", default)]
+    #[serde(alias = "split_size")]
     pub shard_size: Option<u32>,
 
     /// The get more accurate results, we fetch more than `size` from each segment.
@@ -1210,6 +1211,51 @@ mod tests {
                 .unwrap();
         assert_eq!(agg_req, agg_req_deser);
 
+        let elasticsearch_compatible_json = json!(
+        {
+        "term_agg_test":{
+            "terms": {
+                "field": "string_id",
+                "split_size": 2u64,
+            }
+        }
+        });
+
+        // test that both `shard_size` and `split_size` deserialize to the same request
+        let agg_req: Aggregations = vec![(
+            "term_agg_test".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Terms(TermsAggregation {
+                    field: "string_id".to_string(),
+                    shard_size: Some(2),
+                    ..Default::default()
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let agg_req_deser: Aggregations =
+            serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
+                .unwrap();
+        assert_eq!(agg_req, agg_req_deser);
+
+        let elasticsearch_compatible_json = json!(
+        {
+        "term_agg_test":{
+            "terms": {
+                "field": "string_id",
+                "shard_size": 2u64,
+            }
+        }
+        });
+
+        let agg_req_deser: Aggregations =
+            serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
+                .unwrap();
+        assert_eq!(agg_req, agg_req_deser);
+
         Ok(())
     }
 }
diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs
index 193a94d04..ac7fc606a 100644
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -20,7 +20,8 @@
 //!
 //! #### Limitations
 //!
-//! Currently aggregations work only on single value fast fields of type u64, f64 and i64.
+//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and on
+//! string fast fields.
 //!
 //! # JSON Format
 //! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.
diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs
index b164ada31..14728154a 100644
--- a/src/schema/text_options.rs
+++ b/src/schema/text_options.rs
@@ -42,6 +42,11 @@ impl TextOptions {
     /// Text fast fields will have the term ids stored in the fast field.
     /// The fast field will be a multivalued fast field.
     ///
+    /// The effective cardinality depends on the tokenizer. When creating fast fields on text
+    /// fields it is recommended to use the "raw" tokenizer, since it will store the original text
+    /// unchanged. The "default" tokenizer will store the terms as lower case and this will be
+    /// reflected in the dictionary.
+    ///
     /// The original text can be retrieved via `ord_to_term` from the dictionary.
     #[must_use]
     pub fn set_fast(mut self) -> TextOptions {

From d11a8cce26f58eb3360c54581ab883ad9d9e69fc Mon Sep 17 00:00:00 2001
From: Pascal Seitz <pascal.seitz@gmail.com>
Date: Thu, 5 May 2022 17:33:33 +0800
Subject: [PATCH 2/3] minor docs fix

---
 src/aggregation/bucket/term_agg.rs         | 6 +++---
 src/aggregation/intermediate_agg_result.rs | 2 ++
 src/aggregation/mod.rs                     | 4 ++--
 src/fastfield/writer.rs                    | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs
index af23352f1..8199dfd05 100644
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -97,11 +97,11 @@ pub struct TermsAggregation {
     /// doc_count returned by each shard. It’s the sum of the size of the largest bucket on
     /// each segment that didn’t fit into `shard_size`.
     ///
-    /// Defaults to true when ordering by counts desc.
+    /// Defaults to true when ordering by count desc.
     #[serde(skip_serializing_if = "Option::is_none", default)]
     pub show_term_doc_count_error: Option<bool>,
 
-    /// Filter all terms than are lower `min_doc_count`. Defaults to 1.
+    /// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
     ///
     /// **Expensive**: When set to 0, this will return all terms in the field.
     #[serde(skip_serializing_if = "Option::is_none", default)]
@@ -144,7 +144,7 @@ pub(crate) struct TermsAggregationInternal {
     /// Increasing this value is will increase the cost for more accuracy.
     pub segment_size: u32,
 
-    /// Filter all terms than are lower `min_doc_count`. Defaults to 1.
+    /// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
     ///
     /// *Expensive*: When set to 0, this will return all terms in the field.
     pub min_doc_count: u64,
diff --git a/src/aggregation/intermediate_agg_result.rs b/src/aggregation/intermediate_agg_result.rs
index 936caf38a..9bde00707 100644
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -24,7 +24,9 @@ use crate::aggregation::bucket::TermsAggregationInternal;
 /// intermediate results.
 #[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct IntermediateAggregationResults {
+    #[serde(skip_serializing_if = "Option::is_none")]
     pub(crate) metrics: Option<VecWithNames<IntermediateMetricResult>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
     pub(crate) buckets: Option<VecWithNames<IntermediateBucketResult>>,
 }
 
diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs
index ac7fc606a..37fa05c0f 100644
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -20,8 +20,8 @@
 //!
 //! #### Limitations
 //!
-//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and on
-//! string fast fields.
+//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and
+//! fast fields on text fields.
 //!
 //! # JSON Format
 //! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.
diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs
index a28bf732c..90f1916e6 100644
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -300,7 +300,7 @@ impl IntFastFieldWriter {
     /// If the document has more than one value for the given field,
     /// only the first one is taken in account.
     ///
-    /// Values for string fast fields are skipped.
+    /// Values on text fast fields are skipped.
     pub fn add_document(&mut self, doc: &Document) {
         match doc.get_first(self.field) {
             Some(v) => {

From d77e8de36a03400c72e88f70c7213f70f7e08e3b Mon Sep 17 00:00:00 2001
From: Pascal Seitz <pascal.seitz@gmail.com>
Date: Fri, 6 May 2022 17:52:18 +0800
Subject: [PATCH 3/3] flip alias variable name

---
 src/aggregation/bucket/term_agg.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs
index 8199dfd05..c9833c885 100644
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -81,8 +81,8 @@ pub struct TermsAggregation {
     ///
     /// Should never be smaller than size.
     #[serde(skip_serializing_if = "Option::is_none", default)]
-    #[serde(alias = "split_size")]
-    pub shard_size: Option<u32>,
+    #[serde(alias = "shard_size")]
+    pub split_size: Option<u32>,
 
     /// The get more accurate results, we fetch more than `size` from each segment.
     ///
@@ -573,7 +573,7 @@ mod tests {
                 bucket_agg: BucketAggregationType::Terms(TermsAggregation {
                     field: "string_id".to_string(),
                     size: Some(2),
-                    shard_size: Some(2),
+                    split_size: Some(2),
                     ..Default::default()
                 }),
                 sub_aggregation: Default::default(),
@@ -1227,7 +1227,7 @@ mod tests {
             Aggregation::Bucket(BucketAggregation {
                 bucket_agg: BucketAggregationType::Terms(TermsAggregation {
                     field: "string_id".to_string(),
-                    shard_size: Some(2),
+                    split_size: Some(2),
                     ..Default::default()
                 }),
                 sub_aggregation: Default::default(),