change Err from Debug to Display

as reported by a user on Discord, the output of the Debug on error is a useless: 'Any { .. }' switch to `Display` via to_string
Fix DateHistogram bucket gap (#2183 )
2026-01-16 22:12:55 +00:00 · 2023-09-27 11:02:53 +08:00 · 2023-09-21 10:41:35 +02:00
7 changed files with 178 additions and 23 deletions
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -17,11 +17,11 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install Rust
-        run: rustup toolchain install nightly --profile minimal --component llvm-tools-preview
+        run: rustup toolchain install nightly-2023-09-10 --profile minimal --component llvm-tools-preview
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@cargo-llvm-cov
      - name: Generate code coverage
-        run: cargo +nightly llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
+        run: cargo +nightly-2023-09-10 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        continue-on-error: true
--- a/src/aggregation/agg_limits.rs
+++ b/src/aggregation/agg_limits.rs
@@ -134,3 +134,142 @@ impl Drop for ResourceLimitGuard {
            .fetch_sub(self.allocated_with_the_guard, Ordering::Relaxed);
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::aggregation::tests::exec_request_with_query;
+
+    // https://github.com/quickwit-oss/quickwit/issues/3837
+    #[test]
+    fn test_agg_limits_with_empty_merge() {
+        use crate::aggregation::agg_req::Aggregations;
+        use crate::aggregation::bucket::tests::get_test_index_from_docs;
+
+        let docs = vec![
+            vec![r#"{ "date": "2015-01-02T00:00:00Z", "text": "bbb", "text2": "bbb" }"#],
+            vec![r#"{ "text": "aaa", "text2": "bbb" }"#],
+        ];
+        let index = get_test_index_from_docs(false, &docs).unwrap();
+
+        {
+            let elasticsearch_compatible_json = json!(
+                {
+                    "1": {
+                        "terms": {"field": "text2", "min_doc_count": 0},
+                        "aggs": {
+                            "2":{
+                                "date_histogram": {
+                                    "field": "date",
+                                    "fixed_interval": "1d",
+                                    "extended_bounds": {
+                                        "min": "2015-01-01T00:00:00Z",
+                                        "max": "2015-01-10T00:00:00Z"
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            );
+
+            let agg_req: Aggregations = serde_json::from_str(
+                &serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
+            )
+            .unwrap();
+            let res = exec_request_with_query(agg_req, &index, Some(("text", "bbb"))).unwrap();
+            let expected_res = json!({
+             "1": {
+                "buckets": [
+                  {
+                    "2": {
+                      "buckets": [
+                        { "doc_count": 0, "key": 1420070400000.0, "key_as_string": "2015-01-01T00:00:00Z" },
+                        { "doc_count": 1, "key": 1420156800000.0, "key_as_string": "2015-01-02T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420243200000.0, "key_as_string": "2015-01-03T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420329600000.0, "key_as_string": "2015-01-04T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420416000000.0, "key_as_string": "2015-01-05T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420502400000.0, "key_as_string": "2015-01-06T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420588800000.0, "key_as_string": "2015-01-07T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420675200000.0, "key_as_string": "2015-01-08T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420761600000.0, "key_as_string": "2015-01-09T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420848000000.0, "key_as_string": "2015-01-10T00:00:00Z" }
+                      ]
+                    },
+                    "doc_count": 1,
+                    "key": "bbb"
+                  }
+                ],
+                "doc_count_error_upper_bound": 0,
+                "sum_other_doc_count": 0
+              }
+            });
+            assert_eq!(res, expected_res);
+        }
+    }
+
+    // https://github.com/quickwit-oss/quickwit/issues/3837
+    #[test]
+    fn test_agg_limits_with_empty_data() {
+        use crate::aggregation::agg_req::Aggregations;
+        use crate::aggregation::bucket::tests::get_test_index_from_docs;
+
+        let docs = vec![vec![r#"{ "text": "aaa", "text2": "bbb" }"#]];
+        let index = get_test_index_from_docs(false, &docs).unwrap();
+
+        {
+            // Empty result since there is no doc with dates
+            let elasticsearch_compatible_json = json!(
+                {
+                    "1": {
+                        "terms": {"field": "text2", "min_doc_count": 0},
+                        "aggs": {
+                            "2":{
+                                "date_histogram": {
+                                    "field": "date",
+                                    "fixed_interval": "1d",
+                                    "extended_bounds": {
+                                        "min": "2015-01-01T00:00:00Z",
+                                        "max": "2015-01-10T00:00:00Z"
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            );
+
+            let agg_req: Aggregations = serde_json::from_str(
+                &serde_json::to_string(&elasticsearch_compatible_json).unwrap(),
+            )
+            .unwrap();
+            let res = exec_request_with_query(agg_req, &index, Some(("text", "bbb"))).unwrap();
+            let expected_res = json!({
+             "1": {
+                "buckets": [
+                  {
+                    "2": {
+                      "buckets": [
+                        { "doc_count": 0, "key": 1420070400000.0, "key_as_string": "2015-01-01T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420156800000.0, "key_as_string": "2015-01-02T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420243200000.0, "key_as_string": "2015-01-03T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420329600000.0, "key_as_string": "2015-01-04T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420416000000.0, "key_as_string": "2015-01-05T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420502400000.0, "key_as_string": "2015-01-06T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420588800000.0, "key_as_string": "2015-01-07T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420675200000.0, "key_as_string": "2015-01-08T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420761600000.0, "key_as_string": "2015-01-09T00:00:00Z" },
+                        { "doc_count": 0, "key": 1420848000000.0, "key_as_string": "2015-01-10T00:00:00Z" }
+                      ]
+                    },
+                    "doc_count": 0,
+                    "key": "bbb"
+                  }
+                ],
+                "doc_count_error_upper_bound": 0,
+                "sum_other_doc_count": 0
+              }
+            });
+            assert_eq!(res, expected_res);
+        }
+    }
+}
--- a/src/aggregation/agg_req_with_accessor.rs
+++ b/src/aggregation/agg_req_with_accessor.rs
@@ -103,7 +103,8 @@ impl AggregationWithAccessor {
                field: field_name, ..
            }) => {
                let (accessor, column_type) =
-                    get_ff_reader(reader, field_name, Some(get_numeric_or_date_column_types()))?;
+                    // Only DateTime is supported for DateHistogram
+                    get_ff_reader(reader, field_name, Some(&[ColumnType::DateTime]))?;
                add_agg_with_accessor(accessor, column_type, &mut res)?;
            }
            Terms(TermsAggregation {
--- a/src/aggregation/bucket/histogram/date_histogram.rs
+++ b/src/aggregation/bucket/histogram/date_histogram.rs
@@ -132,6 +132,7 @@ impl DateHistogramAggregationReq {
            hard_bounds: self.hard_bounds,
            extended_bounds: self.extended_bounds,
            keyed: self.keyed,
+            is_normalized_to_ns: false,
        })
    }

@@ -243,14 +244,14 @@ fn parse_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
 }

 #[cfg(test)]
-mod tests {
+pub mod tests {
    use pretty_assertions::assert_eq;

    use super::*;
    use crate::aggregation::agg_req::Aggregations;
    use crate::aggregation::tests::exec_request;
    use crate::indexer::NoMergePolicy;
-    use crate::schema::{Schema, FAST};
+    use crate::schema::{Schema, FAST, STRING};
    use crate::Index;

    #[test]
@@ -306,7 +307,8 @@ mod tests {
    ) -> crate::Result<Index> {
        let mut schema_builder = Schema::builder();
        schema_builder.add_date_field("date", FAST);
-        schema_builder.add_text_field("text", FAST);
+        schema_builder.add_text_field("text", FAST | STRING);
+        schema_builder.add_text_field("text2", FAST | STRING);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema.clone());
        {
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -122,11 +122,14 @@ pub struct HistogramAggregation {
    /// Whether to return the buckets as a hash map
    #[serde(default)]
    pub keyed: bool,
+    /// Whether the values are normalized to ns for date time values. Defaults to false.
+    #[serde(default)]
+    pub is_normalized_to_ns: bool,
 }

 impl HistogramAggregation {
-    pub(crate) fn normalize(&mut self, column_type: ColumnType) {
-        if column_type.is_date_time() {
+    pub(crate) fn normalize_date_time(&mut self) {
+        if !self.is_normalized_to_ns {
            // values are provided in ms, but the fastfield is in nano seconds
            self.interval *= 1_000_000.0;
            self.offset = self.offset.map(|off| off * 1_000_000.0);
@@ -138,6 +141,7 @@ impl HistogramAggregation {
                min: bounds.min * 1_000_000.0,
                max: bounds.max * 1_000_000.0,
            });
+            self.is_normalized_to_ns = true;
        }
    }

@@ -370,7 +374,7 @@ impl SegmentHistogramCollector {

        Ok(IntermediateBucketResult::Histogram {
            buckets,
-            column_type: Some(self.column_type),
+            is_date_agg: self.column_type == ColumnType::DateTime,
        })
    }

@@ -381,7 +385,9 @@ impl SegmentHistogramCollector {
        accessor_idx: usize,
    ) -> crate::Result<Self> {
        req.validate()?;
-        req.normalize(field_type);
+        if field_type == ColumnType::DateTime {
+            req.normalize_date_time();
+        }

        let sub_aggregation_blueprint = if sub_aggregation.is_empty() {
            None
@@ -439,6 +445,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
    // memory check upfront
    let (_, first_bucket_num, last_bucket_num) =
        generate_bucket_pos_with_opt_minmax(histogram_req, min_max);
+
    // It's based on user input, so we need to account for overflows
    let added_buckets = ((last_bucket_num.saturating_sub(first_bucket_num)).max(0) as u64)
        .saturating_sub(buckets.len() as u64);
@@ -482,7 +489,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
 // Convert to BucketEntry
 pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
    buckets: Vec<IntermediateHistogramBucketEntry>,
-    column_type: Option<ColumnType>,
+    is_date_agg: bool,
    histogram_req: &HistogramAggregation,
    sub_aggregation: &Aggregations,
    limits: &AggregationLimits,
@@ -491,8 +498,8 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
    // The request used in the the call to final is not yet be normalized.
    // Normalization is changing the precision from milliseconds to nanoseconds.
    let mut histogram_req = histogram_req.clone();
-    if let Some(column_type) = column_type {
-        histogram_req.normalize(column_type);
+    if is_date_agg {
+        histogram_req.normalize_date_time();
    }
    let mut buckets = if histogram_req.min_doc_count() == 0 {
        // With min_doc_count != 0, we may need to add buckets, so that there are no
@@ -516,7 +523,7 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(

    // If we have a date type on the histogram buckets, we add the `key_as_string` field as rfc339
    // and normalize from nanoseconds to milliseconds
-    if column_type == Some(ColumnType::DateTime) {
+    if is_date_agg {
        for bucket in buckets.iter_mut() {
            if let crate::aggregation::Key::F64(ref mut val) = bucket.key {
                let key_as_string = format_date(*val as i64)?;
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -172,10 +172,16 @@ pub(crate) fn empty_from_req(req: &Aggregation) -> IntermediateAggregationResult
        Range(_) => IntermediateAggregationResult::Bucket(IntermediateBucketResult::Range(
            Default::default(),
        )),
-        Histogram(_) | DateHistogram(_) => {
+        Histogram(_) => {
            IntermediateAggregationResult::Bucket(IntermediateBucketResult::Histogram {
                buckets: Vec::new(),
-                column_type: None,
+                is_date_agg: false,
+            })
+        }
+        DateHistogram(_) => {
+            IntermediateAggregationResult::Bucket(IntermediateBucketResult::Histogram {
+                buckets: Vec::new(),
+                is_date_agg: true,
            })
        }
        Average(_) => IntermediateAggregationResult::Metric(IntermediateMetricResult::Average(
@@ -343,8 +349,8 @@ pub enum IntermediateBucketResult {
    /// This is the histogram entry for a bucket, which contains a key, count, and optionally
    /// sub_aggregations.
    Histogram {
-        /// The column_type of the underlying `Column`
-        column_type: Option<ColumnType>,
+        /// The column_type of the underlying `Column` is DateTime
+        is_date_agg: bool,
        /// The buckets
        buckets: Vec<IntermediateHistogramBucketEntry>,
    },
@@ -399,7 +405,7 @@ impl IntermediateBucketResult {
                Ok(BucketResult::Range { buckets })
            }
            IntermediateBucketResult::Histogram {
-                column_type,
+                is_date_agg,
                buckets,
            } => {
                let histogram_req = &req
@@ -408,7 +414,7 @@ impl IntermediateBucketResult {
                    .expect("unexpected aggregation, expected histogram aggregation");
                let buckets = intermediate_histogram_buckets_to_final_buckets(
                    buckets,
-                    column_type,
+                    is_date_agg,
                    histogram_req,
                    req.sub_aggregation(),
                    limits,
@@ -457,11 +463,11 @@ impl IntermediateBucketResult {
            (
                IntermediateBucketResult::Histogram {
                    buckets: buckets_left,
-                    ..
+                    is_date_agg: _,
                },
                IntermediateBucketResult::Histogram {
                    buckets: buckets_right,
-                    ..
+                    is_date_agg: _,
                },
            ) => {
                let buckets: Result<Vec<IntermediateHistogramBucketEntry>, TantivyError> =
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -620,7 +620,7 @@ impl IndexWriter {
        for worker_handle in former_workers_join_handle {
            let indexing_worker_result = worker_handle
                .join()
-                .map_err(|e| TantivyError::ErrorInThread(format!("{e:?}")))?;
+                .map_err(|e| TantivyError::ErrorInThread(e.to_string()))?;
            indexing_worker_result?;
            self.add_indexing_worker()?;
        }