diff --git a/src/aggregation/agg_req.rs b/src/aggregation/agg_req.rs index 9dfe662d9..898c816ad 100644 --- a/src/aggregation/agg_req.rs +++ b/src/aggregation/agg_req.rs @@ -187,6 +187,7 @@ impl BucketAggregation { if let BucketAggregationType::Terms(terms) = &self.bucket_agg { term_dict_field_names.insert(terms.field.to_string()); } + term_dict_field_names.extend(get_term_dict_field_names(&self.sub_aggregation)); } fn get_fast_field_names(&self, fast_field_names: &mut HashSet) { self.bucket_agg.get_fast_field_names(fast_field_names); diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs index b66a37838..8988e8c3b 100644 --- a/src/aggregation/bucket/term_agg.rs +++ b/src/aggregation/bucket/term_agg.rs @@ -37,8 +37,8 @@ use crate::DocId; /// ## Per bucket document count error /// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will include /// doc_count_error_upper_bound, which is an upper bound to the error on the doc_count returned by -/// each segment. It’s the sum of the size of the largest bucket on each shard that didn’t fit into -/// shard_size. +/// each segment. It’s the sum of the size of the largest bucket on each segment that didn’t fit +/// into segment_size. /// /// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with /// [TermBucketEntry](crate::aggregation::agg_result::BucketEntry) on the @@ -131,7 +131,7 @@ pub(crate) struct TermsAggregationInternal { /// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will /// include doc_count_error_upper_bound, which is an upper bound to the error on the /// doc_count returned by each shard. It’s the sum of the size of the largest bucket on - /// each segment that didn’t fit into `shard_size`. + /// each segment that didn’t fit into `segment_size`. pub show_term_doc_count_error: bool, /// The get more accurate results, we fetch more than `size` from each segment. @@ -524,7 +524,6 @@ mod tests { bucket_agg: BucketAggregationType::Terms(TermsAggregation { field: "string_id".to_string(), size: Some(2), - shard_size: Some(2), min_doc_count: Some(3), ..Default::default() }), @@ -554,10 +553,8 @@ mod tests { #[test] fn terms_aggregation_min_doc_count_special_case() -> crate::Result<()> { let terms_per_segment = vec![ - vec!["terma", "terma", "termb", "termb", "termb", "termc"], /* termc doesn't make it - * from this segment */ - vec!["terma", "terma", "termb", "termc", "termc"], /* termb doesn't make it from - * this segment */ + vec!["terma", "terma", "termb", "termb", "termb", "termc"], + vec!["terma", "terma", "termb", "termc", "termc"], ]; let index = get_test_index_from_terms(false, &terms_per_segment)?; @@ -576,8 +573,8 @@ mod tests { .into_iter() .collect(); + // searching for terma, but min_doc_count will return all terms let res = exec_request_with_query(agg_req, &index, Some(("string_id", "terma")))?; - println!("{}", &serde_json::to_string_pretty(&res).unwrap()); assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma"); assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4); @@ -618,7 +615,6 @@ mod tests { .collect(); let res = exec_request(agg_req, &index)?; - println!("{}", &serde_json::to_string_pretty(&res).unwrap()); assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma"); assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4); diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index cf974d367..197a9496f 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -313,8 +313,11 @@ mod tests { use super::bucket::RangeAggregation; use super::collector::AggregationCollector; use super::metric::AverageAggregation; - use crate::aggregation::agg_req::{BucketAggregationType, MetricAggregation}; + use crate::aggregation::agg_req::{ + get_term_dict_field_names, BucketAggregationType, MetricAggregation, + }; use crate::aggregation::agg_result::AggregationResults; + use crate::aggregation::bucket::TermsAggregation; use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults; use crate::aggregation::segment_agg_result::DOC_BLOCK_SIZE; use crate::aggregation::DistributedAggregationCollector; @@ -628,8 +631,10 @@ mod tests { .set_indexing_options( TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs), ) + .set_fast() .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); + schema_builder.add_text_field("dummy_text", STRING); let score_fieldtype = crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue); let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone()); @@ -834,10 +839,21 @@ mod tests { IndexRecordOption::Basic, ); - let sub_agg_req: Aggregations = - vec![("average_in_range".to_string(), get_avg_req("score"))] - .into_iter() - .collect(); + let sub_agg_req: Aggregations = vec![ + ("average_in_range".to_string(), get_avg_req("score")), + ( + "term_agg".to_string(), + Aggregation::Bucket(BucketAggregation { + bucket_agg: BucketAggregationType::Terms(TermsAggregation { + field: "text".to_string(), + ..Default::default() + }), + sub_aggregation: Default::default(), + }), + ), + ] + .into_iter() + .collect(); let agg_req: Aggregations = if use_elastic_json_req { let elasticsearch_compatible_json_req = r#" { @@ -853,7 +869,8 @@ mod tests { ] }, "aggs": { - "average_in_range": { "avg": { "field": "score" } } + "average_in_range": { "avg": { "field": "score" } }, + "term_agg": { "terms": { "field": "text" } } } }, "rangei64": { @@ -868,7 +885,8 @@ mod tests { ] }, "aggs": { - "average_in_range": { "avg": { "field": "score" } } + "average_in_range": { "avg": { "field": "score" } }, + "term_agg": { "terms": { "field": "text" } } } }, "average": { @@ -886,7 +904,8 @@ mod tests { ] }, "aggs": { - "average_in_range": { "avg": { "field": "score" } } + "average_in_range": { "avg": { "field": "score" } }, + "term_agg": { "terms": { "field": "text" } } } } } @@ -945,6 +964,9 @@ mod tests { agg_req }; + let field_names = get_term_dict_field_names(&agg_req); + assert_eq!(field_names, vec!["text".to_string()].into_iter().collect()); + let agg_res: AggregationResults = if use_distributed_collector { let collector = DistributedAggregationCollector::from_aggs(agg_req.clone()); @@ -1085,7 +1107,7 @@ mod tests { searcher.search(&AllQuery, &collector).unwrap_err() }; - let agg_res = avg_on_field("text"); + let agg_res = avg_on_field("dummy_text"); assert_eq!( format!("{:?}", agg_res), r#"InvalidArgument("Only fast fields of type f64, u64, i64 are supported, but got Str ")"#