Merge pull request #1340 from PSeitz/term_agg

fix collecting term_dict field names
This commit is contained in:
PSeitz
2022-04-18 08:21:27 +02:00
committed by GitHub
3 changed files with 38 additions and 19 deletions

View File

@@ -187,6 +187,7 @@ impl BucketAggregation {
if let BucketAggregationType::Terms(terms) = &self.bucket_agg {
term_dict_field_names.insert(terms.field.to_string());
}
term_dict_field_names.extend(get_term_dict_field_names(&self.sub_aggregation));
}
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
self.bucket_agg.get_fast_field_names(fast_field_names);

View File

@@ -37,8 +37,8 @@ use crate::DocId;
/// ## Per bucket document count error
/// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will include
/// doc_count_error_upper_bound, which is an upper bound to the error on the doc_count returned by
/// each segment. Its the sum of the size of the largest bucket on each shard that didnt fit into
/// shard_size.
/// each segment. It's the sum of the size of the largest bucket on each segment that didn't fit
/// into segment_size.
///
/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
/// [TermBucketEntry](crate::aggregation::agg_result::BucketEntry) on the
@@ -131,7 +131,7 @@ pub(crate) struct TermsAggregationInternal {
/// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will
/// include doc_count_error_upper_bound, which is an upper bound to the error on the
/// doc_count returned by each shard. It's the sum of the size of the largest bucket on
/// each segment that didnt fit into `shard_size`.
/// each segment that didn't fit into `segment_size`.
pub show_term_doc_count_error: bool,
/// To get more accurate results, we fetch more than `size` from each segment.
@@ -524,7 +524,6 @@ mod tests {
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
field: "string_id".to_string(),
size: Some(2),
shard_size: Some(2),
min_doc_count: Some(3),
..Default::default()
}),
@@ -554,10 +553,8 @@ mod tests {
#[test]
fn terms_aggregation_min_doc_count_special_case() -> crate::Result<()> {
let terms_per_segment = vec![
vec!["terma", "terma", "termb", "termb", "termb", "termc"], /* termc doesn't make it
* from this segment */
vec!["terma", "terma", "termb", "termc", "termc"], /* termb doesn't make it from
* this segment */
vec!["terma", "terma", "termb", "termb", "termb", "termc"],
vec!["terma", "terma", "termb", "termc", "termc"],
];
let index = get_test_index_from_terms(false, &terms_per_segment)?;
@@ -576,8 +573,8 @@ mod tests {
.into_iter()
.collect();
// searching for terma, but min_doc_count will return all terms
let res = exec_request_with_query(agg_req, &index, Some(("string_id", "terma")))?;
println!("{}", &serde_json::to_string_pretty(&res).unwrap());
assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);
@@ -618,7 +615,6 @@ mod tests {
.collect();
let res = exec_request(agg_req, &index)?;
println!("{}", &serde_json::to_string_pretty(&res).unwrap());
assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma");
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);

View File

@@ -313,8 +313,11 @@ mod tests {
use super::bucket::RangeAggregation;
use super::collector::AggregationCollector;
use super::metric::AverageAggregation;
use crate::aggregation::agg_req::{BucketAggregationType, MetricAggregation};
use crate::aggregation::agg_req::{
get_term_dict_field_names, BucketAggregationType, MetricAggregation,
};
use crate::aggregation::agg_result::AggregationResults;
use crate::aggregation::bucket::TermsAggregation;
use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults;
use crate::aggregation::segment_agg_result::DOC_BLOCK_SIZE;
use crate::aggregation::DistributedAggregationCollector;
@@ -628,8 +631,10 @@ mod tests {
.set_indexing_options(
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
)
.set_fast()
.set_stored();
let text_field = schema_builder.add_text_field("text", text_fieldtype);
schema_builder.add_text_field("dummy_text", STRING);
let score_fieldtype =
crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
@@ -834,10 +839,21 @@ mod tests {
IndexRecordOption::Basic,
);
let sub_agg_req: Aggregations =
vec![("average_in_range".to_string(), get_avg_req("score"))]
.into_iter()
.collect();
let sub_agg_req: Aggregations = vec![
("average_in_range".to_string(), get_avg_req("score")),
(
"term_agg".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
field: "text".to_string(),
..Default::default()
}),
sub_aggregation: Default::default(),
}),
),
]
.into_iter()
.collect();
let agg_req: Aggregations = if use_elastic_json_req {
let elasticsearch_compatible_json_req = r#"
{
@@ -853,7 +869,8 @@ mod tests {
]
},
"aggs": {
"average_in_range": { "avg": { "field": "score" } }
"average_in_range": { "avg": { "field": "score" } },
"term_agg": { "terms": { "field": "text" } }
}
},
"rangei64": {
@@ -868,7 +885,8 @@ mod tests {
]
},
"aggs": {
"average_in_range": { "avg": { "field": "score" } }
"average_in_range": { "avg": { "field": "score" } },
"term_agg": { "terms": { "field": "text" } }
}
},
"average": {
@@ -886,7 +904,8 @@ mod tests {
]
},
"aggs": {
"average_in_range": { "avg": { "field": "score" } }
"average_in_range": { "avg": { "field": "score" } },
"term_agg": { "terms": { "field": "text" } }
}
}
}
@@ -945,6 +964,9 @@ mod tests {
agg_req
};
let field_names = get_term_dict_field_names(&agg_req);
assert_eq!(field_names, vec!["text".to_string()].into_iter().collect());
let agg_res: AggregationResults = if use_distributed_collector {
let collector = DistributedAggregationCollector::from_aggs(agg_req.clone());
@@ -1085,7 +1107,7 @@ mod tests {
searcher.search(&AllQuery, &collector).unwrap_err()
};
let agg_res = avg_on_field("text");
let agg_res = avg_on_field("dummy_text");
assert_eq!(
format!("{:?}", agg_res),
r#"InvalidArgument("Only fast fields of type f64, u64, i64 are supported, but got Str ")"#