mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-02 08:30:41 +00:00
Merge pull request #1340 from PSeitz/term_agg
fix collecting term_dict field names
This commit is contained in:
@@ -187,6 +187,7 @@ impl BucketAggregation {
|
||||
if let BucketAggregationType::Terms(terms) = &self.bucket_agg {
|
||||
term_dict_field_names.insert(terms.field.to_string());
|
||||
}
|
||||
term_dict_field_names.extend(get_term_dict_field_names(&self.sub_aggregation));
|
||||
}
|
||||
fn get_fast_field_names(&self, fast_field_names: &mut HashSet<String>) {
|
||||
self.bucket_agg.get_fast_field_names(fast_field_names);
|
||||
|
||||
@@ -37,8 +37,8 @@ use crate::DocId;
|
||||
/// ## Per bucket document count error
|
||||
/// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will include
|
||||
/// doc_count_error_upper_bound, which is an upper bound to the error on the doc_count returned by
|
||||
/// each segment. It’s the sum of the size of the largest bucket on each shard that didn’t fit into
|
||||
/// shard_size.
|
||||
/// each segment. It’s the sum of the size of the largest bucket on each segment that didn’t fit
|
||||
/// into segment_size.
|
||||
///
|
||||
/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
|
||||
/// [TermBucketEntry](crate::aggregation::agg_result::BucketEntry) on the
|
||||
@@ -131,7 +131,7 @@ pub(crate) struct TermsAggregationInternal {
|
||||
/// If you set the `show_term_doc_count_error` parameter to true, the terms aggregation will
|
||||
/// include doc_count_error_upper_bound, which is an upper bound to the error on the
|
||||
/// doc_count returned by each segment. It’s the sum of the size of the largest bucket on
|
||||
/// each segment that didn’t fit into `shard_size`.
|
||||
/// each segment that didn’t fit into `segment_size`.
|
||||
pub show_term_doc_count_error: bool,
|
||||
|
||||
/// To get more accurate results, we fetch more than `size` from each segment.
|
||||
@@ -524,7 +524,6 @@ mod tests {
|
||||
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
|
||||
field: "string_id".to_string(),
|
||||
size: Some(2),
|
||||
shard_size: Some(2),
|
||||
min_doc_count: Some(3),
|
||||
..Default::default()
|
||||
}),
|
||||
@@ -554,10 +553,8 @@ mod tests {
|
||||
#[test]
|
||||
fn terms_aggregation_min_doc_count_special_case() -> crate::Result<()> {
|
||||
let terms_per_segment = vec![
|
||||
vec!["terma", "terma", "termb", "termb", "termb", "termc"], /* termc doesn't make it
|
||||
* from this segment */
|
||||
vec!["terma", "terma", "termb", "termc", "termc"], /* termb doesn't make it from
|
||||
* this segment */
|
||||
vec!["terma", "terma", "termb", "termb", "termb", "termc"],
|
||||
vec!["terma", "terma", "termb", "termc", "termc"],
|
||||
];
|
||||
|
||||
let index = get_test_index_from_terms(false, &terms_per_segment)?;
|
||||
@@ -576,8 +573,8 @@ mod tests {
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// searching for terma, but min_doc_count will return all terms
|
||||
let res = exec_request_with_query(agg_req, &index, Some(("string_id", "terma")))?;
|
||||
println!("{}", &serde_json::to_string_pretty(&res).unwrap());
|
||||
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma");
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);
|
||||
@@ -618,7 +615,6 @@ mod tests {
|
||||
.collect();
|
||||
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
println!("{}", &serde_json::to_string_pretty(&res).unwrap());
|
||||
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma");
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);
|
||||
|
||||
@@ -313,8 +313,11 @@ mod tests {
|
||||
use super::bucket::RangeAggregation;
|
||||
use super::collector::AggregationCollector;
|
||||
use super::metric::AverageAggregation;
|
||||
use crate::aggregation::agg_req::{BucketAggregationType, MetricAggregation};
|
||||
use crate::aggregation::agg_req::{
|
||||
get_term_dict_field_names, BucketAggregationType, MetricAggregation,
|
||||
};
|
||||
use crate::aggregation::agg_result::AggregationResults;
|
||||
use crate::aggregation::bucket::TermsAggregation;
|
||||
use crate::aggregation::intermediate_agg_result::IntermediateAggregationResults;
|
||||
use crate::aggregation::segment_agg_result::DOC_BLOCK_SIZE;
|
||||
use crate::aggregation::DistributedAggregationCollector;
|
||||
@@ -628,8 +631,10 @@ mod tests {
|
||||
.set_indexing_options(
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_fast()
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
schema_builder.add_text_field("dummy_text", STRING);
|
||||
let score_fieldtype =
|
||||
crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
|
||||
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
|
||||
@@ -834,10 +839,21 @@ mod tests {
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
|
||||
let sub_agg_req: Aggregations =
|
||||
vec![("average_in_range".to_string(), get_avg_req("score"))]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let sub_agg_req: Aggregations = vec![
|
||||
("average_in_range".to_string(), get_avg_req("score")),
|
||||
(
|
||||
"term_agg".to_string(),
|
||||
Aggregation::Bucket(BucketAggregation {
|
||||
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
|
||||
field: "text".to_string(),
|
||||
..Default::default()
|
||||
}),
|
||||
sub_aggregation: Default::default(),
|
||||
}),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let agg_req: Aggregations = if use_elastic_json_req {
|
||||
let elasticsearch_compatible_json_req = r#"
|
||||
{
|
||||
@@ -853,7 +869,8 @@ mod tests {
|
||||
]
|
||||
},
|
||||
"aggs": {
|
||||
"average_in_range": { "avg": { "field": "score" } }
|
||||
"average_in_range": { "avg": { "field": "score" } },
|
||||
"term_agg": { "terms": { "field": "text" } }
|
||||
}
|
||||
},
|
||||
"rangei64": {
|
||||
@@ -868,7 +885,8 @@ mod tests {
|
||||
]
|
||||
},
|
||||
"aggs": {
|
||||
"average_in_range": { "avg": { "field": "score" } }
|
||||
"average_in_range": { "avg": { "field": "score" } },
|
||||
"term_agg": { "terms": { "field": "text" } }
|
||||
}
|
||||
},
|
||||
"average": {
|
||||
@@ -886,7 +904,8 @@ mod tests {
|
||||
]
|
||||
},
|
||||
"aggs": {
|
||||
"average_in_range": { "avg": { "field": "score" } }
|
||||
"average_in_range": { "avg": { "field": "score" } },
|
||||
"term_agg": { "terms": { "field": "text" } }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -945,6 +964,9 @@ mod tests {
|
||||
agg_req
|
||||
};
|
||||
|
||||
let field_names = get_term_dict_field_names(&agg_req);
|
||||
assert_eq!(field_names, vec!["text".to_string()].into_iter().collect());
|
||||
|
||||
let agg_res: AggregationResults = if use_distributed_collector {
|
||||
let collector = DistributedAggregationCollector::from_aggs(agg_req.clone());
|
||||
|
||||
@@ -1085,7 +1107,7 @@ mod tests {
|
||||
searcher.search(&AllQuery, &collector).unwrap_err()
|
||||
};
|
||||
|
||||
let agg_res = avg_on_field("text");
|
||||
let agg_res = avg_on_field("dummy_text");
|
||||
assert_eq!(
|
||||
format!("{:?}", agg_res),
|
||||
r#"InvalidArgument("Only fast fields of type f64, u64, i64 are supported, but got Str ")"#
|
||||
|
||||
Reference in New Issue
Block a user