Merge pull request #1358 from quickwit-oss/fix_docs

add alias shard_size to split_size for quickwit
This commit is contained in:
PSeitz
2022-05-06 18:41:34 +08:00
committed by GitHub
6 changed files with 86 additions and 7 deletions

View File

@@ -1364,4 +1364,29 @@ mod tests {
Ok(())
}
/// A histogram aggregation whose `interval` is `0.0` is an invalid request:
/// executing it is expected to yield an error rather than a result.
#[test]
fn histogram_invalid_request() -> crate::Result<()> {
    let index = get_test_index_2_segments(true)?;

    // Assemble the request bottom-up: histogram settings, then the bucket
    // aggregation wrapping them, then the named top-level request.
    let zero_interval_histogram = HistogramAggregation {
        field: "score_f64".to_string(),
        interval: 0.0,
        ..Default::default()
    };
    let bucket = BucketAggregation {
        bucket_agg: BucketAggregationType::Histogram(zero_interval_histogram),
        sub_aggregation: Default::default(),
    };
    let named_aggs = vec![("histogram".to_string(), Aggregation::Bucket(bucket))];
    let agg_req: Aggregations = named_aggs.into_iter().collect();

    let outcome = exec_request(agg_req, &index);
    assert!(outcome.is_err());
    Ok(())
}
}

View File

@@ -81,7 +81,8 @@ pub struct TermsAggregation {
///
/// Should never be smaller than size.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub shard_size: Option<u32>,
#[serde(alias = "shard_size")]
pub split_size: Option<u32>,
/// To get more accurate results, we fetch more than `size` from each segment.
///
@@ -96,11 +97,11 @@ pub struct TermsAggregation {
/// doc_count returned by each shard. It's the sum of the size of the largest bucket on
/// each segment that didn't fit into `shard_size`.
///
/// Defaults to true when ordering by counts desc.
/// Defaults to true when ordering by count desc.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub show_term_doc_count_error: Option<bool>,
/// Filter all terms than are lower `min_doc_count`. Defaults to 1.
/// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
///
/// **Expensive**: When set to 0, this will return all terms in the field.
#[serde(skip_serializing_if = "Option::is_none", default)]
@@ -143,7 +144,7 @@ pub(crate) struct TermsAggregationInternal {
/// Increasing this value will increase the cost in exchange for more accuracy.
pub segment_size: u32,
/// Filter all terms than are lower `min_doc_count`. Defaults to 1.
/// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
///
/// *Expensive*: When set to 0, this will return all terms in the field.
pub min_doc_count: u64,
@@ -572,7 +573,7 @@ mod tests {
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
field: "string_id".to_string(),
size: Some(2),
shard_size: Some(2),
split_size: Some(2),
..Default::default()
}),
sub_aggregation: Default::default(),
@@ -1210,6 +1211,51 @@ mod tests {
.unwrap();
assert_eq!(agg_req, agg_req_deser);
let elasticsearch_compatible_json = json!(
{
"term_agg_test":{
"terms": {
"field": "string_id",
"split_size": 2u64,
}
}
});
// test alias shard_size, split_size
let agg_req: Aggregations = vec![(
"term_agg_test".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
field: "string_id".to_string(),
split_size: Some(2),
..Default::default()
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let agg_req_deser: Aggregations =
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
.unwrap();
assert_eq!(agg_req, agg_req_deser);
let elasticsearch_compatible_json = json!(
{
"term_agg_test":{
"terms": {
"field": "string_id",
"shard_size": 2u64,
}
}
});
let agg_req_deser: Aggregations =
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
.unwrap();
assert_eq!(agg_req, agg_req_deser);
Ok(())
}
}

View File

@@ -24,7 +24,9 @@ use crate::aggregation::bucket::TermsAggregationInternal;
/// intermediate results.
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateAggregationResults {
/// Intermediate results of metric aggregations, if any.
///
/// Left out of the serialized output entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) metrics: Option<VecWithNames<IntermediateMetricResult>>,
/// Intermediate results of bucket aggregations, if any.
///
/// Left out of the serialized output entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) buckets: Option<VecWithNames<IntermediateBucketResult>>,
}

View File

@@ -20,7 +20,8 @@
//!
//! #### Limitations
//!
//! Currently aggregations work only on single value fast fields of type u64, f64 and i64.
//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and
//! fast fields on text fields.
//!
//! # JSON Format
//! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.

View File

@@ -300,7 +300,7 @@ impl IntFastFieldWriter {
/// If the document has more than one value for the given field,
/// only the first one is taken in account.
///
/// Values for string fast fields are skipped.
/// Values on text fast fields are skipped.
pub fn add_document(&mut self, doc: &Document) {
match doc.get_first(self.field) {
Some(v) => {

View File

@@ -42,6 +42,11 @@ impl TextOptions {
/// Text fast fields will have the term ids stored in the fast field.
/// The fast field will be a multivalued fast field.
///
/// The effective cardinality depends on the tokenizer. When creating fast fields on text
/// fields it is recommended to use the "raw" tokenizer, since it will store the original text
/// unchanged. The "default" tokenizer will store the terms as lower case and this will be
/// reflected in the dictionary.
///
/// The original text can be retrieved via `ord_to_term` from the dictionary.
#[must_use]
pub fn set_fast(mut self) -> TextOptions {