mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
Merge pull request #1358 from quickwit-oss/fix_docs
add alias shard_size to split_size for quickwit
This commit is contained in:
@@ -1364,4 +1364,29 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_invalid_request() -> crate::Result<()> {
|
||||
let index = get_test_index_2_segments(true)?;
|
||||
|
||||
let agg_req: Aggregations = vec![(
|
||||
"histogram".to_string(),
|
||||
Aggregation::Bucket(BucketAggregation {
|
||||
bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
|
||||
field: "score_f64".to_string(),
|
||||
interval: 0.0,
|
||||
..Default::default()
|
||||
}),
|
||||
sub_aggregation: Default::default(),
|
||||
}),
|
||||
)]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let agg_res = exec_request(agg_req, &index);
|
||||
|
||||
assert!(agg_res.is_err());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,7 +81,8 @@ pub struct TermsAggregation {
|
||||
///
|
||||
/// Should never be smaller than size.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub shard_size: Option<u32>,
|
||||
#[serde(alias = "shard_size")]
|
||||
pub split_size: Option<u32>,
|
||||
|
||||
/// To get more accurate results, we fetch more than `size` from each segment.
|
||||
///
|
||||
@@ -96,11 +97,11 @@ pub struct TermsAggregation {
|
||||
/// doc_count returned by each shard. It’s the sum of the size of the largest bucket on
|
||||
/// each segment that didn’t fit into `shard_size`.
|
||||
///
|
||||
/// Defaults to true when ordering by counts desc.
|
||||
/// Defaults to true when ordering by count desc.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub show_term_doc_count_error: Option<bool>,
|
||||
|
||||
/// Filter all terms than are lower `min_doc_count`. Defaults to 1.
|
||||
/// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
|
||||
///
|
||||
/// **Expensive**: When set to 0, this will return all terms in the field.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
@@ -143,7 +144,7 @@ pub(crate) struct TermsAggregationInternal {
|
||||
/// Increasing this value will increase the cost for more accuracy.
|
||||
pub segment_size: u32,
|
||||
|
||||
/// Filter all terms than are lower `min_doc_count`. Defaults to 1.
|
||||
/// Filter all terms that are lower than `min_doc_count`. Defaults to 1.
|
||||
///
|
||||
/// *Expensive*: When set to 0, this will return all terms in the field.
|
||||
pub min_doc_count: u64,
|
||||
@@ -572,7 +573,7 @@ mod tests {
|
||||
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
|
||||
field: "string_id".to_string(),
|
||||
size: Some(2),
|
||||
shard_size: Some(2),
|
||||
split_size: Some(2),
|
||||
..Default::default()
|
||||
}),
|
||||
sub_aggregation: Default::default(),
|
||||
@@ -1210,6 +1211,51 @@ mod tests {
|
||||
.unwrap();
|
||||
assert_eq!(agg_req, agg_req_deser);
|
||||
|
||||
let elasticsearch_compatible_json = json!(
|
||||
{
|
||||
"term_agg_test":{
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"split_size": 2u64,
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// test alias shard_size, split_size
|
||||
let agg_req: Aggregations = vec![(
|
||||
"term_agg_test".to_string(),
|
||||
Aggregation::Bucket(BucketAggregation {
|
||||
bucket_agg: BucketAggregationType::Terms(TermsAggregation {
|
||||
field: "string_id".to_string(),
|
||||
split_size: Some(2),
|
||||
..Default::default()
|
||||
}),
|
||||
sub_aggregation: Default::default(),
|
||||
}),
|
||||
)]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let agg_req_deser: Aggregations =
|
||||
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
|
||||
.unwrap();
|
||||
assert_eq!(agg_req, agg_req_deser);
|
||||
|
||||
let elasticsearch_compatible_json = json!(
|
||||
{
|
||||
"term_agg_test":{
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"shard_size": 2u64,
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let agg_req_deser: Aggregations =
|
||||
serde_json::from_str(&serde_json::to_string(&elasticsearch_compatible_json).unwrap())
|
||||
.unwrap();
|
||||
assert_eq!(agg_req, agg_req_deser);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,7 +24,9 @@ use crate::aggregation::bucket::TermsAggregationInternal;
|
||||
/// intermediate results.
|
||||
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct IntermediateAggregationResults {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) metrics: Option<VecWithNames<IntermediateMetricResult>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) buckets: Option<VecWithNames<IntermediateBucketResult>>,
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,8 @@
|
||||
//!
|
||||
//! #### Limitations
|
||||
//!
|
||||
//! Currently aggregations work only on single value fast fields of type u64, f64 and i64.
|
||||
//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and
|
||||
//! fast fields on text fields.
|
||||
//!
|
||||
//! # JSON Format
|
||||
//! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.
|
||||
|
||||
@@ -300,7 +300,7 @@ impl IntFastFieldWriter {
|
||||
/// If the document has more than one value for the given field,
|
||||
/// only the first one is taken into account.
|
||||
///
|
||||
/// Values for string fast fields are skipped.
|
||||
/// Values on text fast fields are skipped.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
match doc.get_first(self.field) {
|
||||
Some(v) => {
|
||||
|
||||
@@ -42,6 +42,11 @@ impl TextOptions {
|
||||
/// Text fast fields will have the term ids stored in the fast field.
|
||||
/// The fast field will be a multivalued fast field.
|
||||
///
|
||||
/// The effective cardinality depends on the tokenizer. When creating fast fields on text
|
||||
/// fields it is recommended to use the "raw" tokenizer, since it will store the original text
|
||||
/// unchanged. The "default" tokenizer will store the terms as lower case and this will be
|
||||
/// reflected in the dictionary.
|
||||
///
|
||||
/// The original text can be retrieved via `ord_to_term` from the dictionary.
|
||||
#[must_use]
|
||||
pub fn set_fast(mut self) -> TextOptions {
|
||||
|
||||
Reference in New Issue
Block a user