mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 00:02:55 +00:00
* Initial impl * Added `Filter` impl in `build_single_agg_segment_collector_with_reader` + Added tests * Added `Filter(FilterBucketResult)` + Made tests work. * Fixed type issues. * Fixed a test. * 8a7a73a: Pass `segment_reader` * Added more tests. * Improved parsing + tests * refactoring * Added more tests. * refactoring: moved parsing code under QueryParser * Use Tantivy syntax instead of ES * Added a sanity check test. * Simplified impl + tests * Added back tests in a more maintable way * nitz. * nitz * implemented very simple fast-path * improved a comment * implemented fast field support * Used `BoundsRange` * Improved fast field impl + tests * Simplified execution. * Fixed exports + nitz * Improved the tests to check to the expected result. * Improved test by checking the whole result JSON * Removed brittle perf checks. * Added efficiency verification tests. * Added one more efficiency check test. * Improved the efficiency tests. * Removed unnecessary parsing code + added direct Query obj * Fixed tests. * Improved tests * Fixed code structure * Fixed lint issues * nitz. * nitz * nitz. * nitz. * nitz. * Added an example * Fixed PR comments. * Applied PR comments + nitz * nitz. * Improved the code. * Fixed a perf issue. * Added batch processing. * Made the example more interesting * Fixed bucket count * Renamed Direct to CustomQuery * Fixed lint issues. * No need for scorer to be an `Option` * nitz * Used BitSet * Added an optimization for AllQuery * Fixed merge issues. * Fixed lint issues. * Added benchmark for FILTER * Removed the Option wrapper. * nitz. * Applied PR comments. * Fixed the AllQuery optimization * Applied PR comments. * feat: used `erased_serde` to allow filter query to be serialized * further improved a comment * Added back tests. * removed an unused method * removed an unused method * Added documentation * nitz. * Added query builder. * Fixed a comment. * Applied PR comments. * Fixed doctest issues. 
* Added ser/de * Removed bench in test * Fixed a lint issue.
209 lines
6.9 KiB
Rust
209 lines
6.9 KiB
Rust
//! Module for all bucket aggregations.
|
|
//!
|
|
//! BucketAggregations create buckets of documents.
|
|
//! Each bucket is associated with a rule which
|
|
//! determines whether or not a document falls into it. In other words, the buckets
|
|
//! effectively define document sets. Buckets are not necessarily disjoint, therefore a document can
|
|
//! fall into multiple buckets. In addition to the buckets themselves, the bucket aggregations also
|
|
//! compute and return the number of documents for each bucket. Bucket aggregations, as opposed to
|
|
//! metric aggregations, can hold sub-aggregations. These sub-aggregations will be aggregated for
|
|
//! the buckets created by their "parent" bucket aggregation. There are different bucket
|
|
//! aggregators, each with a different "bucketing" strategy. Some define a single bucket, some
|
|
//! define a fixed number of multiple buckets, and others dynamically create the buckets during the
|
|
//! aggregation process.
|
|
//!
|
|
//! Results of final buckets are [`BucketResult`](super::agg_result::BucketResult).
|
|
//! Results of intermediate buckets are
|
|
//! [`IntermediateBucketResult`](super::intermediate_agg_result::IntermediateBucketResult)
|
|
//!
|
|
//! ## Supported Bucket Aggregations
|
|
//! - [Histogram](HistogramAggregation)
|
|
//! - [DateHistogram](DateHistogramAggregationReq)
|
|
//! - [Range](RangeAggregation)
|
|
//! - [Terms](TermsAggregation)
|
|
|
|
mod filter;
|
|
mod histogram;
|
|
mod range;
|
|
mod term_agg;
|
|
mod term_missing_agg;
|
|
|
|
use std::collections::HashMap;
|
|
use std::fmt;
|
|
|
|
pub use filter::*;
|
|
pub use histogram::*;
|
|
pub use range::*;
|
|
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
|
|
pub use term_agg::*;
|
|
pub use term_missing_agg::*;
|
|
|
|
/// Order for buckets in a bucket aggregation.
|
|
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize, Default)]
|
|
pub enum Order {
|
|
/// Asc order
|
|
#[serde(rename = "asc")]
|
|
Asc,
|
|
/// Desc order
|
|
#[serde(rename = "desc")]
|
|
#[default]
|
|
Desc,
|
|
}
|
|
|
|
/// Property of a bucket by which the order is applied.
///
/// The default is [`OrderTarget::Count`].
#[derive(Debug, Default, Clone, PartialEq)]
pub enum OrderTarget {
    /// Order by the key of the bucket.
    Key,
    /// Order by the doc count of the bucket.
    #[default]
    Count,
    /// Order by the value of the sub-aggregation metric identified by the given `String`.
    ///
    /// Only single value metrics are supported currently.
    SubAggregation(String),
}
|
|
|
|
impl From<&str> for OrderTarget {
|
|
fn from(val: &str) -> Self {
|
|
match val {
|
|
"_key" => OrderTarget::Key,
|
|
"_count" => OrderTarget::Count,
|
|
_ => OrderTarget::SubAggregation(val.to_string()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for OrderTarget {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match self {
|
|
OrderTarget::Key => f.write_str("_key"),
|
|
OrderTarget::Count => f.write_str("_count"),
|
|
OrderTarget::SubAggregation(agg) => agg.fmt(f),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Set the order. target is either "_count", "_key", or the name of
|
|
/// a metric sub_aggregation.
|
|
///
|
|
/// De/Serializes to elasticsearch compatible JSON.
|
|
///
|
|
/// Examples in JSON format:
|
|
/// { "_count": "asc" }
|
|
/// { "_key": "asc" }
|
|
/// { "average_price": "asc" }
|
|
#[derive(Clone, Default, Debug, PartialEq)]
|
|
pub struct CustomOrder {
|
|
/// The target property by which to sort by
|
|
pub target: OrderTarget,
|
|
/// The order asc or desc
|
|
pub order: Order,
|
|
}
|
|
|
|
impl Serialize for CustomOrder {
|
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|
where S: Serializer {
|
|
let map: HashMap<String, Order> =
|
|
std::iter::once((self.target.to_string(), self.order)).collect();
|
|
map.serialize(serializer)
|
|
}
|
|
}
|
|
|
|
impl<'de> Deserialize<'de> for CustomOrder {
|
|
fn deserialize<D>(deserializer: D) -> Result<CustomOrder, D::Error>
|
|
where D: Deserializer<'de> {
|
|
let value = serde_json::Value::deserialize(deserializer)?;
|
|
let return_err = |message, val: serde_json::Value| {
|
|
de::Error::custom(format!(
|
|
"{}, but got {}",
|
|
message,
|
|
serde_json::to_string(&val).unwrap()
|
|
))
|
|
};
|
|
|
|
match value {
|
|
serde_json::Value::Object(map) => {
|
|
if map.len() != 1 {
|
|
return Err(return_err(
|
|
"expected exactly one key-value pair in the order map",
|
|
map.into(),
|
|
));
|
|
}
|
|
|
|
let (key, value) = map.into_iter().next().unwrap();
|
|
let order = serde_json::from_value(value).map_err(de::Error::custom)?;
|
|
|
|
Ok(CustomOrder {
|
|
target: key.as_str().into(),
|
|
order,
|
|
})
|
|
}
|
|
serde_json::Value::Array(arr) => {
|
|
if arr.is_empty() {
|
|
return Err(return_err("unexpected empty array in order", arr.into()));
|
|
}
|
|
if arr.len() != 1 {
|
|
return Err(return_err(
|
|
"only one sort order supported currently",
|
|
arr.into(),
|
|
));
|
|
}
|
|
let entry = arr.into_iter().next().unwrap();
|
|
let map = entry
|
|
.as_object()
|
|
.ok_or_else(|| return_err("expected object as sort order", entry.clone()))?;
|
|
let (key, value) = map.into_iter().next().ok_or_else(|| {
|
|
return_err(
|
|
"expected exactly one key-value pair in the order map",
|
|
entry.clone(),
|
|
)
|
|
})?;
|
|
let order = serde_json::from_value(value.clone()).map_err(de::Error::custom)?;
|
|
|
|
Ok(CustomOrder {
|
|
target: key.as_str().into(),
|
|
order,
|
|
})
|
|
}
|
|
_ => Err(return_err(
|
|
"unexpected type, expected an object or array",
|
|
value,
|
|
)),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[test]
fn custom_order_serde_test() {
    let expected = CustomOrder {
        target: OrderTarget::Key,
        order: Order::Desc,
    };

    // Serialization yields a single-entry map keyed by the target name.
    let json = serde_json::to_string(&expected).unwrap();
    assert_eq!(json, r#"{"_key":"desc"}"#);

    // The bare object form and the one-element array form deserialize to the same value.
    let from_object: CustomOrder = serde_json::from_str(&json).unwrap();
    assert_eq!(expected, from_object);
    let from_array: CustomOrder = serde_json::from_str(r#"[{"_key":"desc"}]"#).unwrap();
    assert_eq!(expected, from_array);

    // An object without any entry is rejected.
    assert!(serde_json::from_str::<CustomOrder>("{}").is_err());

    // An empty array is rejected with a dedicated message.
    let empty_array_err = serde_json::from_str::<CustomOrder>("[]").unwrap_err();
    assert!(empty_array_err
        .to_string()
        .contains("unexpected empty array in order"));

    // More than one sort order in the array is not supported.
    let two_orders_err =
        serde_json::from_str::<CustomOrder>(r#"[{"_key":"desc"},{"_key":"desc"}]"#).unwrap_err();
    assert_eq!(
        two_orders_err.to_string(),
        r#"only one sort order supported currently, but got [{"_key":"desc"},{"_key":"desc"}]"#
    );
}
|