mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
* Initial impl * Added `Filter` impl in `build_single_agg_segment_collector_with_reader` + Added tests * Added `Filter(FilterBucketResult)` + Made tests work. * Fixed type issues. * Fixed a test. * 8a7a73a: Pass `segment_reader` * Added more tests. * Improved parsing + tests * refactoring * Added more tests. * refactoring: moved parsing code under QueryParser * Use Tantivy syntax instead of ES * Added a sanity check test. * Simplified impl + tests * Added back tests in a more maintable way * nitz. * nitz * implemented very simple fast-path * improved a comment * implemented fast field support * Used `BoundsRange` * Improved fast field impl + tests * Simplified execution. * Fixed exports + nitz * Improved the tests to check to the expected result. * Improved test by checking the whole result JSON * Removed brittle perf checks. * Added efficiency verification tests. * Added one more efficiency check test. * Improved the efficiency tests. * Removed unnecessary parsing code + added direct Query obj * Fixed tests. * Improved tests * Fixed code structure * Fixed lint issues * nitz. * nitz * nitz. * nitz. * nitz. * Added an example * Fixed PR comments. * Applied PR comments + nitz * nitz. * Improved the code. * Fixed a perf issue. * Added batch processing. * Made the example more interesting * Fixed bucket count * Renamed Direct to CustomQuery * Fixed lint issues. * No need for scorer to be an `Option` * nitz * Used BitSet * Added an optimization for AllQuery * Fixed merge issues. * Fixed lint issues. * Added benchmark for FILTER * Removed the Option wrapper. * nitz. * Applied PR comments. * Fixed the AllQuery optimization * Applied PR comments. * feat: used `erased_serde` to allow filter query to be serialized * further improved a comment * Added back tests. * removed an unused method * removed an unused method * Added documentation * nitz. * Added query builder. * Fixed a comment. * Applied PR comments. * Fixed doctest issues. 
* Added ser/de * Removed bench in test * Fixed a lint issue.
213 lines
7.2 KiB
Rust
213 lines
7.2 KiB
Rust
// # Filter Aggregation Example
//
// This example demonstrates filter aggregations - creating buckets of documents
// matching specific queries, with nested aggregations computed on each bucket.
//
// Filter aggregations are useful for computing metrics on different subsets of
// your data in a single query, like "average price overall + average price for
// electronics + count of in-stock items".
|
|
|
|
use serde_json::json;
|
|
use tantivy::aggregation::agg_req::Aggregations;
|
|
use tantivy::aggregation::AggregationCollector;
|
|
use tantivy::query::AllQuery;
|
|
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
|
|
use tantivy::{doc, Index};
|
|
|
|
fn main() -> tantivy::Result<()> {
|
|
// Create a simple product schema
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_text_field("category", TEXT | FAST);
|
|
schema_builder.add_text_field("brand", TEXT | FAST);
|
|
schema_builder.add_u64_field("price", FAST);
|
|
schema_builder.add_f64_field("rating", FAST);
|
|
schema_builder.add_bool_field("in_stock", FAST | INDEXED);
|
|
let schema = schema_builder.build();
|
|
|
|
// Create index and add sample products
|
|
let index = Index::create_in_ram(schema.clone());
|
|
let mut writer = index.writer(50_000_000)?;
|
|
|
|
writer.add_document(doc!(
|
|
schema.get_field("category")? => "electronics",
|
|
schema.get_field("brand")? => "apple",
|
|
schema.get_field("price")? => 999u64,
|
|
schema.get_field("rating")? => 4.5f64,
|
|
schema.get_field("in_stock")? => true
|
|
))?;
|
|
writer.add_document(doc!(
|
|
schema.get_field("category")? => "electronics",
|
|
schema.get_field("brand")? => "samsung",
|
|
schema.get_field("price")? => 799u64,
|
|
schema.get_field("rating")? => 4.2f64,
|
|
schema.get_field("in_stock")? => true
|
|
))?;
|
|
writer.add_document(doc!(
|
|
schema.get_field("category")? => "clothing",
|
|
schema.get_field("brand")? => "nike",
|
|
schema.get_field("price")? => 120u64,
|
|
schema.get_field("rating")? => 4.1f64,
|
|
schema.get_field("in_stock")? => false
|
|
))?;
|
|
writer.add_document(doc!(
|
|
schema.get_field("category")? => "books",
|
|
schema.get_field("brand")? => "penguin",
|
|
schema.get_field("price")? => 25u64,
|
|
schema.get_field("rating")? => 4.8f64,
|
|
schema.get_field("in_stock")? => true
|
|
))?;
|
|
|
|
writer.commit()?;
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
|
|
// Example 1: Basic filter with metric aggregation
|
|
println!("=== Example 1: Electronics average price ===");
|
|
let agg_req = json!({
|
|
"electronics": {
|
|
"filter": "category:electronics",
|
|
"aggs": {
|
|
"avg_price": { "avg": { "field": "price" } }
|
|
}
|
|
}
|
|
});
|
|
|
|
let agg: Aggregations = serde_json::from_value(agg_req)?;
|
|
let collector = AggregationCollector::from_aggs(agg, Default::default());
|
|
let result = searcher.search(&AllQuery, &collector)?;
|
|
|
|
let expected = json!({
|
|
"electronics": {
|
|
"doc_count": 2,
|
|
"avg_price": { "value": 899.0 }
|
|
}
|
|
});
|
|
assert_eq!(serde_json::to_value(&result)?, expected);
|
|
println!("{}\n", serde_json::to_string_pretty(&result)?);
|
|
|
|
// Example 2: Multiple independent filters
|
|
println!("=== Example 2: Multiple filters in one query ===");
|
|
let agg_req = json!({
|
|
"electronics": {
|
|
"filter": "category:electronics",
|
|
"aggs": { "avg_price": { "avg": { "field": "price" } } }
|
|
},
|
|
"in_stock": {
|
|
"filter": "in_stock:true",
|
|
"aggs": { "count": { "value_count": { "field": "brand" } } }
|
|
},
|
|
"high_rated": {
|
|
"filter": "rating:[4.5 TO *]",
|
|
"aggs": { "count": { "value_count": { "field": "brand" } } }
|
|
}
|
|
});
|
|
|
|
let agg: Aggregations = serde_json::from_value(agg_req)?;
|
|
let collector = AggregationCollector::from_aggs(agg, Default::default());
|
|
let result = searcher.search(&AllQuery, &collector)?;
|
|
|
|
let expected = json!({
|
|
"electronics": {
|
|
"doc_count": 2,
|
|
"avg_price": { "value": 899.0 }
|
|
},
|
|
"in_stock": {
|
|
"doc_count": 3,
|
|
"count": { "value": 3.0 }
|
|
},
|
|
"high_rated": {
|
|
"doc_count": 2,
|
|
"count": { "value": 2.0 }
|
|
}
|
|
});
|
|
assert_eq!(serde_json::to_value(&result)?, expected);
|
|
println!("{}\n", serde_json::to_string_pretty(&result)?);
|
|
|
|
// Example 3: Nested filters - progressive refinement
|
|
println!("=== Example 3: Nested filters ===");
|
|
let agg_req = json!({
|
|
"in_stock": {
|
|
"filter": "in_stock:true",
|
|
"aggs": {
|
|
"electronics": {
|
|
"filter": "category:electronics",
|
|
"aggs": {
|
|
"expensive": {
|
|
"filter": "price:[800 TO *]",
|
|
"aggs": {
|
|
"avg_rating": { "avg": { "field": "rating" } }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
});
|
|
|
|
let agg: Aggregations = serde_json::from_value(agg_req)?;
|
|
let collector = AggregationCollector::from_aggs(agg, Default::default());
|
|
let result = searcher.search(&AllQuery, &collector)?;
|
|
|
|
let expected = json!({
|
|
"in_stock": {
|
|
"doc_count": 3, // apple, samsung, penguin
|
|
"electronics": {
|
|
"doc_count": 2, // apple, samsung
|
|
"expensive": {
|
|
"doc_count": 1, // only apple (999)
|
|
"avg_rating": { "value": 4.5 }
|
|
}
|
|
}
|
|
}
|
|
});
|
|
assert_eq!(serde_json::to_value(&result)?, expected);
|
|
println!("{}\n", serde_json::to_string_pretty(&result)?);
|
|
|
|
// Example 4: Filter with sub-aggregation (terms)
|
|
println!("=== Example 4: Filter with terms sub-aggregation ===");
|
|
let agg_req = json!({
|
|
"electronics": {
|
|
"filter": "category:electronics",
|
|
"aggs": {
|
|
"by_brand": {
|
|
"terms": { "field": "brand" },
|
|
"aggs": {
|
|
"avg_price": { "avg": { "field": "price" } }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
});
|
|
|
|
let agg: Aggregations = serde_json::from_value(agg_req)?;
|
|
let collector = AggregationCollector::from_aggs(agg, Default::default());
|
|
let result = searcher.search(&AllQuery, &collector)?;
|
|
|
|
let expected = json!({
|
|
"electronics": {
|
|
"doc_count": 2,
|
|
"by_brand": {
|
|
"buckets": [
|
|
{
|
|
"key": "samsung",
|
|
"doc_count": 1,
|
|
"avg_price": { "value": 799.0 }
|
|
},
|
|
{
|
|
"key": "apple",
|
|
"doc_count": 1,
|
|
"avg_price": { "value": 999.0 }
|
|
}
|
|
],
|
|
"sum_other_doc_count": 0,
|
|
"doc_count_error_upper_bound": 0
|
|
}
|
|
}
|
|
});
|
|
assert_eq!(serde_json::to_value(&result)?, expected);
|
|
println!("{}", serde_json::to_string_pretty(&result)?);
|
|
|
|
Ok(())
|
|
}
|