mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 09:00:42 +00:00
Compare commits
14 Commits
paul.masur
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb474cc0f6 | ||
|
|
d47abdf104 | ||
|
|
c11952eb7c | ||
|
|
09667ee9c8 | ||
|
|
333ccf5300 | ||
|
|
60a39a4689 | ||
|
|
f8f3e4277f | ||
|
|
ff1433713a | ||
|
|
ca139d8eb1 | ||
|
|
ac508108aa | ||
|
|
4fbae92187 | ||
|
|
a5d297c75f | ||
|
|
3a6a3de8d7 | ||
|
|
af3c6c0070 |
15
.github/workflows/coverage.yml
vendored
15
.github/workflows/coverage.yml
vendored
@@ -4,6 +4,9 @@ on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Ensures that we cancel running jobs for the same PR / same workflow.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
@@ -12,16 +15,20 @@ concurrency:
|
||||
jobs:
|
||||
coverage:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Install Rust
|
||||
run: rustup toolchain install nightly-2025-12-01 --profile minimal --component llvm-tools-preview
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- uses: taiki-e/install-action@cargo-llvm-cov
|
||||
- uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
|
||||
- uses: taiki-e/install-action@e4b3a0453201addddc06d3a72db90326aad87084 # cargo-llvm-cov
|
||||
- name: Generate code coverage
|
||||
run: cargo +nightly-2025-12-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v6
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
continue-on-error: true
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
|
||||
|
||||
10
.github/workflows/long_running.yml
vendored
10
.github/workflows/long_running.yml
vendored
@@ -8,6 +8,9 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
NUM_FUNCTIONAL_TEST_ITERATIONS: 20000
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Ensures that we cancel running jobs for the same PR / same workflow.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
@@ -18,10 +21,13 @@ jobs:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Install stable
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
|
||||
49
.github/workflows/scorecard.yml
vendored
Normal file
49
.github/workflows/scorecard.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: OpenSSF Scorecard
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0 * * 0'
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
analysis:
|
||||
name: Scorecards analysis
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed to upload the results to the code-scanning dashboard.
|
||||
security-events: write
|
||||
# Needed to publish results
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: 'Checkout code'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: 'Run analysis'
|
||||
uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3
|
||||
with:
|
||||
results_file: results.sarif
|
||||
results_format: sarif
|
||||
repo_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_results: true
|
||||
|
||||
# Upload the results as artifacts.
|
||||
- name: 'Upload artifact'
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: SARIF file
|
||||
path: results.sarif
|
||||
retention-days: 5
|
||||
|
||||
# Upload the results to GitHub's code scanning dashboard.
|
||||
- name: 'Upload to code-scanning'
|
||||
uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
28
.github/workflows/test.yml
vendored
28
.github/workflows/test.yml
vendored
@@ -9,6 +9,9 @@ on:
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Ensures that we cancel running jobs for the same PR / same workflow.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
@@ -19,23 +22,27 @@ jobs:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
checks: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install nightly
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
|
||||
with:
|
||||
toolchain: nightly
|
||||
profile: minimal
|
||||
components: rustfmt
|
||||
- name: Install stable
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
components: clippy
|
||||
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
|
||||
|
||||
- name: Check Formatting
|
||||
run: cargo +nightly fmt --all -- --check
|
||||
@@ -47,7 +54,7 @@ jobs:
|
||||
- name: Check Bench Compilation
|
||||
run: cargo +nightly bench --no-run --profile=dev --all-features
|
||||
|
||||
- uses: actions-rs/clippy-check@v1
|
||||
- uses: actions-rs/clippy-check@b5b5f21f4797c02da247df37026fcd0a5024aa4d # v1.0.7
|
||||
with:
|
||||
toolchain: stable
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -57,6 +64,9 @@ jobs:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
features:
|
||||
@@ -67,17 +77,17 @@ jobs:
|
||||
name: test-${{ matrix.features.label}}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Install stable
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
override: true
|
||||
|
||||
- uses: taiki-e/install-action@nextest
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- uses: taiki-e/install-action@56cc9adf3a3e2c23eafb56e8acaf9d0373cb845a # nextest
|
||||
- uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
[](https://docs.rs/crate/tantivy/)
|
||||
[](https://github.com/quickwit-oss/tantivy/actions/workflows/test.yml)
|
||||
[](https://codecov.io/gh/quickwit-oss/tantivy)
|
||||
[](https://scorecard.dev/viewer/?uri=github.com/quickwit-oss/tantivy)
|
||||
[](https://discord.gg/MT27AG5EVE)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://crates.io/crates/tantivy)
|
||||
|
||||
@@ -81,6 +81,11 @@ fn bench_agg(mut group: InputGroup<Index>) {
|
||||
register!(group, cardinality_agg);
|
||||
register!(group, terms_status_with_cardinality_agg);
|
||||
register!(group, terms_100_buckets_with_cardinality_agg);
|
||||
register!(group, terms_many_with_single_term_order_by_cardinality_agg);
|
||||
register!(
|
||||
group,
|
||||
terms_many_with_nested_terms_double_order_by_cardinality_agg
|
||||
);
|
||||
|
||||
register!(group, range_agg);
|
||||
register!(group, range_agg_with_avg_sub_agg);
|
||||
@@ -200,6 +205,60 @@ fn terms_100_buckets_with_cardinality_agg(index: &Index) {
|
||||
execute_agg(index, agg_req);
|
||||
}
|
||||
|
||||
fn terms_many_with_single_term_order_by_cardinality_agg(index: &Index) {
|
||||
let agg_req = json!({
|
||||
"my_texts": {
|
||||
"terms": { "field": "text_many_terms" },
|
||||
"aggs": {
|
||||
"nested_terms": {
|
||||
"terms": {
|
||||
"field": "single_term",
|
||||
"order": { "cardinality": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"cardinality": {
|
||||
"cardinality": { "field": "text_many_terms" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
});
|
||||
execute_agg(index, agg_req);
|
||||
}
|
||||
|
||||
// Two-level terms ordered by cardinality at each level: a high-card outer terms
|
||||
// (text_many_terms) ordered by a cardinality sub-agg, with a nested low-card terms
|
||||
// (single_term) also ordered by a cardinality sub-agg, plus an avg.
|
||||
fn terms_many_with_nested_terms_double_order_by_cardinality_agg(index: &Index) {
|
||||
let agg_req = json!({
|
||||
"by_ip": {
|
||||
"terms": {
|
||||
"field": "text_many_terms",
|
||||
"size": 50,
|
||||
"order": { "distinct_path": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"distinct_path": {
|
||||
"cardinality": { "field": "text_few_terms" }
|
||||
},
|
||||
"by_asn": {
|
||||
"terms": {
|
||||
"field": " single_term",
|
||||
"size": 10,
|
||||
"order": { "distinct_path2": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"avg_botscore": { "avg": { "field": "score" } },
|
||||
"distinct_path2": { "cardinality": { "field": "text_few_terms" } }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
execute_agg(index, agg_req);
|
||||
}
|
||||
|
||||
fn terms_7(index: &Index) {
|
||||
let agg_req = json!({
|
||||
"my_texts": { "terms": { "field": "text_few_terms_status" } },
|
||||
@@ -609,7 +668,8 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
|
||||
TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
|
||||
)
|
||||
.set_stored();
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype);
|
||||
let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
|
||||
let single_term = schema_builder.add_text_field("single_term", FAST);
|
||||
let json_field = schema_builder.add_json_field("json", FAST);
|
||||
let text_field_all_unique_terms =
|
||||
schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
|
||||
@@ -673,6 +733,8 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
|
||||
index_writer.add_document(doc!(
|
||||
json_field => json!({"mixed_type": 10.0}),
|
||||
json_field => json!({"mixed_type": 10.0}),
|
||||
single_term => "single_term",
|
||||
single_term => "single_term",
|
||||
text_field => "cool",
|
||||
text_field => "cool",
|
||||
text_field_all_unique_terms => "cool",
|
||||
@@ -707,6 +769,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
|
||||
json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
|
||||
};
|
||||
index_writer.add_document(doc!(
|
||||
single_term => "single_term",
|
||||
text_field => "cool",
|
||||
json_field => json,
|
||||
text_field_all_unique_terms => format!("unique_term_{}", rng.random::<u64>()),
|
||||
|
||||
@@ -199,6 +199,17 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
_bucket_id: BucketId,
|
||||
_sub_agg_name: &str,
|
||||
_sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
// Composite is a multi-bucket agg with no single value to extract.
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentCompositeCollector {
|
||||
|
||||
@@ -559,34 +559,30 @@ mod tests {
|
||||
page_size,
|
||||
agg_req,
|
||||
);
|
||||
if page_idx + 1 < page_count {
|
||||
assert!(
|
||||
res["my_composite"].get("after_key").is_some(),
|
||||
"expected after_key on all but last page"
|
||||
);
|
||||
after_key = Some(res["my_composite"]["after_key"].clone());
|
||||
} else if res["my_composite"].get("after_key").is_some() {
|
||||
// currently we sometime have an after_key on the last page,
|
||||
// check that the next "page" is empty
|
||||
let agg_req_json = json!({
|
||||
"my_composite": {
|
||||
"composite": {
|
||||
"sources": composite_agg_sources,
|
||||
"size": page_size,
|
||||
"after": res["my_composite"]["after_key"].clone(),
|
||||
}
|
||||
}
|
||||
});
|
||||
let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
|
||||
let res = exec_request(agg_req.clone(), index).unwrap();
|
||||
assert_eq!(
|
||||
res["my_composite"]["buckets"],
|
||||
json!([]),
|
||||
"expected no buckets when using after_key from last page, query: {:?}",
|
||||
agg_req
|
||||
);
|
||||
}
|
||||
assert!(
|
||||
res["my_composite"].get("after_key").is_some(),
|
||||
"expected after_key on every non-empty page"
|
||||
);
|
||||
after_key = Some(res["my_composite"]["after_key"].clone());
|
||||
}
|
||||
// Using the after_key from the last page must yield an empty page.
|
||||
let agg_req_json = json!({
|
||||
"my_composite": {
|
||||
"composite": {
|
||||
"sources": composite_agg_sources,
|
||||
"size": page_size,
|
||||
"after": after_key,
|
||||
}
|
||||
}
|
||||
});
|
||||
let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
|
||||
let res = exec_request(agg_req.clone(), index).unwrap();
|
||||
assert_eq!(
|
||||
res["my_composite"]["buckets"],
|
||||
json!([]),
|
||||
"expected no buckets when using after_key from last page, query: {:?}",
|
||||
agg_req
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -711,8 +707,28 @@ mod tests {
|
||||
{"key": {"myterm": "terme"}, "doc_count": 1}
|
||||
])
|
||||
);
|
||||
assert!(res["my_composite"].get("after_key").is_none());
|
||||
|
||||
// paginating past last page should be empty
|
||||
let agg_req_json = json!({
|
||||
"my_composite": {
|
||||
"composite": {
|
||||
"sources": [
|
||||
{"myterm": {"terms": {"field": "string_id"}}}
|
||||
],
|
||||
"size": 3,
|
||||
"after": &res["my_composite"]["after_key"]
|
||||
}
|
||||
}
|
||||
});
|
||||
let agg_req: Aggregations = serde_json::from_value(agg_req_json).unwrap();
|
||||
let res = exec_request(agg_req.clone(), &index).unwrap();
|
||||
assert!(res["my_composite"].get("after_key").is_none());
|
||||
assert_eq!(
|
||||
res["my_composite"]["buckets"],
|
||||
json!([]),
|
||||
"expected no buckets when using after_key from last page, query: {:?}",
|
||||
agg_req
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -820,7 +836,10 @@ mod tests {
|
||||
{"key": {"myterm": "apple"}, "doc_count": 1}
|
||||
])
|
||||
);
|
||||
assert!(res["fruity_aggreg"].get("after_key").is_none());
|
||||
assert_eq!(
|
||||
res["fruity_aggreg"]["after_key"],
|
||||
json!({"myterm": "str:apple"})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1792,7 +1811,14 @@ mod tests {
|
||||
{"key": {"month": ms_timestamp_from_iso_str("2021-02-01T00:00:00Z"), "category": "books"}, "doc_count": 1},
|
||||
]),
|
||||
);
|
||||
assert!(res["my_composite"].get("after_key").is_none());
|
||||
let feb_2021_ns = ms_timestamp_from_iso_str("2021-02-01T00:00:00Z") * 1_000_000;
|
||||
assert_eq!(
|
||||
res["my_composite"]["after_key"],
|
||||
json!({
|
||||
"month": format!("dt:{}", feb_2021_ns),
|
||||
"category": "str:books"
|
||||
})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -674,6 +674,17 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B>
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
_bucket_id: BucketId,
|
||||
_sub_agg_name: &str,
|
||||
_sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
// TODO: forward into the inner `sub_agg` for nested order paths (`filter.metric`).
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Intermediate result for filter aggregation
|
||||
|
||||
@@ -394,6 +394,17 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
_bucket_id: BucketId,
|
||||
_sub_agg_name: &str,
|
||||
_sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
// Histogram is a multi-bucket agg with no single value to extract.
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentHistogramCollector {
|
||||
|
||||
@@ -328,6 +328,17 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
_bucket_id: BucketId,
|
||||
_sub_agg_name: &str,
|
||||
_sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
// Range is a multi-bucket agg with no single value to extract.
|
||||
None
|
||||
}
|
||||
}
|
||||
/// Build a concrete `SegmentRangeCollector` with either a Vec- or HashMap-backed
|
||||
/// bucket storage, depending on the column type and aggregation level.
|
||||
|
||||
@@ -352,19 +352,15 @@ pub(crate) fn build_segment_term_collector(
|
||||
)));
|
||||
}
|
||||
|
||||
// Validate sub aggregation exists when ordering by sub-aggregation.
|
||||
{
|
||||
if let OrderTarget::SubAggregation(sub_agg_name) = &terms_req_data.req.order.target {
|
||||
let (agg_name, _agg_property) = get_agg_name_and_property(sub_agg_name);
|
||||
|
||||
node.get_sub_agg(agg_name, &req_data.per_request)
|
||||
.ok_or_else(|| {
|
||||
TantivyError::InvalidArgument(format!(
|
||||
"could not find aggregation with name {agg_name} in metric \
|
||||
sub_aggregations"
|
||||
))
|
||||
})?;
|
||||
}
|
||||
// Validate that the referenced sub-aggregation exists when ordering by one.
|
||||
if let OrderTarget::SubAggregation(sub_agg_name) = &terms_req_data.req.order.target {
|
||||
let (agg_name, _agg_property) = get_agg_name_and_property(sub_agg_name);
|
||||
node.get_sub_agg(agg_name, &req_data.per_request)
|
||||
.ok_or_else(|| {
|
||||
TantivyError::InvalidArgument(format!(
|
||||
"could not find aggregation with name {agg_name} in metric sub_aggregations"
|
||||
))
|
||||
})?;
|
||||
}
|
||||
|
||||
// Build sub-aggregation blueprint if there are children.
|
||||
@@ -887,6 +883,17 @@ impl<TermMap: TermAggregationMap, B: SubAggBuffer> SegmentAggregationCollector
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
_bucket_id: BucketId,
|
||||
_sub_agg_name: &str,
|
||||
_sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
// Terms is a multi-bucket agg with no single value to extract.
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Missing values are represented as a sentinel value in the column.
|
||||
@@ -960,9 +967,6 @@ where
|
||||
) -> crate::Result<IntermediateBucketResult> {
|
||||
let mut entries: Vec<(u64, Bucket)> = term_buckets.into_vec();
|
||||
|
||||
let order_by_sub_aggregation =
|
||||
matches!(term_req.req.order.target, OrderTarget::SubAggregation(_));
|
||||
|
||||
match &term_req.req.order.target {
|
||||
OrderTarget::Key => {
|
||||
// We rely on the fact that term ordinals match the order of the strings
|
||||
@@ -974,10 +978,37 @@ where
|
||||
entries.sort_unstable_by_key(|bucket| bucket.0);
|
||||
}
|
||||
}
|
||||
OrderTarget::SubAggregation(_name) => {
|
||||
// don't sort and cut off since it's hard to make assumptions on the quality of the
|
||||
// results when cutting off du to unknown nature of the sub_aggregation (possible
|
||||
// to check).
|
||||
OrderTarget::SubAggregation(sub_agg_path) => {
|
||||
// Peek segment-level metric values, sort, then fall through to
|
||||
// `cut_off_buckets`. Like Elasticsearch, we always cut off when ordering
|
||||
// by a sub-agg: top-K results are approximate and may differ from the
|
||||
// global ordering, especially for non-monotonic metrics like avg/min.
|
||||
let coll = sub_agg_collector.as_deref().ok_or_else(|| {
|
||||
TantivyError::InvalidArgument(format!(
|
||||
"Could not find sub-aggregation collector for path {sub_agg_path}"
|
||||
))
|
||||
})?;
|
||||
let (agg_name, agg_prop) = get_agg_name_and_property(sub_agg_path);
|
||||
// Fetch values up-front; otherwise sort would re-compute per comparison
|
||||
let mut keyed: Vec<(f64, (u64, Bucket))> = entries
|
||||
.into_iter()
|
||||
.map(|bucket| {
|
||||
let metric_value = coll
|
||||
.compute_metric_value(bucket.1.bucket_id, agg_name, agg_prop, agg_data)
|
||||
.unwrap_or(0.0);
|
||||
(metric_value, bucket)
|
||||
})
|
||||
.collect();
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
keyed.sort_unstable_by(|a, b| {
|
||||
b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
} else {
|
||||
keyed.sort_unstable_by(|a, b| {
|
||||
a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
}
|
||||
entries = keyed.into_iter().map(|(_, e)| e).collect();
|
||||
}
|
||||
OrderTarget::Count => {
|
||||
if term_req.req.order.order == Order::Desc {
|
||||
@@ -988,11 +1019,8 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
let (term_doc_count_before_cutoff, sum_other_doc_count) = if order_by_sub_aggregation {
|
||||
(0, 0)
|
||||
} else {
|
||||
cut_off_buckets(&mut entries, term_req.req.segment_size as usize)
|
||||
};
|
||||
let (term_doc_count_before_cutoff, sum_other_doc_count) =
|
||||
cut_off_buckets(&mut entries, term_req.req.segment_size as usize);
|
||||
|
||||
let mut dict: FxHashMap<IntermediateKey, IntermediateTermBucketEntry> = Default::default();
|
||||
dict.reserve(entries.len());
|
||||
@@ -1767,6 +1795,263 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn terms_aggregation_order_by_cardinality_desc_single_segment() -> crate::Result<()> {
|
||||
terms_aggregation_order_by_cardinality_desc(true)
|
||||
}
|
||||
#[test]
|
||||
fn terms_aggregation_order_by_cardinality_desc_multi_segment() -> crate::Result<()> {
|
||||
terms_aggregation_order_by_cardinality_desc(false)
|
||||
}
|
||||
fn terms_aggregation_order_by_cardinality_desc(merge_segments: bool) -> crate::Result<()> {
|
||||
// Distinct score values per bucket key: A→5, B→1, C→3.
|
||||
// Order by cardinality desc must yield A, C, B.
|
||||
let segment_and_terms = vec![vec![
|
||||
(1.0, "A".to_string()),
|
||||
(2.0, "A".to_string()),
|
||||
(3.0, "A".to_string()),
|
||||
(4.0, "A".to_string()),
|
||||
(5.0, "A".to_string()),
|
||||
(1.0, "B".to_string()),
|
||||
(1.0, "B".to_string()),
|
||||
(1.0, "B".to_string()),
|
||||
(1.0, "C".to_string()),
|
||||
(2.0, "C".to_string()),
|
||||
(3.0, "C".to_string()),
|
||||
]];
|
||||
let index = get_test_index_from_values_and_terms(merge_segments, &segment_and_terms)?;
|
||||
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"order": { "card": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"card": { "cardinality": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["card"]["value"], 5.0);
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["card"]["value"], 3.0);
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["card"]["value"], 1.0);
|
||||
|
||||
// Asc engages the segment-cutoff path too (monotonic-safe: discarded buckets had
|
||||
// local card >= cutoff, so merged card >= cutoff and they cannot be globally smallest).
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"order": { "card": "asc" }
|
||||
},
|
||||
"aggs": {
|
||||
"card": { "cardinality": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["key"], "A");
|
||||
|
||||
// size=2 with desc engages the segment cutoff: must keep top-2 by cardinality (A, C),
|
||||
// and `sum_other_doc_count` reflects the dropped B (3 docs).
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"size": 2,
|
||||
"order": { "card": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"card": { "cardinality": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
|
||||
|
||||
// size=2 with asc engages the segment cutoff: must keep bottom-2 by cardinality (B, C).
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"size": 2,
|
||||
"order": { "card": "asc" }
|
||||
},
|
||||
"aggs": {
|
||||
"card": { "cardinality": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn terms_aggregation_order_by_sum_single_segment() -> crate::Result<()> {
|
||||
terms_aggregation_order_by_sum(true)
|
||||
}
|
||||
#[test]
|
||||
fn terms_aggregation_order_by_sum_multi_segment() -> crate::Result<()> {
|
||||
terms_aggregation_order_by_sum(false)
|
||||
}
|
||||
fn terms_aggregation_order_by_sum(merge_segments: bool) -> crate::Result<()> {
|
||||
// Per-bucket sums on the U64 `score` column (non-negative => sum is monotonic):
|
||||
// A → 1+2+3+4+5 = 15, B → 1+1+1 = 3, C → 1+2+3 = 6.
|
||||
let segment_and_terms = vec![
|
||||
vec![
|
||||
(1.0, "A".to_string()),
|
||||
(2.0, "A".to_string()),
|
||||
(3.0, "A".to_string()),
|
||||
(1.0, "B".to_string()),
|
||||
(1.0, "C".to_string()),
|
||||
],
|
||||
vec![
|
||||
(4.0, "A".to_string()),
|
||||
(5.0, "A".to_string()),
|
||||
(1.0, "B".to_string()),
|
||||
(1.0, "B".to_string()),
|
||||
(2.0, "C".to_string()),
|
||||
(3.0, "C".to_string()),
|
||||
],
|
||||
];
|
||||
let index = get_test_index_from_values_and_terms(merge_segments, &segment_and_terms)?;
|
||||
|
||||
// Desc on a Sum metric engages the fast path (column is U64).
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"order": { "total": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"total": { "sum": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["total"]["value"], 15.0);
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["total"]["value"], 6.0);
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["total"]["value"], 3.0);
|
||||
|
||||
// Asc engages the fast path too — discarded buckets had local sum >= cutoff,
|
||||
// and merged sum >= local (non-negative addends), so they cannot be globally smallest.
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"order": { "total": "asc" }
|
||||
},
|
||||
"aggs": {
|
||||
"total": { "sum": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "B");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["key"], "A");
|
||||
|
||||
// size=2 desc with cutoff: top-2 by sum (A, C).
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"size": 2,
|
||||
"order": { "total": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"total": { "sum": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"].as_array().unwrap().len(), 2);
|
||||
|
||||
// Stats sub-property: ordering by `mystats.sum` on a U64 column also engages.
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"order": { "mystats.sum": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"mystats": { "stats": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
|
||||
|
||||
// Sum on a signed column (I64) takes the same cutoff path. Results may be
|
||||
// approximate near the boundary on adversarial data, but for this dataset the
|
||||
// top-K is unambiguous.
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"order": { "total": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"total": { "sum": { "field": "score_i64" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
|
||||
|
||||
// Order by extended_stats sub-property exercises compute_metric_value on the
|
||||
// ExtendedStats collector. A→max=5, B→max=1, C→max=3, so desc by max → A, C, B.
|
||||
let agg_req: Aggregations = serde_json::from_value(json!({
|
||||
"my_texts": {
|
||||
"terms": {
|
||||
"field": "string_id",
|
||||
"order": { "ext.max": "desc" }
|
||||
},
|
||||
"aggs": {
|
||||
"ext": { "extended_stats": { "field": "score" } }
|
||||
}
|
||||
}
|
||||
}))
|
||||
.unwrap();
|
||||
let res = exec_request(agg_req, &index)?;
|
||||
assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
|
||||
assert_eq!(res["my_texts"]["buckets"][1]["key"], "C");
|
||||
assert_eq!(res["my_texts"]["buckets"][2]["key"], "B");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn terms_aggregation_test_order_key_single_segment() -> crate::Result<()> {
|
||||
terms_aggregation_test_order_key_merge_segment(true)
|
||||
|
||||
@@ -177,6 +177,17 @@ impl SegmentAggregationCollector for TermMissingAgg {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
_bucket_id: BucketId,
|
||||
_sub_agg_name: &str,
|
||||
_sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
// TODO: forward to `sub_agg` for nested order paths (`missing_agg>metric`).
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1004,24 +1004,20 @@ impl IntermediateCompositeBucketResult {
|
||||
) -> crate::Result<BucketResult> {
|
||||
let trimmed_entry_vec =
|
||||
trim_composite_buckets(self.entries, &self.orders, self.target_size)?;
|
||||
let after_key = if trimmed_entry_vec.len() == req.size as usize {
|
||||
trimmed_entry_vec
|
||||
.last()
|
||||
.map(|bucket| {
|
||||
let (intermediate_key, _entry) = bucket;
|
||||
intermediate_key
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, intermediate_key)| {
|
||||
let source = &req.sources[idx];
|
||||
(source.name().to_string(), intermediate_key.clone().into())
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap()
|
||||
} else {
|
||||
FxHashMap::default()
|
||||
};
|
||||
let after_key = trimmed_entry_vec
|
||||
.last()
|
||||
.map(|bucket| {
|
||||
let (intermediate_key, _entry) = bucket;
|
||||
intermediate_key
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, intermediate_key)| {
|
||||
let source = &req.sources[idx];
|
||||
(source.name().to_string(), intermediate_key.clone().into())
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let buckets = trimmed_entry_vec
|
||||
.into_iter()
|
||||
|
||||
@@ -445,6 +445,28 @@ impl SegmentAggregationCollector for SegmentCardinalityCollector {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
bucket_id: BucketId,
|
||||
sub_agg_name: &str,
|
||||
sub_agg_property: &str,
|
||||
agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
let req_data = &agg_data.get_cardinality_req_data(self.accessor_idx);
|
||||
if req_data.name != sub_agg_name || !sub_agg_property.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let bucket = self.buckets.get(bucket_id as usize)?.as_ref()?;
|
||||
// For string columns the HLL sketch is empty until materialization; entries holds
|
||||
// the deduplicated term ordinals seen, which is the exact distinct count.
|
||||
// For numeric columns the sketch is populated during collect.
|
||||
if self.column_type == ColumnType::Str {
|
||||
Some(bucket.entries.len() as f64)
|
||||
} else {
|
||||
Some(bucket.cardinality.sketch.estimate().trunc())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -399,6 +399,26 @@ impl SegmentAggregationCollector for SegmentExtendedStatsCollector {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
bucket_id: BucketId,
|
||||
sub_agg_name: &str,
|
||||
sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
if self.name != sub_agg_name {
|
||||
return None;
|
||||
}
|
||||
let extended = self.buckets.get(bucket_id as usize)?;
|
||||
// Finalize is a pure read of accumulators — calling it here for the cutoff sort
|
||||
// doesn't disturb the eventual intermediate result.
|
||||
extended
|
||||
.finalize()
|
||||
.get_value(sub_agg_property)
|
||||
.ok()
|
||||
.flatten()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -312,6 +312,26 @@ impl SegmentAggregationCollector for SegmentPercentilesCollector {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
bucket_id: BucketId,
|
||||
sub_agg_name: &str,
|
||||
sub_agg_property: &str,
|
||||
agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
if agg_data.get_metric_req_data(self.accessor_idx).name != sub_agg_name {
|
||||
return None;
|
||||
}
|
||||
let percentile: f64 = sub_agg_property.parse().ok()?;
|
||||
if !(0.0..=100.0).contains(&percentile) {
|
||||
return None;
|
||||
}
|
||||
let bucket = self.buckets.get(bucket_id as usize)?;
|
||||
// DDSketch.quantile is a pure read; calling it here for the cutoff sort does
|
||||
// not affect the intermediate state used for the final result.
|
||||
bucket.sketch.quantile(percentile / 100.0).ok().flatten()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -321,6 +321,40 @@ impl<const COLUMN_TYPE_ID: u8> SegmentAggregationCollector
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
bucket_id: BucketId,
|
||||
sub_agg_name: &str,
|
||||
sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
if self.name != sub_agg_name {
|
||||
return None;
|
||||
}
|
||||
let stats = self.buckets.get(bucket_id as usize)?;
|
||||
// The property depends on what we're collecting:
|
||||
// - StatsType::Stats exposes count/sum/min/max/avg via dotted property.
|
||||
// - Single-value kinds (Sum/Count/Min/Max/Average) expect an empty property and return
|
||||
// the value they were configured to collect.
|
||||
let prop = match self.collecting_for {
|
||||
StatsType::Stats if !sub_agg_property.is_empty() => sub_agg_property,
|
||||
StatsType::Sum if sub_agg_property.is_empty() => "sum",
|
||||
StatsType::Count if sub_agg_property.is_empty() => "count",
|
||||
StatsType::Max if sub_agg_property.is_empty() => "max",
|
||||
StatsType::Min if sub_agg_property.is_empty() => "min",
|
||||
StatsType::Average if sub_agg_property.is_empty() => "avg",
|
||||
_ => return None,
|
||||
};
|
||||
match prop {
|
||||
"count" => Some(stats.count as f64),
|
||||
"sum" => Some(stats.sum),
|
||||
"min" if stats.count > 0 => Some(stats.min),
|
||||
"max" if stats.count > 0 => Some(stats.max),
|
||||
"avg" if stats.count > 0 => Some(stats.sum / stats.count as f64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
||||
@@ -644,6 +644,17 @@ impl SegmentAggregationCollector for TopHitsSegmentCollector {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
_bucket_id: BucketId,
|
||||
_sub_agg_name: &str,
|
||||
_sub_agg_property: &str,
|
||||
_agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
// top_hits is not a numeric metric and cannot be used as an order target.
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -76,6 +76,31 @@ pub trait SegmentAggregationCollector: Debug {
|
||||
fn flush(&mut self, _agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the segment-level metric value of the named direct-child metric for `bucket_id`.
|
||||
///
|
||||
/// Used by parent term aggs that order by a sub-aggregation: the parent sorts on
|
||||
/// this value and cuts off at segment time, matching the approximation tradeoff
|
||||
/// Elasticsearch makes for any sub-agg ordering.
|
||||
///
|
||||
/// `sub_agg_property` is the dotted suffix (e.g. `"sum"` in `mystats.sum`); empty when
|
||||
/// the metric is a single-value kind such as cardinality.
|
||||
///
|
||||
/// Returns `None` on name mismatch, unknown property, empty bucket, or when the collector
/// cannot serve as an order target (e.g. `top_hits`). Implementations
|
||||
/// may finalize their per-bucket state (e.g. compute a percentile from a sketch); calls
|
||||
/// must be idempotent so the final intermediate result is unaffected.
|
||||
///
|
||||
/// No default impl on purpose: every collector must decide explicitly whether it
|
||||
/// produces a metric value, forwards into children (single-bucket aggs), or rejects
|
||||
/// the lookup. A silent `None` default would let a parent term agg's cutoff sort all
|
||||
/// buckets to the same key and drop arbitrary winners.
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
bucket_id: BucketId,
|
||||
sub_agg_name: &str,
|
||||
sub_agg_property: &str,
|
||||
agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64>;
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@@ -137,4 +162,21 @@ impl SegmentAggregationCollector for GenericSegmentAggregationResultsCollector {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_metric_value(
|
||||
&self,
|
||||
bucket_id: BucketId,
|
||||
sub_agg_name: &str,
|
||||
sub_agg_property: &str,
|
||||
agg_data: &AggregationsSegmentCtx,
|
||||
) -> Option<f64> {
|
||||
for agg in &self.aggs {
|
||||
if let Some(value) =
|
||||
agg.compute_metric_value(bucket_id, sub_agg_name, sub_agg_property, agg_data)
|
||||
{
|
||||
return Some(value);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user