Compare commits

..

1 Commits

Author SHA1 Message Date
Paul Masurel
468850e9f4 Buffer up to 2048 doc ids in for_each_docset_buffered
The no-score collection path (Weight::for_each_no_score) handed the
collector's collect_block one COLLECT_BLOCK_BUFFER_LEN (64) block at a
time. For aggregations this is the dominant path, and 64 docs per
collect_block under-amortizes the per-call overhead.

for_each_docset_buffered now owns a 2048-element heap buffer and fills
it through successive fill_buffer calls over 64-element windows, flushing
a single larger block to collect_block. fill_buffer keeps its 64-element
window contract, so no DocSet implementation changes.

The buffer is allocated with Box::new_zeroed_slice (stable since 1.92,
hence the MSRV bump) to zero directly on the heap.
2026-06-01 15:25:39 +02:00
40 changed files with 559 additions and 2551 deletions

View File

@@ -20,7 +20,7 @@ jobs:
contents: read
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install Rust
run: rustup toolchain install nightly-2025-12-01 --profile minimal --component llvm-tools-preview
- uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
@@ -28,7 +28,7 @@ jobs:
- name: Generate code coverage
run: cargo +nightly-2025-12-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
- name: Upload coverage to Codecov
uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
continue-on-error: true
with:
token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos

View File

@@ -25,7 +25,7 @@ jobs:
contents: read
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install stable
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
with:

View File

@@ -22,7 +22,7 @@ jobs:
steps:
- name: 'Checkout code'
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
@@ -36,7 +36,7 @@ jobs:
# Upload the results as artifacts.
- name: 'Upload artifact'
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: SARIF file
path: results.sarif
@@ -44,6 +44,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: 'Upload to code-scanning'
uses: github/codeql-action/upload-sarif@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4.36.1
uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
with:
sarif_file: results.sarif

View File

@@ -27,7 +27,7 @@ jobs:
checks: write
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install nightly
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
@@ -77,7 +77,7 @@ jobs:
name: test-${{ matrix.features.label}}
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install stable
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7

View File

@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
readme = "README.md"
keywords = ["search", "information", "retrieval"]
edition = "2021"
rust-version = "1.86"
rust-version = "1.92"
exclude = ["benches/*.json", "benches/*.txt"]
[dependencies]

View File

@@ -66,9 +66,6 @@ fn bench_agg(mut group: InputGroup<Index>) {
register!(group, terms_status_with_terms_zipf_1000_sub_agg);
register!(group, terms_zipf_1000_with_terms_status_sub_agg);
register!(group, terms_status_with_histogram);
register!(group, terms_status_with_date_histogram);
register!(group, terms_status_with_date_histogram_hard_bounds);
register!(group, terms_status_with_date_histogram_and_sibling_terms);
register!(group, terms_zipf_1000);
register!(group, terms_zipf_1000_with_histogram);
register!(group, terms_zipf_1000_with_avg_sub_agg);
@@ -393,57 +390,6 @@ fn terms_status_with_histogram(index: &Index) {
execute_agg(index, agg_req);
}
/// Benchmarks a terms aggregation on `text_few_terms_status` with a nested
/// `date_histogram` over `timestamp`, bucketed by a fixed one-hour interval.
fn terms_status_with_date_histogram(index: &Index) {
    // Build the sub-aggregation first so the outer request stays easy to read.
    let over_time = json!({
        "date_histogram": { "field": "timestamp", "fixed_interval": "1h" }
    });
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms_status" },
            "aggs": { "over_time": over_time }
        }
    });
    execute_agg(index, request);
}
/// Variant of the terms × date_histogram benchmark that adds `hard_bounds`.
///
/// The benchmark timestamps span 0..120h. The bounds (in ms: 1h = 3_600_000,
/// 119h = 428_400_000) only cut off the first and the last hour, so nearly every doc is
/// in-bounds. The intent is to exercise the collector's hard-bounds path: `bounds.contains`
/// is evaluated per doc (the `all_docs_in_bounds` short-circuit is off) and the rare
/// out-of-bounds doc goes through the `term_counts` branch.
fn terms_status_with_date_histogram_hard_bounds(index: &Index) {
    let date_histogram = json!({
        "field": "timestamp",
        "fixed_interval": "1h",
        "hard_bounds": { "min": 3_600_000, "max": 428_400_000 }
    });
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms_status" },
            "aggs": {
                "over_time": { "date_histogram": date_histogram }
            }
        }
    });
    execute_agg(index, request);
}
/// Variant of the terms × date_histogram benchmark with an additional, unrelated top-level
/// terms aggregation (`other_texts`) next to `my_texts`.
///
/// Sibling aggregations are independent top-level aggregations, so the fused fast path is
/// expected to remain eligible for `my_texts`.
fn terms_status_with_date_histogram_and_sibling_terms(index: &Index) {
    let fused_terms = json!({
        "terms": { "field": "text_few_terms_status" },
        "aggs": {
            "over_time": { "date_histogram": { "field": "timestamp", "fixed_interval": "1h" } }
        }
    });
    let sibling_terms = json!({ "terms": { "field": "text_few_terms" } });
    let request = json!({
        "my_texts": fused_terms,
        "other_texts": sibling_terms
    });
    execute_agg(index, request);
}
fn terms_zipf_1000_with_histogram(index: &Index) {
let agg_req = json!({
"my_texts": {
@@ -837,9 +783,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
doc_with_value /= 20;
}
let _val_max = 1_000_000.0;
const SPAN_MS: i64 = 120 * 3600 * 1000; // 120 hours in ms
const NOISE_MS: i64 = 2 * 3600 * 1000; // ±2h noise
for i in 0..doc_with_value {
for _ in 0..doc_with_value {
let val: f64 = rng.random_range(0.0..1_000_000.0);
let json = if rng.random_bool(0.1) {
// 10% are numeric values
@@ -847,9 +791,6 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
} else {
json!({"mixed_type": many_terms_data.choose(&mut rng).unwrap().to_string()})
};
let base_ms = (i as i64 * SPAN_MS) / doc_with_value as i64;
let noise_ms = rng.random_range(-NOISE_MS..NOISE_MS);
let ts_ms = (base_ms + noise_ms).clamp(0, SPAN_MS);
index_writer.add_document(doc!(
single_term => "single_term",
text_field => "cool",
@@ -862,7 +803,7 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
score_field => val as u64,
score_field_f64 => lg_norm.sample(&mut rng),
score_field_i64 => val as i64,
date_field => DateTime::from_timestamp_millis(ts_ms),
date_field => DateTime::from_timestamp_millis((val * 1_000_000.) as i64),
))?;
if cardinality == Cardinality::OptionalSparse {
for _ in 0..20 {

View File

@@ -110,31 +110,43 @@ fn main() {
// Prepare corpora with varying scenarios
let scenarios = vec![
(
"dense and 0.1% a".to_string(),
5_000_000,
0.001,
"dense and 99% a".to_string(),
10_000_000,
0.99,
"dense",
0,
9,
),
("dense and 1% a".to_string(), 5_000_000, 0.01, "dense", 0, 9),
("dense and 10% a".to_string(), 5_000_000, 0.1, "dense", 0, 9),
(
"sparse and 50% a".to_string(),
5_000_000,
"dense and 99% a".to_string(),
10_000_000,
0.99,
"dense",
990,
999,
),
(
"sparse and 99% a".to_string(),
10_000_000,
0.99,
"sparse",
0,
9,
),
(
"sparse and 99% a".to_string(),
10_000_000,
0.99,
"sparse",
9_999_990,
9_999_999,
),
];
let mut runner = BenchRunner::new();
for (scenario_id, num_docs, p_title_a, num_rand_distribution, range_low, range_high) in
scenarios
{
for (scenario_id, n, p_title_a, num_rand_distribution, range_low, range_high) in scenarios {
// Build index for this scenario
let bench_index = build_shared_indices(num_docs, p_title_a, num_rand_distribution);
let bench_index = build_shared_indices(n, p_title_a, num_rand_distribution);
// Create benchmark group
let mut group = runner.new_group();
@@ -146,7 +158,7 @@ fn main() {
let field_names = ["num_rand", "num_asc", "num_rand_fast", "num_asc_fast"];
// Define the three terms we want to test with
let terms = ["a"];
let terms = ["a", "b", "z"];
// Generate all combinations of terms and field names
let mut queries = Vec::new();
@@ -191,7 +203,7 @@ fn run_benchmark_tasks(
bench_index,
query_str,
DocSetCollector,
"all_results",
"all results",
);
// Test top 100 by the field (if it's a FAST field)

View File

@@ -18,10 +18,5 @@ homepage = "https://github.com/quickwit-oss/tantivy"
bitpacking = { version = "0.9.2", default-features = false, features = ["bitpacker1x"] }
[dev-dependencies]
binggan = "0.17.0"
rand = "0.9"
proptest = "1"
[[bench]]
name = "bench"
harness = false

View File

@@ -1,110 +1,65 @@
use std::cell::RefCell;
#![feature(test)]
use binggan::{BenchRunner, black_box};
use rand::rng;
use rand::seq::IteratorRandom;
use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};
extern crate test;
fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
let mut bitpacker = BitPacker::new();
let mut buffer = Vec::new();
for _ in 0..num_els {
bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
bitpacker.flush(&mut buffer).unwrap();
}
buffer
}
#[cfg(test)]
mod tests {
use rand::rng;
use rand::seq::IteratorRandom;
use tantivy_bitpacker::{BitPacker, BitUnpacker, BlockedBitpacker};
use test::Bencher;
const N: usize = 100_000;
const MAX_VAL: u64 = 1_000;
const BIT_WIDTH: u8 = 10; // 2^10 = 1024 > MAX_VAL
fn create_packed_data() -> (BitUnpacker, Vec<u8>) {
let mut bitpacker = BitPacker::new();
let mut data = Vec::new();
for i in 0..N as u64 {
let val = i * MAX_VAL / N as u64;
bitpacker.write(val, BIT_WIDTH, &mut data).unwrap();
}
bitpacker.close(&mut data).unwrap();
(BitUnpacker::new(BIT_WIDTH), data)
}
fn bench_bitpacking() {
let mut runner = BenchRunner::new();
let bit_width = 3;
let num_els = 1_000_000u32;
let bit_unpacker = BitUnpacker::new(bit_width);
let data = create_bitpacked_data(bit_width, num_els);
let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
runner.bench_function("bitpacking_read", move |_| {
let mut out = 0u64;
for &idx in &idxs {
out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
#[inline(never)]
fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
let mut bitpacker = BitPacker::new();
let mut buffer = Vec::new();
for _ in 0..num_els {
// the values do not matter.
bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
bitpacker.flush(&mut buffer).unwrap();
}
black_box(out);
});
}
fn bench_blocked_bitpacker() {
let mut runner = BenchRunner::new();
let mut blocked_bitpacker = BlockedBitpacker::new();
for val in 0..=21500 {
blocked_bitpacker.add(val * val);
buffer
}
runner.bench_function("blockedbitp_read", move |_| {
let mut out = 0u64;
for val in 0..=21500 {
out = out.wrapping_add(blocked_bitpacker.get(val));
}
black_box(out);
});
runner.bench_function("blockedbitp_create", |_| {
#[bench]
fn bench_bitpacking_read(b: &mut Bencher) {
let bit_width = 3;
let num_els = 1_000_000u32;
let bit_unpacker = BitUnpacker::new(bit_width);
let data = create_bitpacked_data(bit_width, num_els);
let idxs: Vec<u32> = (0..num_els).choose_multiple(&mut rng(), 100_000);
b.iter(|| {
let mut out = 0u64;
for &idx in &idxs {
out = out.wrapping_add(bit_unpacker.get(idx, &data[..]));
}
out
});
}
#[bench]
fn bench_blockedbitp_read(b: &mut Bencher) {
let mut blocked_bitpacker = BlockedBitpacker::new();
for val in 0..=21500 {
blocked_bitpacker.add(val * val);
}
black_box(blocked_bitpacker);
});
}
fn bench_filter_vec() {
let mut runner = BenchRunner::new();
let (unpacker, data) = create_packed_data();
let positions = RefCell::new(Vec::with_capacity(N));
runner.bench_function("filter_vec_dense", move |_| {
unpacker.get_ids_for_value_range(
250..=750,
0..N as u32,
&data,
&mut positions.borrow_mut(),
);
black_box(positions.borrow().len());
});
let (unpacker, data) = create_packed_data();
let positions = RefCell::new(Vec::with_capacity(N));
runner.bench_function("filter_vec_sparse", move |_| {
unpacker.get_ids_for_value_range(0..=50, 0..N as u32, &data, &mut positions.borrow_mut());
black_box(positions.borrow().len());
});
let (unpacker, data) = create_packed_data();
let positions = RefCell::new(Vec::with_capacity(N));
runner.bench_function("filter_vec_full", move |_| {
unpacker.get_ids_for_value_range(
0..=MAX_VAL,
0..N as u32,
&data,
&mut positions.borrow_mut(),
);
black_box(positions.borrow().len());
});
}
fn main() {
bench_bitpacking();
bench_blocked_bitpacker();
bench_filter_vec();
b.iter(|| {
let mut out = 0u64;
for val in 0..=21500 {
out = out.wrapping_add(blocked_bitpacker.get(val));
}
out
});
}
#[bench]
fn bench_blockedbitp_create(b: &mut Bencher) {
b.iter(|| {
let mut blocked_bitpacker = BlockedBitpacker::new();
for val in 0..=21500 {
blocked_bitpacker.add(val * val);
}
blocked_bitpacker
});
}
}

View File

@@ -1,17 +1,8 @@
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
use std::arch::is_aarch64_feature_detected;
use std::ops::RangeInclusive;
#[cfg(target_arch = "x86_64")]
mod avx2;
#[cfg(target_arch = "aarch64")]
mod neon;
// SVE intrinsics are not exposed on aarch64-apple-darwin.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
mod sve;
mod scalar;
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
@@ -19,10 +10,6 @@ mod scalar;
enum FilterImplPerInstructionSet {
#[cfg(target_arch = "x86_64")]
AVX2 = 0u8,
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
SVE = 3u8,
#[cfg(target_arch = "aarch64")]
Neon = 2u8,
Scalar = 1u8,
}
@@ -32,57 +19,29 @@ impl FilterImplPerInstructionSet {
match *self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => is_x86_feature_detected!("avx2"),
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
FilterImplPerInstructionSet::SVE => is_aarch64_feature_detected!("sve"),
// NEON is mandatory on aarch64, so it is always available.
#[cfg(target_arch = "aarch64")]
FilterImplPerInstructionSet::Neon => true,
FilterImplPerInstructionSet::Scalar => true,
}
}
}
// List of available implementations in preferred order.
// List of available implementation in preferred order.
#[cfg(target_arch = "x86_64")]
const IMPLS: [FilterImplPerInstructionSet; 2] = [
FilterImplPerInstructionSet::AVX2,
FilterImplPerInstructionSet::Scalar,
];
// Non-Apple aarch64: try SVE, NEON, Scalar.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
const IMPLS: [FilterImplPerInstructionSet; 3] = [
FilterImplPerInstructionSet::SVE,
FilterImplPerInstructionSet::Neon,
FilterImplPerInstructionSet::Scalar,
];
// Apple aarch64 (M-series): SVE not available; use NEON or Scalar.
#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
const IMPLS: [FilterImplPerInstructionSet; 2] = [
FilterImplPerInstructionSet::Neon,
FilterImplPerInstructionSet::Scalar,
];
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[cfg(not(target_arch = "x86_64"))]
const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructionSet::Scalar];
impl FilterImplPerInstructionSet {
#[inline]
#[allow(unused_variables)]
#[allow(unused_variables)] // on non-x86_64, code is unused.
fn from(code: u8) -> FilterImplPerInstructionSet {
#[cfg(target_arch = "x86_64")]
if code == FilterImplPerInstructionSet::AVX2 as u8 {
return FilterImplPerInstructionSet::AVX2;
}
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
if code == FilterImplPerInstructionSet::SVE as u8 {
return FilterImplPerInstructionSet::SVE;
}
#[cfg(target_arch = "aarch64")]
if code == FilterImplPerInstructionSet::Neon as u8 {
return FilterImplPerInstructionSet::Neon;
}
FilterImplPerInstructionSet::Scalar
}
@@ -91,13 +50,6 @@ impl FilterImplPerInstructionSet {
match self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
// SAFETY: SVE availability was verified by is_available() before selecting this impl.
FilterImplPerInstructionSet::SVE => unsafe {
sve::filter_vec_in_place(range, offset, output)
},
#[cfg(target_arch = "aarch64")]
FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output),
FilterImplPerInstructionSet::Scalar => {
scalar::filter_vec_in_place(range, offset, output)
}
@@ -105,12 +57,6 @@ impl FilterImplPerInstructionSet {
}
}
/// Yields the entries of `IMPLS` that report themselves as available on the
/// running CPU, keeping the order in which `IMPLS` lists them.
fn available_impls() -> impl Iterator<Item = FilterImplPerInstructionSet> {
    IMPLS
        .into_iter()
        .filter(|candidate| candidate.is_available())
}
#[inline]
fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
use std::sync::atomic::{AtomicU8, Ordering};
@@ -118,7 +64,10 @@ fn get_best_available_instruction_set() -> FilterImplPerInstructionSet {
let instruction_set_byte: u8 = INSTRUCTION_SET_BYTE.load(Ordering::Relaxed);
if instruction_set_byte == u8::MAX {
// Let's initialize the instruction set and cache it.
let instruction_set = available_impls().next().unwrap();
let instruction_set = IMPLS
.into_iter()
.find(FilterImplPerInstructionSet::is_available)
.unwrap();
INSTRUCTION_SET_BYTE.store(instruction_set as u8, Ordering::Relaxed);
return instruction_set;
}
@@ -131,12 +80,12 @@ pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut
#[cfg(test)]
mod tests {
use proptest::strategy::Strategy;
use super::*;
#[test]
fn test_get_best_available_instruction_set() {
// This does not test much unfortunately.
// We just make sure the function returns without crashing and returns the same result.
let instruction_set = get_best_available_instruction_set();
assert_eq!(get_best_available_instruction_set(), instruction_set);
}
@@ -153,31 +102,6 @@ mod tests {
}
}
/// Every instruction set compiled in on non-Apple aarch64 must survive a
/// round trip through its `u8` code.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[test]
fn test_instruction_set_to_code_from_code() {
    let instruction_sets = [
        FilterImplPerInstructionSet::SVE,
        FilterImplPerInstructionSet::Neon,
        FilterImplPerInstructionSet::Scalar,
    ];
    for instruction_set in instruction_sets {
        let decoded = FilterImplPerInstructionSet::from(instruction_set as u8);
        assert_eq!(instruction_set, decoded);
    }
}
/// Every instruction set compiled in on Apple aarch64 (no SVE variant) must
/// survive a round trip through its `u8` code.
#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
#[test]
fn test_instruction_set_to_code_from_code() {
    let instruction_sets = [
        FilterImplPerInstructionSet::Neon,
        FilterImplPerInstructionSet::Scalar,
    ];
    for instruction_set in instruction_sets {
        let decoded = FilterImplPerInstructionSet::from(instruction_set as u8);
        assert_eq!(instruction_set, decoded);
    }
}
fn test_filter_impl_empty_aux(filter_impl: FilterImplPerInstructionSet) {
let mut output = vec![];
filter_impl.filter_vec_in_place(0..=u32::MAX, 0, &mut output);
@@ -202,20 +126,11 @@ mod tests {
assert_eq!(&output, &[1, 3, 4, 5, 6, 7, 8]);
}
/// Checks that an inverted range (start > end) filters everything out.
///
/// `RangeInclusive::contains` is always false for such a range, so no id may survive.
/// This guards the SVE path in particular, whose `wrapping_sub` would otherwise
/// produce a huge `range_width`.
fn test_filter_impl_empty_range_aux(filter_impl: FilterImplPerInstructionSet) {
    let mut ids: Vec<u32> = vec![3, 2, 1, 5, 11, 2, 5, 10, 2];
    filter_impl.filter_vec_in_place(10..=5, 0, &mut ids);
    assert_eq!(ids, Vec::<u32>::new());
}
fn test_filter_impl_test_suite(filter_impl: FilterImplPerInstructionSet) {
test_filter_impl_empty_aux(filter_impl);
test_filter_impl_simple_aux(filter_impl);
test_filter_impl_simple_aux_shifted(filter_impl);
test_filter_impl_simple_outside_i32_range(filter_impl);
test_filter_impl_empty_range_aux(filter_impl);
}
#[test]
@@ -226,60 +141,25 @@ mod tests {
}
}
/// Runs the shared filter test suite against the SVE implementation, but only
/// when the running CPU actually reports SVE support.
#[test]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
fn test_filter_implementation_sve() {
    let sve = FilterImplPerInstructionSet::SVE;
    if !sve.is_available() {
        // Nothing to test on hardware without SVE.
        return;
    }
    test_filter_impl_test_suite(sve);
}
/// Runs the shared filter test suite against the NEON implementation.
#[test]
#[cfg(target_arch = "aarch64")]
fn test_filter_implementation_neon() {
    let neon = FilterImplPerInstructionSet::Neon;
    test_filter_impl_test_suite(neon);
}
#[test]
fn test_filter_implementation_scalar() {
test_filter_impl_test_suite(FilterImplPerInstructionSet::Scalar);
}
/// Proptest strategy producing an upper bound for generated values: either a
/// small number, a number around 256, `1 << 25`, or a value at the very top of
/// the `u32` range.
fn max_val_strategy() -> impl proptest::strategy::Strategy<Value = u32> {
    use proptest::prelude::Just;

    proptest::prop_oneof![
        0u32..10u32,
        255u32..258u32,
        Just(1u32 << 25),
        Just(u32::MAX - 1),
        Just(u32::MAX),
    ]
}
fn vals_strategy() -> impl proptest::strategy::Strategy<Value = Vec<u32>> {
proptest::prop_oneof![
proptest::collection::vec(proptest::prelude::any::<u32>(), 0..300),
max_val_strategy()
.prop_flat_map(|max_val| { proptest::collection::vec(0..=max_val, 0..300) })
]
}
#[cfg(target_arch = "x86_64")]
proptest::proptest! {
#[test]
fn test_filter_compare_scalar_and_impls_impl_proptest(
start in 0u32..400u32,
end in 0u32..400u32,
fn test_filter_compare_scalar_and_avx2_impl_proptest(
start in proptest::prelude::any::<u32>(),
end in proptest::prelude::any::<u32>(),
offset in 0u32..2u32,
vals in vals_strategy()) {
for implementation in available_impls() {
if implementation == FilterImplPerInstructionSet::Scalar {
continue;
}
let mut impl_output = vals.clone();
let mut scalar_output = vals.clone();
implementation.filter_vec_in_place(start..=end, offset, &mut impl_output);
FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut scalar_output);
assert_eq!(&impl_output, &scalar_output);
}
mut vals in proptest::collection::vec(0..u32::MAX, 0..30)) {
if FilterImplPerInstructionSet::AVX2.is_available() {
let mut vals_clone = vals.clone();
FilterImplPerInstructionSet::AVX2.filter_vec_in_place(start..=end, offset, &mut vals);
FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut vals_clone);
assert_eq!(&vals, &vals_clone);
}
}
}
}

View File

@@ -1,118 +0,0 @@
use std::arch::aarch64::*;
use std::ops::RangeInclusive;
const NUM_LANES: usize = 4;
// Moves the selected lanes of `data` to the front of the returned vector by means of a
// byte-level table lookup.
// `mask` is a 4-bit value: when bit k is set, lane k is kept in the output.
#[inline]
#[target_feature(enable = "neon")]
unsafe fn compact(data: uint32x4_t, mask: u8) -> uint32x4_t {
    // SAFETY: mask is always in [0, 15] by construction (max sum of [1,2,4,8]).
    // BYTE_SHUFFLE_TABLE has 16 entries, so the lookup is always in bounds.
    let pattern = unsafe { BYTE_SHUFFLE_TABLE.get_unchecked(mask as usize) };
    // SAFETY: `pattern` refers to a full 16-byte row of the table, which is exactly what
    // `vld1q_u8` reads; NEON is enabled for this function.
    unsafe {
        let byte_indices = vld1q_u8(pattern.as_ptr());
        let data_bytes = vreinterpretq_u8_u32(data);
        let compacted_bytes = vqtbl1q_u8(data_bytes, byte_indices);
        vreinterpretq_u32_u8(compacted_bytes)
    }
}
/// Replaces `output` in place with the ids (`offset` + position) of the elements whose
/// value lies in `range`.
///
/// Full groups of `NUM_LANES` values go through the NEON kernel; the trailing elements are
/// filtered one at a time.
// This function is safe (no `unsafe fn`): NEON is mandatory on aarch64, so no runtime
// feature check is needed.
#[inline(never)]
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
    let full_words = output.len() / NUM_LANES;
    let vectorized_len = full_words * NUM_LANES;
    // SAFETY: both pointers come from `output`, which holds at least
    // `full_words * NUM_LANES` elements, and NEON is always present on aarch64.
    let mut write_pos = unsafe {
        filter_vec_neon_aux(
            output.as_ptr(),
            range.clone(),
            output.as_mut_ptr(),
            offset,
            full_words,
        )
    };
    for read_pos in vectorized_len..output.len() {
        let val = output[read_pos];
        // The id is written unconditionally; it is only kept when the cursor advances.
        output[write_pos] = offset + read_pos as u32;
        if range.contains(&val) {
            write_pos += 1;
        }
    }
    output.truncate(write_pos);
}
/// NEON kernel of the filter: processes `num_words` groups of `NUM_LANES` `u32` values.
///
/// For every group loaded from `input`, the ids of the lanes whose value lies in `range`
/// are stored contiguously at the current output position. Ids start at `offset` for the
/// first lane and grow by one per lane. Returns the number of ids written, i.e. the distance
/// between the final output position and `output`.
///
/// # Safety
///
/// `input` must be readable and `output` writable for `num_words * NUM_LANES` `u32` values.
/// Each iteration stores a full vector at the output position and then advances by the
/// number of matching lanes only, so the unmatched tail of a store is overwritten by the
/// next one. The caller in this file passes the same buffer for `input` and `output`.
///
/// NOTE(review): `offset + 1`, `offset + 2` and `offset + 3` are plain additions, so in a
/// build with overflow checks an `offset` within 3 of `u32::MAX` panics here even when
/// `num_words` is 0 — confirm callers never pass such an offset.
#[target_feature(enable = "neon")]
unsafe fn filter_vec_neon_aux(
input: *const u32,
range: RangeInclusive<u32>,
output: *mut u32,
offset: u32,
num_words: usize,
) -> usize {
unsafe {
let mut input = input;
let mut output_tail = output;
// Broadcast the range bounds to all lanes once, outside the loop.
let range_start_simd = vdupq_n_u32(*range.start());
let range_end_simd = vdupq_n_u32(*range.end());
// ids holds the id of each lane of the current word; it advances by NUM_LANES per word.
let mut ids = vld1q_u32([offset, offset + 1, offset + 2, offset + 3].as_ptr());
let shift = vdupq_n_u32(NUM_LANES as u32);
let bit_weights = vld1q_u32([1u32, 2, 4, 8].as_ptr());
for _ in 0..num_words {
let word = vld1q_u32(input);
// Unsigned compares: CMHS (compare higher or same) tests `word >= start`
// and `end >= word`. ANDing both gives the inside-range mask directly,
// which is cheaper than computing `outside` and then negating.
let ge_start = vcgeq_u32(word, range_start_simd);
let le_end = vcleq_u32(word, range_end_simd);
// inside[k] = 0xFFFFFFFF if val[k] is in range, 0 otherwise.
let inside = vandq_u32(ge_start, le_end);
// Build the 4-bit mask: AND bit_weights with the inside lane mask, so each
// inside lane contributes its bit_weight (1, 2, 4, or 8). Summing yields the
// 4-bit mask in one addv.
let inside_bits = vandq_u32(bit_weights, inside);
let mask = vaddvq_u32(inside_bits) as u8;
// mask is mathematically bounded: max value is 1+2+4+8=15 (all lanes match)
debug_assert!(mask <= 15, "mask must fit in 4 bits: {}", mask);
// Count of matching lanes = popcount(mask). Derives the count directly from
// the mask instead of running a parallel SIMD reduction over `outside`.
let added_len = mask.count_ones() as usize;
// Safe because mask is guaranteed to be in [0, 15]
let filtered_ids = compact(ids, mask);
// Store all four lanes, but only advance past the `added_len` matching ones.
vst1q_u32(output_tail, filtered_ids);
output_tail = output_tail.add(added_len);
ids = vaddq_u32(ids, shift);
input = input.add(NUM_LANES);
}
output_tail.offset_from(output) as usize
}
}
// Byte shuffle patterns to compact matching lanes to the front of the vector.
// Index is a 4-bit mask: bit k=1 means lane k (bytes 4k..4k+3) is in-range.
// The j-th set bit determines which input lane goes to output position j.
// Each row is used by `compact` as the index vector of a 16-byte table lookup
// (`vqtbl1q_u8`). The value 16 marks an unused output byte: it is past the end of the
// 16-byte table, and the Arm TBL instruction returns 0 for out-of-range indices.
const BYTE_SHUFFLE_TABLE: [[u8; 16]; 16] = [
[
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
], // 0b0000: none
[0, 1, 2, 3, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0001: lane 0
[4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0010: lane 1
[0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0011: lanes 0,1
[8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0100: lane 2
[0, 1, 2, 3, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0101: lanes 0,2
[4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0110: lanes 1,2
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16], // 0b0111: lanes 0,1,2
[
12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
], // 0b1000: lane 3
[0, 1, 2, 3, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1001: lanes 0,3
[4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1010: lanes 1,3
[0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1011: lanes 0,1,3
[8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1100: lanes 2,3
[0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1101: lanes 0,2,3
[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1110: lanes 1,2,3
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // 0b1111: all lanes
];

View File

@@ -1,260 +0,0 @@
use std::ops::RangeInclusive;
/// Returns the number of `u32` lanes in one SVE vector on the running CPU.
///
/// The SVE vector length is not a compile-time constant, so it is queried at runtime.
///
/// # Safety
///
/// Safe to call only when SVE is confirmed available via
/// `is_aarch64_feature_detected!("sve")`.
#[target_feature(enable = "sve")]
unsafe fn num_lanes() -> usize {
let vl: usize;
unsafe {
core::arch::asm!(
// `cntw` writes the number of 32-bit lanes per vector into the output register.
"cntw {vl}",
vl = out(reg) vl,
// The instruction only writes `vl`: the options declare no stack use, no memory
// access, and unchanged flags.
options(nostack, nomem, preserves_flags),
);
}
vl
}
/// Replaces `output` in place with the ids (`offset` + position) of the elements whose
/// value lies in `range`, using the SVE kernel for all full vectors.
///
/// # Safety
///
/// The caller must ensure SVE is available (checked via
/// `is_aarch64_feature_detected!("sve")`). Unlike NEON, SVE is optional on aarch64 and not
/// guaranteed by the target architecture.
pub unsafe fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
    let (lo, hi) = (*range.start(), *range.end());
    if lo > hi {
        // An inverted range contains nothing; bail out before the wrapping subtraction
        // below, which would otherwise yield a huge width.
        output.clear();
        return;
    }
    // SAFETY: the caller guarantees that SVE is available.
    let vl = unsafe { num_lanes() };
    let num_words = output.len() / vl;
    // Unsigned subtraction trick: val ∈ [lo, hi] ↔ (val - lo) ≤ᵤ (hi - lo).
    // Values below lo wrap around to a large u32, so one unsigned ≤ excludes them.
    let range_width = hi.wrapping_sub(lo);
    // SAFETY: both pointers come from `output`, which holds at least `num_words * vl`
    // elements, and the caller guarantees that SVE is available.
    let mut write_pos = unsafe {
        filter_vec_sve_aux(
            output.as_ptr(),
            lo,
            range_width,
            output.as_mut_ptr(),
            offset,
            num_words,
            vl,
        )
    };
    // Scalar pass over the elements that do not fill a whole SVE vector.
    for read_pos in (num_words * vl)..output.len() {
        let val = output[read_pos];
        output[write_pos] = offset + read_pos as u32;
        if range.contains(&val) {
            write_pos += 1;
        }
    }
    output.truncate(write_pos);
}
// Register allocation for the asm! blocks:
// z0 ids_a (index vector for first half of each pair, advances by step2 each iter)
// z1 range_width broadcast
// z2 range_start broadcast
// z3 step2 broadcast (2 * vl)
// z4 ids_b (index vector for second half, = ids_a + step, advances by step2)
// z5 scratch: loaded word_a, then compacted_a
// z6 scratch: loaded word_b, then compacted_b
// p0 all-true predicate (ptrue p0.s)
// p1 in-range mask for word_a
// p2 in-range mask for word_b
/// SVE kernel of the filter: processes `num_words` full SVE vectors of `vl` lanes each.
///
/// A lane is kept when `value - range_start <= range_width` (unsigned, wrapping). The ids
/// of the kept lanes are written contiguously starting at `output`; ids start at `offset`
/// and grow by one per lane. Vectors are handled two at a time in the main loop, and a
/// possible odd trailing vector is handled by a second asm block. Returns the number of
/// ids written.
///
/// # Safety
///
/// SVE must be available on the running CPU. `input` must be readable and `output`
/// writable for `num_words * vl` `u32` values. `vl` is expected to be the lane count
/// returned by `num_lanes()`: the asm recomputes it with `cntw`, while `vl` itself is only
/// used to derive the id of the odd trailing vector.
#[target_feature(enable = "sve")]
unsafe fn filter_vec_sve_aux(
input: *const u32,
range_start: u32,
range_width: u32,
output: *mut u32,
offset: u32,
num_words: usize,
vl: usize,
) -> usize {
let num_pairs = num_words / 2;
let mut input_ptr = input;
let mut output_tail = output;
// The asm loop tests its counter at the bottom (`subs` then `b.ne`), so it must not be
// entered with zero pairs.
if num_pairs > 0 {
unsafe {
// We rely on asm! because the SVE intrinsics are not available in stable Rust.
// The code that follows was generated by Rustc nightly based on the intrinsics version
// at the bottom of this file.
core::arch::asm!(
// --- Setup ---
// All-true predicate for 32-bit lanes.
"ptrue p0.s",
// ids_a = [offset, offset+1, offset+2, ...]
"index z0.s, {offset:w}, #1",
// Broadcast scalars into SVE vectors.
"mov z1.s, {range_width:w}",
"mov z2.s, {range_start:w}",
// vl_gpr = number of 32-bit lanes (cntw).
"cntw {vl_gpr}",
// step2_bytes will first hold 2*vl (for the step2 vector), then 2*VL in bytes.
"lsl {step2_bytes}, {vl_gpr}, #1",
// z4 = step = [vl, vl, ...]; will become ids_b after the add below.
"mov z4.s, {vl_gpr:w}",
// z3 = step2 = [2*vl, 2*vl, ...], used to advance both id vectors each iter.
"mov z3.s, {step2_bytes:w}",
// Repurpose step2_bytes to hold the byte stride for advancing the input pointer
// by two full SVE vectors per iteration.
"rdvl {step2_bytes}, #2",
// ids_b = ids_a + step = [offset+vl, offset+vl+1, ...]
"add z4.s, z0.s, z4.s",
// --- Main loop: process two SVE vectors (ids_a and ids_b) per iteration ---
"0:",
// Load two consecutive SVE vectors from input.
"ld1w {{z5.s}}, p0/z, [{input}]",
"ld1w {{z6.s}}, p0/z, [{input}, #1, mul vl]",
// Advance input pointer by 2 * VL bytes.
"add {input}, {input}, {step2_bytes}",
// Unsigned shift: subtract range_start so in-range check becomes a single cmpu ≤.
"sub z5.s, z5.s, z2.s",
"sub z6.s, z6.s, z2.s",
// in_range: shifted value ≤ range_width (unsigned, so values below lo also fail).
"cmphs p1.s, p0/z, z1.s, z5.s",
"cmphs p2.s, p0/z, z1.s, z6.s",
// Count matching lanes; both cntp calls have independent inputs for OOO parallelism.
"cntp {cnt_a}, p0, p1.s",
"compact z5.s, p1, z0.s",
"compact z6.s, p2, z4.s",
"cntp {cnt_b}, p0, p2.s",
// Advance id vectors for the next iteration.
"add z0.s, z0.s, z3.s",
"add z4.s, z4.s, z3.s",
// Store compacted ids. Only the first cnt_a / cnt_b slots are valid; the rest
// will be overwritten by subsequent iterations before the final truncate.
"str z5, [{out}]",
"st1w {{z6.s}}, p0, [{out}, {cnt_a}, lsl #2]",
"add {out}, {out}, {cnt_a}, lsl #2",
"add {out}, {out}, {cnt_b}, lsl #2",
"subs {pairs}, {pairs}, #1",
"b.ne 0b",
// --- Operands ---
input = inout(reg) input_ptr,
out = inout(reg) output_tail,
pairs = inout(reg) num_pairs => _,
offset = in(reg) offset,
range_start = in(reg) range_start,
range_width = in(reg) range_width,
vl_gpr = out(reg) _,
step2_bytes = out(reg) _,
cnt_a = out(reg) _,
cnt_b = out(reg) _,
// Clobbers for the predicate and vector registers named in the asm text.
// NOTE(review): the z registers are declared through their v<n> names —
// confirm that a v<n> clobber covers the full z<n> register.
out("p0") _, out("p1") _, out("p2") _,
out("v0") _, out("v1") _, out("v2") _, out("v3") _,
out("v4") _, out("v5") _, out("v6") _,
options(nostack),
);
}
}
// Handle an odd trailing vector.
if num_words % 2 == 1 {
// ids_a for the odd word starts at offset + num_pairs * 2 * vl.
// input_ptr was advanced by the main loop and now points at the odd word.
let odd_offset =
offset.wrapping_add((num_pairs as u32).wrapping_mul(2).wrapping_mul(vl as u32));
unsafe {
// Same steps as one half of the main loop body, for a single vector:
// load, subtract range_start, compare against range_width, compact the ids, store.
core::arch::asm!(
"ptrue p0.s",
"index z0.s, {odd_offset:w}, #1",
"mov z1.s, {range_width:w}",
"mov z2.s, {range_start:w}",
"ld1w {{z3.s}}, p0/z, [{input}]",
"sub z3.s, z3.s, z2.s",
"cmphs p1.s, p0/z, z1.s, z3.s",
"cntp {cnt}, p0, p1.s",
"compact z0.s, p1, z0.s",
"str z0, [{out}]",
"add {out}, {out}, {cnt}, lsl #2",
odd_offset = in(reg) odd_offset,
range_width = in(reg) range_width,
range_start = in(reg) range_start,
input = in(reg) input_ptr,
out = inout(reg) output_tail,
cnt = out(reg) _,
out("p0") _, out("p1") _,
out("v0") _, out("v1") _, out("v2") _, out("v3") _,
options(nostack),
);
}
}
// Number of ids written = distance between the final write position and `output`.
unsafe { output_tail.offset_from(output) as usize }
}
// SVE implementation using intrinsics, kept commented out for reference.
//
// #[target_feature(enable = "sve")]
// unsafe fn filter_vec_sve_aux(
// input: *const u32,
// range_start: u32,
// range_width: u32,
// output: *mut u32,
// offset: u32,
// num_words: usize,
// vl: usize,
// ) -> usize {
// unsafe {
// let all_true = svptrue_b32();
// let range_start_simd = svdup_n_u32(range_start);
// let range_width_simd = svdup_n_u32(range_width);
// // ids_a covers [offset .. offset+vl), ids_b covers the next vl ids.
// // Keeping them separate breaks the loop-carried dependency through ids so
// // both compact/cntp chains are fully independent within each unrolled body.
// let mut ids_a = svindex_u32(offset, 1);
// let step = svdup_n_u32(vl as u32);
// let step2 = svdup_n_u32(2 * vl as u32);
// let mut ids_b = svadd_u32_x(all_true, ids_a, step);
// let mut input = input;
// let mut output_tail = output;
// // Unrolled ×2: both cntp calls have independent inputs and execute in parallel.
// // The two output_tail updates are sequential but together cost 4+1+1=6 cy per
// // pair vs 5+5=10 cy for two scalar iterations, breaking the cntp latency chain.
// let num_pairs = num_words / 2;
// for _ in 0..num_pairs {
// let word_a = svld1_u32(all_true, input);
// let word_b = svld1_u32(all_true, input.add(vl));
// let shifted_a = svsub_u32_x(all_true, word_a, range_start_simd);
// let shifted_b = svsub_u32_x(all_true, word_b, range_start_simd);
// let in_range_a = svcmple_u32(all_true, shifted_a, range_width_simd);
// let in_range_b = svcmple_u32(all_true, shifted_b, range_width_simd);
// let compacted_a = svcompact_u32(in_range_a, ids_a);
// let compacted_b = svcompact_u32(in_range_b, ids_b);
// // cntp_a and cntp_b have independent inputs: OOO engine issues them in parallel.
// let added_len_a = svcntp_b32(all_true, in_range_a) as usize;
// let added_len_b = svcntp_b32(all_true, in_range_b) as usize;
// // Write the full vector — only the first added_len slots are valid.
// // Subsequent iterations overwrite the trailing zeros before truncate.
// svst1_u32(all_true, output_tail, compacted_a);
// output_tail = output_tail.add(added_len_a);
// svst1_u32(all_true, output_tail, compacted_b);
// output_tail = output_tail.add(added_len_b);
// ids_a = svadd_u32_x(all_true, ids_a, step2);
// ids_b = svadd_u32_x(all_true, ids_b, step2);
// input = input.add(2 * vl);
// }
// // Handle an odd trailing word.
// if num_words % 2 == 1 {
// let word = svld1_u32(all_true, input);
// let shifted = svsub_u32_x(all_true, word, range_start_simd);
// let in_range = svcmple_u32(all_true, shifted, range_width_simd);
// let added_len = svcntp_b32(all_true, in_range) as usize;
// let compacted_ids = svcompact_u32(in_range, ids_a);
// svst1_u32(all_true, output_tail, compacted_ids);
// output_tail = output_tail.add(added_len);
// }
// output_tail.offset_from(output) as usize
// }
// }

View File

@@ -15,37 +15,9 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
{
#[inline]
pub fn fetch_block<'a>(&'a mut self, docs: &'a [u32], accessor: &Column<T>) {
self.fetch_block_with_is_full(docs, accessor, accessor.index.get_cardinality().is_full());
}
/// Like [`Self::fetch_block`] but takes the column's fullness instead of querying
/// `accessor.index.get_cardinality()` each call — for callers that know it up front (e.g.
/// checked once at construction). `is_full` must equal
/// `accessor.index.get_cardinality().is_full()`.
#[inline]
pub fn fetch_block_with_is_full<'a>(
&'a mut self,
docs: &'a [u32],
accessor: &Column<T>,
is_full: bool,
) {
if is_full {
// Skip the resize when already the right length (common case: fixed-size blocks).
if self.val_cache.len() != docs.len() {
self.val_cache.resize(docs.len(), T::default());
}
// When the docs form a contiguous ascending run we can fetch the values
// as a single range. This lets codecs (e.g. bitpacked) bulk-decode the
// slice instead of gathering value-by-value, and avoids per-value dynamic
// dispatch. `docs` is always sorted ascending and free of duplicates here,
// so comparing the endpoints is enough to detect contiguity.
if is_contiguous(docs) {
accessor
.values
.get_range(docs[0] as u64, &mut self.val_cache);
} else {
accessor.values.get_vals(docs, &mut self.val_cache);
}
if accessor.index.get_cardinality().is_full() {
self.val_cache.resize(docs.len(), T::default());
accessor.values.get_vals(docs, &mut self.val_cache);
} else {
self.docid_cache.clear();
self.row_id_cache.clear();
@@ -186,22 +158,6 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
}
}
/// Tells whether `docs` is a gap-free ascending run `[d, d + 1, ..., d + n - 1]`.
///
/// An empty slice is not a run and yields `false`.
///
/// Only the first and last ids are compared against the slice length. This is
/// sufficient because the doc blocks passed to `fetch_block` are sorted ascending
/// and free of duplicates; that invariant is checked in debug builds.
#[inline]
fn is_contiguous(docs: &[u32]) -> bool {
    if docs.is_empty() {
        return false;
    }
    debug_assert!(
        docs.windows(2).all(|pair| pair[0] < pair[1]),
        "fetch_block requires docs sorted ascending without duplicates"
    );
    let lowest = docs[0];
    let highest = docs[docs.len() - 1];
    // Number of ids covered by the closed interval [lowest, highest].
    let span = (highest - lowest) as usize + 1;
    span == docs.len()
}
/// Given two sorted lists of docids `docs` and `hits`, hits is a subset of `docs`.
/// Return all docs that are not in `hits`.
fn find_missing_docs<F>(docs: &[u32], hits: &[u32], mut callback: F)
@@ -332,46 +288,4 @@ mod tests {
assert_eq!(accessor.docid_cache, vec![0]);
assert_eq!(accessor.val_cache, vec![1]);
}
#[test]
fn test_is_contiguous() {
    // An empty block is never a run.
    assert!(!is_contiguous(&[]));
    // Gap-free ascending blocks, including a single doc.
    let runs: [&[u32]; 3] = [&[5], &[5, 6, 7, 8], &[0, 1, 2]];
    for run in runs {
        assert!(is_contiguous(run));
    }
    // Blocks with a hole somewhere.
    let gapped: [&[u32]; 2] = [&[5, 7, 8], &[0, 1, 3]];
    for block in gapped {
        assert!(!is_contiguous(block));
    }
}
#[test]
fn test_fetch_block_contiguous_and_gather_match() {
    use crate::column_index::ColumnIndex;
    use crate::column_values::{
        ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values,
    };

    // 200 distinct values so that every doc id maps to a recognizable value.
    let vals: Vec<u64> = (0..200u64).map(|i| i * 7 + 3).collect();
    let column = Column {
        index: ColumnIndex::Full,
        values: serialize_and_load_u64_based_column_values::<u64>(
            &&vals[..],
            &ALL_U64_CODEC_TYPES,
        ),
    };

    let doc_blocks: [Vec<u32>; 4] = [
        // Contiguous block -> get_range fast path.
        (10..74).collect(),
        // Non-contiguous block -> get_vals gather path.
        vec![0, 5, 9, 100, 199],
        // Single doc and full span.
        vec![42],
        (0..200).collect(),
    ];

    let mut accessor = ColumnBlockAccessor::<u64>::default();
    for block in &doc_blocks {
        let docs: &[u32] = block;
        accessor.fetch_block(docs, &column);
        let got: Vec<(u32, u64)> = accessor.iter_docid_vals(docs, &column).collect();
        let expected: Vec<(u32, u64)> = docs
            .iter()
            .map(|&doc| (doc, vals[doc as usize]))
            .collect();
        assert_eq!(got, expected);
    }
}
}

View File

@@ -119,18 +119,8 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
/// the segment's `maxdoc`.
#[inline(always)]
fn get_range(&self, start: u64, output: &mut [T]) {
let mut out_chunks = output.chunks_exact_mut(4);
let mut idx = start;
for out_x4 in out_chunks.by_ref() {
out_x4[0] = self.get_val(idx as u32);
out_x4[1] = self.get_val((idx + 1) as u32);
out_x4[2] = self.get_val((idx + 2) as u32);
out_x4[3] = self.get_val((idx + 3) as u32);
idx += 4;
}
for out in out_chunks.into_remainder() {
for (out, idx) in output.iter_mut().zip(start..) {
*out = self.get_val(idx as u32);
idx += 1;
}
}

View File

@@ -121,22 +121,6 @@ pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
reader.get_vals(&all_docs, &mut buffer);
assert_eq!(vals, buffer);
// Validate `get_range` over the full column and a sub-range. The sub-range starts
// at a non-zero offset to exercise the entrance-ramp alignment of the batch decode.
buffer.resize(all_docs.len(), 0);
reader.get_range(0, &mut buffer);
assert_eq!(vals, buffer, "get_range (full) mismatch in data set {name}");
if vals.len() >= 2 {
let start = 1usize;
buffer.resize(vals.len() - start, 0);
reader.get_range(start as u64, &mut buffer);
assert_eq!(
&vals[start..],
&buffer[..],
"get_range (sub-range) mismatch in data set {name}"
);
}
if !vals.is_empty() {
let test_rand_idx = rand::rng().random_range(0..=vals.len() - 1);
let expected_positions: Vec<u32> = vals

View File

@@ -327,9 +327,7 @@ fn exists(inp: &str) -> IResult<&str, UserInputLeaf> {
peek(alt((
value(
"",
satisfy(|c: char| {
c.is_whitespace() || (ESCAPE_IN_WORD.contains(&c) && c != '\\')
}),
satisfy(|c: char| c.is_whitespace() || ESCAPE_IN_WORD.contains(&c)),
),
eof,
))),
@@ -347,9 +345,7 @@ fn exists_precond(inp: &str) -> IResult<&str, (), ()> {
peek(alt((
value(
"",
satisfy(|c: char| {
c.is_whitespace() || (ESCAPE_IN_WORD.contains(&c) && c != '\\')
}),
satisfy(|c: char| c.is_whitespace() || ESCAPE_IN_WORD.contains(&c)),
),
eof,
))), // we need to check this isn't a wildcard query
@@ -711,7 +707,6 @@ fn regex(inp: &str) -> IResult<&str, UserInputLeaf> {
peek(alt((
value((), multispace1),
value((), char(')')),
value((), char('^')),
value((), eof),
))),
),
@@ -733,10 +728,9 @@ fn regex_infallible(inp: &str) -> JResult<&str, UserInputLeaf> {
peek(alt((
value((), multispace1),
value((), char(')')),
value((), char('^')),
value((), eof),
))),
"expected whitespace, closing parenthesis, boost, or end of input",
"expected whitespace, closing parenthesis, or end of input",
),
)(inp)
{
@@ -779,10 +773,6 @@ fn leaf(inp: &str) -> IResult<&str, UserInputAst> {
value((), multispace1),
value((), char(')')),
value((), eof),
value(
(),
satisfy(|c: char| ESCAPE_IN_WORD.contains(&c) && c != '\\'),
),
))),
),
|_| UserInputAst::from(UserInputLeaf::All),
@@ -815,10 +805,6 @@ fn leaf_infallible(inp: &str) -> JResult<&str, Option<UserInputAst>> {
value((), multispace1),
value((), char(')')),
value((), eof),
value(
(),
satisfy(|c: char| ESCAPE_IN_WORD.contains(&c) && c != '\\'),
),
))),
),
),
@@ -1765,8 +1751,6 @@ mod test {
test_parse_query_to_ast_helper("*", "*");
test_parse_query_to_ast_helper("(*)", "*");
test_parse_query_to_ast_helper("(* )", "*");
// All query with boost
test_parse_query_to_ast_helper("*^2", "(*)^2");
}
#[test]
@@ -1829,7 +1813,6 @@ mod test {
test_parse_query_to_ast_helper("a:b*", "\"a\":b*");
test_parse_query_to_ast_helper("a:*b", "\"a\":*b");
test_parse_query_to_ast_helper(r#"a:*def*"#, "\"a\":*def*");
test_parse_query_to_ast_helper("a:*\\:foo", "\"a\":*:foo");
}
#[test]
@@ -1894,8 +1877,6 @@ mod test {
},
_ => panic!("Expected a leaf"),
}
// Regex followed by `^boost`
test_parse_query_to_ast_helper(r#"foo:/bar/^2"#, r#"("foo":/bar/)^2"#);
}
#[test]

View File

@@ -10,11 +10,11 @@ use crate::aggregation::accessor_helpers::{
};
use crate::aggregation::agg_req::{Aggregation, AggregationVariants, Aggregations};
use crate::aggregation::bucket::{
build_segment_filter_collector, build_segment_histogram_collector,
build_segment_range_collector, CompositeAggReqData, CompositeAggregation,
CompositeSourceAccessors, FilterAggReqData, HistogramAggReqData, HistogramBounds,
IncludeExcludeParam, MissingTermAggReqData, RangeAggReqData, TermMissingAgg, TermsAggReqData,
TermsAggregation, TermsAggregationInternal,
build_segment_filter_collector, build_segment_range_collector, CompositeAggReqData,
CompositeAggregation, CompositeSourceAccessors, FilterAggReqData, HistogramAggReqData,
HistogramBounds, IncludeExcludeParam, MissingTermAggReqData, RangeAggReqData,
SegmentHistogramCollector, TermMissingAgg, TermsAggReqData, TermsAggregation,
TermsAggregationInternal,
};
use crate::aggregation::metric::{
build_segment_stats_collector, AverageAggregation, CardinalityAggReqData,
@@ -41,7 +41,7 @@ pub struct AggregationsSegmentCtx {
impl AggregationsSegmentCtx {
pub(crate) fn push_term_req_data(&mut self, data: TermsAggReqData) -> usize {
self.per_request.term_req_data.push(data);
self.per_request.term_req_data.push(Some(Box::new(data)));
self.per_request.term_req_data.len() - 1
}
pub(crate) fn push_cardinality_req_data(&mut self, data: CardinalityAggReqData) -> usize {
@@ -61,25 +61,31 @@ impl AggregationsSegmentCtx {
self.per_request.missing_term_req_data.len() - 1
}
pub(crate) fn push_histogram_req_data(&mut self, data: HistogramAggReqData) -> usize {
self.per_request.histogram_req_data.push(data);
self.per_request
.histogram_req_data
.push(Some(Box::new(data)));
self.per_request.histogram_req_data.len() - 1
}
pub(crate) fn push_range_req_data(&mut self, data: RangeAggReqData) -> usize {
self.per_request.range_req_data.push(data);
self.per_request.range_req_data.push(Some(Box::new(data)));
self.per_request.range_req_data.len() - 1
}
pub(crate) fn push_filter_req_data(&mut self, data: FilterAggReqData) -> usize {
self.per_request.filter_req_data.push(data);
self.per_request.filter_req_data.push(Some(Box::new(data)));
self.per_request.filter_req_data.len() - 1
}
pub(crate) fn push_composite_req_data(&mut self, data: CompositeAggReqData) -> usize {
self.per_request.composite_req_data.push(data);
self.per_request
.composite_req_data
.push(Some(Box::new(data)));
self.per_request.composite_req_data.len() - 1
}
#[inline]
pub(crate) fn get_term_req_data(&self, idx: usize) -> &TermsAggReqData {
&self.per_request.term_req_data[idx]
self.per_request.term_req_data[idx]
.as_deref()
.expect("term_req_data slot is empty (taken)")
}
#[inline]
pub(crate) fn get_cardinality_req_data(&self, idx: usize) -> &CardinalityAggReqData {
@@ -97,6 +103,116 @@ impl AggregationsSegmentCtx {
/// Borrows the missing-term request data stored at `idx`.
///
/// # Panics
/// Panics if `idx` is out of bounds.
pub(crate) fn get_missing_term_req_data(&self, idx: usize) -> &MissingTermAggReqData {
    let all_missing_term = &self.per_request.missing_term_req_data;
    &all_missing_term[idx]
}
/// Borrows the histogram request data stored at `idx`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is currently empty
/// (its content was taken and not put back).
#[inline]
pub(crate) fn get_histogram_req_data(&self, idx: usize) -> &HistogramAggReqData {
    let slot = &self.per_request.histogram_req_data[idx];
    slot.as_deref()
        .expect("histogram_req_data slot is empty (taken)")
}
/// Borrows the range request data stored at `idx`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is currently empty
/// (its content was taken and not put back).
#[inline]
pub(crate) fn get_range_req_data(&self, idx: usize) -> &RangeAggReqData {
    let slot = &self.per_request.range_req_data[idx];
    slot.as_deref()
        .expect("range_req_data slot is empty (taken)")
}
/// Borrows the composite request data stored at `idx`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is currently empty
/// (its content was taken and not put back).
#[inline]
pub(crate) fn get_composite_req_data(&self, idx: usize) -> &CompositeAggReqData {
    let slot = &self.per_request.composite_req_data[idx];
    slot.as_deref()
        .expect("composite_req_data slot is empty (taken)")
}
// ---------- mutable getters ----------
/// Mutably borrows the metric request data stored at `idx`.
///
/// # Panics
/// Panics if `idx` is out of bounds.
#[inline]
pub(crate) fn get_metric_req_data_mut(&mut self, idx: usize) -> &mut MetricAggReqData {
    let all_metrics = &mut self.per_request.stats_metric_req_data;
    &mut all_metrics[idx]
}
/// Mutably borrows the cardinality request data stored at `idx`.
///
/// # Panics
/// Panics if `idx` is out of bounds.
#[inline]
pub(crate) fn get_cardinality_req_data_mut(
    &mut self,
    idx: usize,
) -> &mut CardinalityAggReqData {
    let all_cardinality = &mut self.per_request.cardinality_req_data;
    &mut all_cardinality[idx]
}
/// Mutably borrows the histogram request data stored at `idx`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is currently empty
/// (its content was taken and not put back).
#[inline]
pub(crate) fn get_histogram_req_data_mut(&mut self, idx: usize) -> &mut HistogramAggReqData {
    let slot = &mut self.per_request.histogram_req_data[idx];
    slot.as_deref_mut()
        .expect("histogram_req_data slot is empty (taken)")
}
// ---------- take / put (histogram, range, filter, composite) ----------
/// Removes the boxed histogram request from slot `idx` and returns it; the slot is
/// left as `None`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is already empty.
#[inline]
pub(crate) fn take_histogram_req_data(&mut self, idx: usize) -> Box<HistogramAggReqData> {
    let slot = &mut self.per_request.histogram_req_data[idx];
    slot.take()
        .expect("histogram_req_data slot is empty (taken)")
}
/// Stores `value` into slot `idx`, which is expected to be empty.
///
/// Debug builds assert that the slot is `None` before writing.
///
/// # Panics
/// Panics if `idx` is out of bounds.
#[inline]
pub(crate) fn put_back_histogram_req_data(
    &mut self,
    idx: usize,
    value: Box<HistogramAggReqData>,
) {
    let slot = &mut self.per_request.histogram_req_data[idx];
    debug_assert!(slot.is_none());
    *slot = Some(value);
}
/// Removes the boxed range request from slot `idx` and returns it; the slot is
/// left as `None`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is already empty.
#[inline]
pub(crate) fn take_range_req_data(&mut self, idx: usize) -> Box<RangeAggReqData> {
    let slot = &mut self.per_request.range_req_data[idx];
    slot.take()
        .expect("range_req_data slot is empty (taken)")
}
/// Stores `value` into slot `idx`, which is expected to be empty.
///
/// Debug builds assert that the slot is `None` before writing.
///
/// # Panics
/// Panics if `idx` is out of bounds.
#[inline]
pub(crate) fn put_back_range_req_data(&mut self, idx: usize, value: Box<RangeAggReqData>) {
    let slot = &mut self.per_request.range_req_data[idx];
    debug_assert!(slot.is_none());
    *slot = Some(value);
}
/// Removes the boxed filter request from slot `idx` and returns it; the slot is
/// left as `None`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is already empty.
#[inline]
pub(crate) fn take_filter_req_data(&mut self, idx: usize) -> Box<FilterAggReqData> {
    let slot = &mut self.per_request.filter_req_data[idx];
    slot.take()
        .expect("filter_req_data slot is empty (taken)")
}
/// Stores `value` into slot `idx`, which is expected to be empty.
///
/// Debug builds assert that the slot is `None` before writing.
///
/// # Panics
/// Panics if `idx` is out of bounds.
#[inline]
pub(crate) fn put_back_filter_req_data(&mut self, idx: usize, value: Box<FilterAggReqData>) {
    let slot = &mut self.per_request.filter_req_data[idx];
    debug_assert!(slot.is_none());
    *slot = Some(value);
}
/// Removes the boxed composite request from slot `idx` and returns it; the slot is
/// left as `None`.
///
/// # Panics
/// Panics if `idx` is out of bounds or if the slot is already empty.
#[inline]
pub(crate) fn take_composite_req_data(&mut self, idx: usize) -> Box<CompositeAggReqData> {
    let slot = &mut self.per_request.composite_req_data[idx];
    slot.take()
        .expect("composite_req_data slot is empty (taken)")
}
/// Stores `value` into slot `idx`, which is expected to be empty.
///
/// Debug builds assert that the slot is `None` before writing.
///
/// # Panics
/// Panics if `idx` is out of bounds.
#[inline]
pub(crate) fn put_back_composite_req_data(
    &mut self,
    idx: usize,
    value: Box<CompositeAggReqData>,
) {
    let slot = &mut self.per_request.composite_req_data[idx];
    debug_assert!(slot.is_none());
    *slot = Some(value);
}
}
/// Each type of aggregation has its own request data struct. This struct holds
@@ -107,14 +223,15 @@ impl AggregationsSegmentCtx {
/// for a node with [AggKind::Terms]).
#[derive(Default)]
pub struct PerRequestAggSegCtx {
// Box for cheap take/put - Only necessary for bucket aggs that have sub-aggregations
/// TermsAggReqData contains the request data for a terms aggregation.
pub term_req_data: Vec<TermsAggReqData>,
pub term_req_data: Vec<Option<Box<TermsAggReqData>>>,
/// HistogramAggReqData contains the request data for a histogram aggregation.
pub histogram_req_data: Vec<HistogramAggReqData>,
pub histogram_req_data: Vec<Option<Box<HistogramAggReqData>>>,
/// RangeAggReqData contains the request data for a range aggregation.
pub range_req_data: Vec<RangeAggReqData>,
pub range_req_data: Vec<Option<Box<RangeAggReqData>>>,
/// FilterAggReqData contains the request data for a filter aggregation.
pub filter_req_data: Vec<FilterAggReqData>,
pub filter_req_data: Vec<Option<Box<FilterAggReqData>>>,
/// Shared by avg, min, max, sum, stats, extended_stats, count
pub stats_metric_req_data: Vec<MetricAggReqData>,
/// CardinalityAggReqData contains the request data for a cardinality aggregation.
@@ -124,7 +241,7 @@ pub struct PerRequestAggSegCtx {
/// MissingTermAggReqData contains the request data for a missing term aggregation.
pub missing_term_req_data: Vec<MissingTermAggReqData>,
/// CompositeAggReqData contains the request data for a composite aggregation.
pub composite_req_data: Vec<CompositeAggReqData>,
pub composite_req_data: Vec<Option<Box<CompositeAggReqData>>>,
/// Request tree used to build collectors.
pub agg_tree: Vec<AggRefNode>,
@@ -135,22 +252,22 @@ impl PerRequestAggSegCtx {
fn get_memory_consumption(&self) -> usize {
self.term_req_data
.iter()
.map(|t| t.get_memory_consumption())
.map(|b| b.as_ref().unwrap().get_memory_consumption())
.sum::<usize>()
+ self
.histogram_req_data
.iter()
.map(|t| t.get_memory_consumption())
.map(|b| b.as_ref().unwrap().get_memory_consumption())
.sum::<usize>()
+ self
.range_req_data
.iter()
.map(|t| t.get_memory_consumption())
.map(|b| b.as_ref().unwrap().get_memory_consumption())
.sum::<usize>()
+ self
.filter_req_data
.iter()
.map(|t| t.get_memory_consumption())
.map(|b| b.as_ref().unwrap().get_memory_consumption())
.sum::<usize>()
+ self
.stats_metric_req_data
@@ -175,7 +292,7 @@ impl PerRequestAggSegCtx {
+ self
.composite_req_data
.iter()
.map(|t| t.get_memory_consumption())
.map(|b| b.as_ref().map(|d| d.get_memory_consumption()).unwrap_or(0))
.sum::<usize>()
+ self.agg_tree.len() * std::mem::size_of::<AggRefNode>()
}
@@ -184,16 +301,40 @@ impl PerRequestAggSegCtx {
let idx = node.idx_in_req_data;
let kind = node.kind;
match kind {
AggKind::Terms => self.term_req_data[idx].name.as_str(),
AggKind::Terms => self.term_req_data[idx]
.as_deref()
.expect("term_req_data slot is empty (taken)")
.name
.as_str(),
AggKind::Cardinality => &self.cardinality_req_data[idx].name,
AggKind::StatsKind(_) => &self.stats_metric_req_data[idx].name,
AggKind::TopHits => &self.top_hits_req_data[idx].name,
AggKind::MissingTerm => &self.missing_term_req_data[idx].name,
AggKind::Histogram => self.histogram_req_data[idx].name.as_str(),
AggKind::DateHistogram => self.histogram_req_data[idx].name.as_str(),
AggKind::Range => self.range_req_data[idx].name.as_str(),
AggKind::Filter => self.filter_req_data[idx].name.as_str(),
AggKind::Composite => self.composite_req_data[idx].name.as_str(),
AggKind::Histogram => self.histogram_req_data[idx]
.as_deref()
.expect("histogram_req_data slot is empty (taken)")
.name
.as_str(),
AggKind::DateHistogram => self.histogram_req_data[idx]
.as_deref()
.expect("histogram_req_data slot is empty (taken)")
.name
.as_str(),
AggKind::Range => self.range_req_data[idx]
.as_deref()
.expect("range_req_data slot is empty (taken)")
.name
.as_str(),
AggKind::Filter => self.filter_req_data[idx]
.as_deref()
.expect("filter_req_data slot is empty (taken)")
.name
.as_str(),
AggKind::Composite => self.composite_req_data[idx]
.as_deref()
.expect("composite_req_data slot is empty (taken)")
.name
.as_str(),
}
}
@@ -271,7 +412,7 @@ pub(crate) fn build_segment_agg_collector(
Ok(Box::new(TermMissingAgg::new(req, node)?))
}
AggKind::Cardinality => {
let req_data = req.get_cardinality_req_data(node.idx_in_req_data);
let req_data = &mut req.get_cardinality_req_data_mut(node.idx_in_req_data);
// For str columns, choose the per-bucket entries representation
// based on the segment's column.max_value():
// * small (< BITSET_MAX_TERM_ORD): `BitSet`, pre-allocated, no promotion machinery.
@@ -318,7 +459,7 @@ pub(crate) fn build_segment_agg_collector(
SegmentExtendedStatsCollector::from_req(req_data, sigma),
)),
StatsType::Percentiles => {
let req_data = req.get_metric_req_data(node.idx_in_req_data);
let req_data = req.get_metric_req_data_mut(node.idx_in_req_data);
Ok(Box::new(
SegmentPercentilesCollector::from_req_and_validate(
req_data.field_type,
@@ -338,8 +479,12 @@ pub(crate) fn build_segment_agg_collector(
req_data.segment_ordinal,
)))
}
AggKind::Histogram => build_segment_histogram_collector(req, node),
AggKind::DateHistogram => build_segment_histogram_collector(req, node),
AggKind::Histogram => Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
req, node,
)?)),
AggKind::DateHistogram => Ok(Box::new(SegmentHistogramCollector::from_req_and_validate(
req, node,
)?)),
AggKind::Range => Ok(build_segment_range_collector(req, node)?),
AggKind::Filter => build_segment_filter_collector(req, node),
AggKind::Composite => Ok(Box::new(
@@ -654,18 +799,23 @@ fn build_nodes(
let schema = reader.schema();
let tokenizers = &data.context.tokenizers;
let query = filter_req.parse_query(schema, tokenizers)?;
let evaluator =
std::rc::Rc::new(crate::aggregation::bucket::DocumentQueryEvaluator::new(
query,
schema.clone(),
reader,
)?);
let evaluator = crate::aggregation::bucket::DocumentQueryEvaluator::new(
query,
schema.clone(),
reader,
)?;
// Pre-allocate buffer for batch filtering
let max_doc = reader.max_doc();
let buffer_capacity = crate::docset::COLLECT_BLOCK_BUFFER_LEN.min(max_doc as usize);
let matching_docs_buffer = Vec::with_capacity(buffer_capacity);
let idx_in_req_data = data.push_filter_req_data(FilterAggReqData {
name: agg_name.to_string(),
req: filter_req.clone(),
segment_reader: reader.clone(),
evaluator,
matching_docs_buffer,
is_top_level,
});
let children = build_children(&req.sub_aggregation, reader, segment_ordinal, data)?;

View File

@@ -299,12 +299,6 @@ impl AggregationVariants {
_ => None,
}
}
/// Returns the inner [`SumAggregation`] when this variant is `Sum`, and `None` for
/// every other variant.
pub(crate) fn as_sum(&self) -> Option<&SumAggregation> {
    if let AggregationVariants::Sum(sum) = self {
        Some(sum)
    } else {
        None
    }
}
}
#[cfg(test)]

View File

@@ -16,7 +16,6 @@ use crate::{SegmentReader, TantivyError};
/// Contains all information required by the SegmentCompositeCollector to perform the
/// composite aggregation on a segment.
#[derive(Debug, Clone)]
pub struct CompositeAggReqData {
/// The name of the aggregation.
pub name: String,
@@ -35,7 +34,6 @@ impl CompositeAggReqData {
}
/// Accessors for a single column in a composite source.
#[derive(Debug, Clone)]
pub struct CompositeAccessor {
/// The fast field column
pub column: Column<u64>,
@@ -50,7 +48,6 @@ pub struct CompositeAccessor {
}
/// Accessors to all the columns that belong to the field of a composite source.
#[derive(Debug, Clone)]
pub struct CompositeSourceAccessors {
/// The accessors for this source
pub accessors: Vec<CompositeAccessor>,
@@ -361,7 +358,7 @@ impl PrecomputedDateInterval {
///
/// Some column types (term, IP) might not have an exact representation of the
/// specified after key
#[derive(Debug, Clone)]
#[derive(Debug)]
pub enum PrecomputedAfterKey {
/// The after key could be exactly represented in the column space.
Exact(u64),

View File

@@ -118,7 +118,7 @@ impl InternalValueRepr {
pub struct SegmentCompositeCollector {
/// One DynArrayHeapMap per parent bucket.
parent_buckets: Vec<DynArrayHeapMap<InternalValueRepr, CompositeBucketCollector>>,
req_data: CompositeAggReqData,
accessor_idx: usize,
sub_agg: Option<BufferedSubAggs<HighCardSubAggBuffer>>,
bucket_id_provider: BucketIdProvider,
/// Number of sources, needed when creating new DynArrayHeapMaps.
@@ -132,7 +132,10 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
results: &mut IntermediateAggregationResults,
parent_bucket_id: BucketId,
) -> crate::Result<()> {
let name = self.req_data.name.clone();
let name = agg_data
.get_composite_req_data(self.accessor_idx)
.name
.clone();
let buckets = self.add_intermediate_bucket_result(agg_data, parent_bucket_id)?;
results.push(
@@ -150,11 +153,12 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let mem_pre = self.get_memory_consumption(parent_bucket_id);
let composite_agg_data = agg_data.take_composite_req_data(self.accessor_idx);
for doc in docs {
let mut visitor = CompositeKeyVisitor {
doc_id: *doc,
composite_agg_data: &self.req_data,
composite_agg_data: &composite_agg_data,
buckets: &mut self.parent_buckets[parent_bucket_id as usize],
sub_agg: &mut self.sub_agg,
bucket_id_provider: &mut self.bucket_id_provider,
@@ -162,6 +166,7 @@ impl SegmentAggregationCollector for SegmentCompositeCollector {
};
visitor.visit(0, true)?;
}
agg_data.put_back_composite_req_data(self.accessor_idx, composite_agg_data);
if let Some(sub_agg) = &mut self.sub_agg {
sub_agg.check_flush_local(agg_data)?;
@@ -216,13 +221,7 @@ impl SegmentCompositeCollector {
req_data: &mut AggregationsSegmentCtx,
node: &AggRefNode,
) -> crate::Result<Self> {
let composite_req_data =
req_data.per_request.composite_req_data[node.idx_in_req_data].clone();
validate_req(&composite_req_data)?;
req_data
.context
.limits
.add_memory_consumed(composite_req_data.get_memory_consumption() as u64)?;
validate_req(req_data, node.idx_in_req_data)?;
let has_sub_aggregations = !node.children.is_empty();
let sub_agg = if has_sub_aggregations {
@@ -232,11 +231,12 @@ impl SegmentCompositeCollector {
None
};
let composite_req_data = req_data.get_composite_req_data(node.idx_in_req_data);
let num_sources = composite_req_data.req.sources.len();
Ok(SegmentCompositeCollector {
parent_buckets: vec![DynArrayHeapMap::try_new(num_sources)?],
req_data: composite_req_data,
accessor_idx: node.idx_in_req_data,
sub_agg,
bucket_id_provider: BucketIdProvider::default(),
num_sources,
@@ -258,7 +258,7 @@ impl SegmentCompositeCollector {
let mut dict: FxHashMap<Vec<CompositeIntermediateKey>, IntermediateCompositeBucketEntry> =
Default::default();
dict.reserve(heap_map.size());
let composite_data = &self.req_data;
let composite_data = agg_data.get_composite_req_data(self.accessor_idx);
for (key_internal_repr, agg) in heap_map.into_iter() {
let key = resolve_key(&key_internal_repr, composite_data)?;
let mut sub_aggregation_res = IntermediateAggregationResults::default();
@@ -298,7 +298,8 @@ impl SegmentCompositeCollector {
}
}
fn validate_req(composite_data: &CompositeAggReqData) -> crate::Result<()> {
fn validate_req(req_data: &mut AggregationsSegmentCtx, accessor_idx: usize) -> crate::Result<()> {
let composite_data = req_data.get_composite_req_data(accessor_idx);
let req = &composite_data.req;
if req.sources.is_empty() {
return Err(TantivyError::InvalidArgument(

View File

@@ -1,5 +1,4 @@
use std::fmt::Debug;
use std::rc::Rc;
use common::BitSet;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -397,7 +396,6 @@ impl PartialEq for FilterAggregation {
/// Request data for filter aggregation
/// This struct holds the per-segment data needed to execute a filter aggregation
#[derive(Clone)]
pub struct FilterAggReqData {
/// The name of the filter aggregation
pub name: String,
@@ -405,20 +403,22 @@ pub struct FilterAggReqData {
pub req: FilterAggregation,
/// The segment reader
pub segment_reader: SegmentReader,
/// Document evaluator for the filter query (precomputed BitSet).
/// Wrapped in `Rc` so cloning the request data does not duplicate the (potentially large)
/// underlying BitSet.
pub evaluator: Rc<DocumentQueryEvaluator>,
/// Document evaluator for the filter query (precomputed BitSet)
/// This is built once when the request data is created
pub evaluator: DocumentQueryEvaluator,
/// Reusable buffer for matching documents to minimize allocations during collection
pub matching_docs_buffer: Vec<DocId>,
/// True if this filter aggregation is at the top level of the aggregation tree (not nested).
pub is_top_level: bool,
}
impl FilterAggReqData {
/// Returns a rough estimate, in bytes, of the memory held by this request data.
pub(crate) fn get_memory_consumption(&self) -> usize {
// Estimate: name + segment reader reference + bitset + buffer capacity
// (+ one `bool`, presumably accounting for the `is_top_level` flag).
self.name.len()
+ std::mem::size_of::<SegmentReader>()
+ self.evaluator.bitset.len() / 8 // BitSet memory (bits to bytes)
+ self.matching_docs_buffer.capacity() * std::mem::size_of::<DocId>()
+ std::mem::size_of::<bool>()
}
}
@@ -509,10 +509,8 @@ pub struct SegmentFilterCollector<B: SubAggBuffer> {
/// Sub-aggregation collectors
sub_aggregations: Option<BufferedSubAggs<B>>,
bucket_id_provider: BucketIdProvider,
/// Per-segment filter request data, owned by this collector.
req_data: FilterAggReqData,
/// Reusable buffer for matching documents to minimize allocations during collection.
matching_docs_buffer: Vec<DocId>,
/// Accessor index for this filter aggregation (to access FilterAggReqData)
accessor_idx: usize,
}
impl<B: SubAggBuffer> SegmentFilterCollector<B> {
@@ -520,7 +518,6 @@ impl<B: SubAggBuffer> SegmentFilterCollector<B> {
pub(crate) fn from_req_and_validate(
req: &mut AggregationsSegmentCtx,
node: &AggRefNode,
req_data: FilterAggReqData,
) -> crate::Result<Self> {
// Build sub-aggregation collectors if any
let sub_agg_collector = if !node.children.is_empty() {
@@ -530,15 +527,11 @@ impl<B: SubAggBuffer> SegmentFilterCollector<B> {
};
let sub_agg_collector = sub_agg_collector.map(BufferedSubAggs::new);
let max_doc = req_data.segment_reader.max_doc();
let buffer_capacity = crate::docset::COLLECT_BLOCK_BUFFER_LEN.min(max_doc as usize);
Ok(SegmentFilterCollector {
parent_buckets: Vec::new(),
sub_aggregations: sub_agg_collector,
req_data,
accessor_idx: node.idx_in_req_data,
bucket_id_provider: BucketIdProvider::default(),
matching_docs_buffer: Vec::with_capacity(buffer_capacity),
})
}
}
@@ -547,23 +540,18 @@ pub(crate) fn build_segment_filter_collector(
req: &mut AggregationsSegmentCtx,
node: &AggRefNode,
) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
let req_data = req.per_request.filter_req_data[node.idx_in_req_data].clone();
req.context
.limits
.add_memory_consumed(req_data.get_memory_consumption() as u64)?;
let is_top_level = req_data.is_top_level;
let is_top_level = req.per_request.filter_req_data[node.idx_in_req_data]
.as_ref()
.expect("filter_req_data slot is empty")
.is_top_level;
if is_top_level {
Ok(Box::new(
SegmentFilterCollector::<LowCardSubAggBuffer>::from_req_and_validate(
req, node, req_data,
)?,
SegmentFilterCollector::<LowCardSubAggBuffer>::from_req_and_validate(req, node)?,
))
} else {
Ok(Box::new(
SegmentFilterCollector::<HighCardSubAggBuffer>::from_req_and_validate(
req, node, req_data,
)?,
SegmentFilterCollector::<HighCardSubAggBuffer>::from_req_and_validate(req, node)?,
))
}
}
@@ -573,7 +561,7 @@ impl<B: SubAggBuffer> Debug for SegmentFilterCollector<B> {
f.debug_struct("SegmentFilterCollector")
.field("buckets", &self.parent_buckets)
.field("has_sub_aggs", &self.sub_aggregations.is_some())
.field("name", &self.req_data.name)
.field("accessor_idx", &self.accessor_idx)
.finish()
}
}
@@ -610,7 +598,11 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B>
};
// Get the name of this filter aggregation
let name = self.req_data.name.clone();
let name = agg_data.per_request.filter_req_data[self.accessor_idx]
.as_ref()
.expect("filter_req_data slot is empty")
.name
.clone();
results.push(
name,
@@ -631,24 +623,27 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentFilterCollector<B>
}
let mut bucket = self.parent_buckets[parent_bucket_id as usize];
// Take the request data to avoid borrow checker issues with sub-aggregations
let mut req = agg_data.take_filter_req_data(self.accessor_idx);
// Use batch filtering with O(1) BitSet lookups
self.matching_docs_buffer.clear();
self.req_data
.evaluator
.filter_batch(docs, &mut self.matching_docs_buffer);
req.matching_docs_buffer.clear();
req.evaluator
.filter_batch(docs, &mut req.matching_docs_buffer);
bucket.doc_count += self.matching_docs_buffer.len() as u64;
bucket.doc_count += req.matching_docs_buffer.len() as u64;
// Batch process sub-aggregations if we have matches
if !self.matching_docs_buffer.is_empty() {
if !req.matching_docs_buffer.is_empty() {
if let Some(sub_aggs) = &mut self.sub_aggregations {
for &doc_id in &self.matching_docs_buffer {
for &doc_id in &req.matching_docs_buffer {
sub_aggs.push(bucket.bucket_id, doc_id);
}
}
}
// Put the request data back
agg_data.put_back_filter_req_data(self.accessor_idx, req);
if let Some(sub_aggs) = &mut self.sub_aggregations {
sub_aggs.check_flush_local(agg_data)?;
}

View File

@@ -21,7 +21,6 @@ use crate::TantivyError;
/// Contains all information required by the SegmentHistogramCollector to perform the
/// histogram or date_histogram aggregation on a segment.
#[derive(Debug, Clone)]
pub struct HistogramAggReqData {
/// The column accessor to access the fast field values.
pub accessor: Column<u64>,
@@ -244,52 +243,19 @@ impl Display for HistogramBounds {
}
impl HistogramBounds {
pub(crate) fn contains(&self, val: f64) -> bool {
fn contains(&self, val: f64) -> bool {
val >= self.min && val <= self.max
}
}
/// The per-bucket identifier stored in a [`SegmentHistogramBucketEntry`].
///
/// It is [`BucketId`] when the histogram has sub aggregations (which key their state by it), and
/// the zero-sized `()` when it does not. Without sub aggregations the id is never read, so storing
/// `()` drops 8 bytes per bucket (24 -> 16) and turns id assignment into a no-op.
///
/// The `Default` bound is what allows an empty entry to exist before any id has been assigned
/// (the dense storage pre-fills its slots with default entries).
pub trait BucketIdSlot: Copy + Default + std::fmt::Debug + PartialEq {
/// Assigns the next id from the provider, called once when a bucket is first filled.
fn assign(provider: &mut BucketIdProvider) -> Self;
/// Resolves to the `BucketId` for sub-aggregation bookkeeping.
///
/// Only ever called for the [`BucketId`] slot: the `()` slot is used exactly when there are no
/// sub aggregations, so every call site is guarded by `sub_agg.is_some()` and is dead for `()`.
///
/// # Panics
///
/// The `()` implementation panics if it is ever called.
fn to_bucket_id(self) -> BucketId;
}
/// Slot that stores a real [`BucketId`]; this is the variant that can be resolved with
/// `to_bucket_id` for sub-aggregation bookkeeping.
impl BucketIdSlot for BucketId {
// Draws a fresh id from the provider.
#[inline(always)]
fn assign(provider: &mut BucketIdProvider) -> Self {
provider.next_bucket_id()
}
// The slot already is the id, so resolving it is the identity.
#[inline(always)]
fn to_bucket_id(self) -> BucketId {
self
}
}
/// Zero-sized slot that stores no id at all.
impl BucketIdSlot for () {
// Nothing to store: the provider is left untouched and no id is consumed.
#[inline(always)]
fn assign(_provider: &mut BucketIdProvider) -> Self {}
// There is no id to hand out; reaching this is treated as a bug and panics.
#[inline(always)]
fn to_bucket_id(self) -> BucketId {
unreachable!("bucket ids are only resolved when sub aggregations are present")
}
}
#[derive(Default, Clone, Debug, PartialEq)]
pub(crate) struct SegmentHistogramBucketEntry<B> {
pub(crate) struct SegmentHistogramBucketEntry {
pub key: f64,
pub doc_count: u64,
pub bucket_id: B,
pub bucket_id: BucketId,
}
impl<B: BucketIdSlot> SegmentHistogramBucketEntry<B> {
impl SegmentHistogramBucketEntry {
pub(crate) fn into_intermediate_bucket_entry(
self,
sub_aggregation: &mut Option<HighCardBufferedSubAggs>,
@@ -302,7 +268,7 @@ impl<B: BucketIdSlot> SegmentHistogramBucketEntry<B> {
.add_intermediate_aggregation_result(
agg_data,
&mut sub_aggregation_res,
self.bucket_id.to_bucket_id(),
self.bucket_id,
)?;
}
Ok(IntermediateHistogramBucketEntry {
@@ -313,147 +279,39 @@ impl<B: BucketIdSlot> SegmentHistogramBucketEntry<B> {
}
}
/// The contiguous bucket range a histogram can span, derived from the column min/max (clamped to
/// the histogram bounds). Buckets in `[base_pos, base_pos + len)` can be stored in a flat `Vec`
/// indexed by `bucket_pos - base_pos`, avoiding the hash map on the hot path.
#[derive(Clone, Copy, Debug)]
pub(crate) struct DenseRange {
/// `bucket_pos` mapped to index 0 of the dense `Vec`.
pub(crate) base_pos: i64,
/// Number of bucket positions in the range.
pub(crate) len: usize,
#[derive(Clone, Debug, Default)]
struct HistogramBuckets {
pub buckets: FxHashMap<i64, SegmentHistogramBucketEntry>,
}
/// Storage for the histogram buckets of a single parent bucket.
///
/// Starts out sparse (a hash map keyed by `bucket_pos`). Once enough distinct buckets have been
/// filled that we are clearly going to cover most of the column's theoretical range, it switches
/// to a dense `Vec` indexed by `bucket_pos - base_pos`, which removes hashing from the hot loop.
#[derive(Clone, Debug)]
enum HistogramBuckets<B> {
/// Sparse storage: one entry per bucket that has been created, keyed by `bucket_pos`.
Sparse(FxHashMap<i64, SegmentHistogramBucketEntry<B>>),
/// Dense storage: `buckets[i]` holds the entry for `bucket_pos == base_pos + i`.
Dense {
/// The `bucket_pos` that maps to index 0 of `buckets`.
base_pos: i64,
/// One slot per bucket position; a slot with `doc_count == 0` has not been hit yet.
buckets: Vec<SegmentHistogramBucketEntry<B>>,
},
}
/// Fresh storage is always the sparse variant with an empty map.
///
/// Written by hand so that it places no bound on `B`.
impl<B> Default for HistogramBuckets<B> {
fn default() -> Self {
HistogramBuckets::Sparse(FxHashMap::default())
}
}
impl<B: BucketIdSlot> HistogramBuckets<B> {
impl HistogramBuckets {
fn memory_consumption(&self) -> u64 {
let num_slots = match self {
HistogramBuckets::Sparse(map) => map.capacity(),
HistogramBuckets::Dense { buckets, .. } => buckets.capacity(),
};
num_slots as u64 * std::mem::size_of::<SegmentHistogramBucketEntry<B>>() as u64
}
/// Switches from sparse to dense storage once the dense `Vec` would use no more memory than the
/// hash map does now, so the switch never increases memory. Called at block boundaries.
///
/// The `Vec` holds one `Entry` per bucket position in the range. The map additionally stores
/// the key and a control byte per slot, at a load factor of 7/16..7/8, so for a dense histogram
/// its footprint grows past the `Vec` well before full coverage. And since the `Vec` never
/// grows afterwards while the map would keep growing, dense only gets relatively cheaper — so
/// no upper bound on the range is needed: a large but sparse range simply never crosses over.
#[inline]
fn maybe_densify(&mut self, dense_range: Option<DenseRange>) {
let Some(range) = dense_range else { return };
let HistogramBuckets::Sparse(map) = self else {
return;
};
let dense_bytes = range
.len
.saturating_mul(std::mem::size_of::<SegmentHistogramBucketEntry<B>>());
let sparse_bytes = map
.capacity()
.saturating_mul(std::mem::size_of::<(i64, SegmentHistogramBucketEntry<B>)>() + 1);
if dense_bytes > sparse_bytes {
return;
}
let map = std::mem::take(map);
let mut buckets = vec![SegmentHistogramBucketEntry::<B>::default(); range.len];
for (bucket_pos, entry) in map {
buckets[(bucket_pos - range.base_pos) as usize] = entry;
}
*self = HistogramBuckets::Dense {
base_pos: range.base_pos,
buckets,
};
}
/// Returns the bucket entry for `bucket_pos`, setting its key (and `bucket_id`, when `B` is
/// [`BucketId`]) on first use.
///
/// For the dense variant `bucket_pos` is guaranteed to be inside the range, since it is
/// derived from the column min/max that bounds every value (see [`compute_dense_range`]).
#[inline]
fn get_or_create(
&mut self,
bucket_pos: i64,
bucket_id_provider: &mut BucketIdProvider,
key_from_pos: impl FnOnce(i64) -> f64,
) -> &mut SegmentHistogramBucketEntry<B> {
match self {
HistogramBuckets::Sparse(map) => {
map.entry(bucket_pos)
.or_insert_with(|| SegmentHistogramBucketEntry {
key: key_from_pos(bucket_pos),
doc_count: 0,
bucket_id: B::assign(bucket_id_provider),
})
}
HistogramBuckets::Dense { base_pos, buckets } => {
let idx = (bucket_pos - *base_pos) as usize;
debug_assert!(idx < buckets.len(), "bucket_pos outside the dense range");
let entry = &mut buckets[idx];
if entry.doc_count == 0 {
entry.key = key_from_pos(bucket_pos);
entry.bucket_id = B::assign(bucket_id_provider);
}
entry
}
}
}
/// Consumes the storage, yielding all non-empty bucket entries.
fn into_filled_entries(self) -> Vec<SegmentHistogramBucketEntry<B>> {
match self {
HistogramBuckets::Sparse(map) => map.into_values().collect(),
HistogramBuckets::Dense { buckets, .. } => {
buckets.into_iter().filter(|b| b.doc_count > 0).collect()
}
}
self.buckets.capacity() as u64 * std::mem::size_of::<SegmentHistogramBucketEntry>() as u64
}
}
/// The collector puts values from the fast field into the correct buckets and does a conversion to
/// the correct datatype.
#[derive(Debug)]
pub struct SegmentHistogramCollector<B> {
pub struct SegmentHistogramCollector {
/// The buckets containing the aggregation data.
/// One Histogram bucket per parent bucket id.
parent_buckets: Vec<HistogramBuckets<B>>,
parent_buckets: Vec<HistogramBuckets>,
sub_agg: Option<HighCardBufferedSubAggs>,
req_data: HistogramAggReqData,
accessor_idx: usize,
bucket_id_provider: BucketIdProvider,
/// Theoretical bucket range derived from the column min/max, if dense `Vec` storage is
/// viable. `None` keeps every parent bucket in the sparse hash map.
dense_range: Option<DenseRange>,
}
impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<B> {
impl SegmentAggregationCollector for SegmentHistogramCollector {
fn add_intermediate_aggregation_result(
&mut self,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
parent_bucket_id: BucketId,
) -> crate::Result<()> {
let name = self.req_data.name.clone();
let name = agg_data
.get_histogram_req_data(self.accessor_idx)
.name
.clone();
// TODO: avoid prepare_max_bucket here and handle empty buckets.
self.prepare_max_bucket(parent_bucket_id, agg_data)?;
let histogram = std::mem::take(&mut self.parent_buckets[parent_bucket_id as usize]);
@@ -470,13 +328,10 @@ impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<
docs: &[crate::DocId],
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let req = agg_data.take_histogram_req_data(self.accessor_idx);
let mem_pre = self.get_memory_consumption(parent_bucket_id);
let dense_range = self.dense_range;
let store = &mut self.parent_buckets[parent_bucket_id as usize];
// Upgrade to dense storage before processing the block if the buckets are dense enough.
store.maybe_densify(dense_range);
let buckets = &mut self.parent_buckets[parent_bucket_id as usize].buckets;
let req = &self.req_data;
let bounds = req.bounds;
let interval = req.req.interval;
let offset = req.offset;
@@ -485,42 +340,31 @@ impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<
agg_data
.column_block_accessor
.fetch_block(docs, &req.accessor);
// special path for nested buckets
if let Some(sub_agg) = &mut self.sub_agg {
for (doc, val) in agg_data
.column_block_accessor
.iter_docid_vals(docs, &req.accessor)
{
let val = f64_from_fastfield_u64(val, req.field_type);
if bounds.contains(val) {
let bucket = store.get_or_create(
get_bucket_pos(val),
&mut self.bucket_id_provider,
|pos| get_bucket_key_from_pos(pos as f64, interval, offset),
);
bucket.doc_count += 1;
sub_agg.push(bucket.bucket_id.to_bucket_id(), doc);
}
}
} else {
for val in agg_data.column_block_accessor.iter_vals() {
let val = f64_from_fastfield_u64(val, req.field_type);
if bounds.contains(val) {
let bucket = store.get_or_create(
get_bucket_pos(val),
&mut self.bucket_id_provider,
|pos| get_bucket_key_from_pos(pos as f64, interval, offset),
);
bucket.doc_count += 1;
for (doc, val) in agg_data
.column_block_accessor
.iter_docid_vals(docs, &req.accessor)
{
let val = f64_from_fastfield_u64(val, req.field_type);
let bucket_pos = get_bucket_pos(val);
if bounds.contains(val) {
let bucket = buckets.entry(bucket_pos).or_insert_with(|| {
let key = get_bucket_key_from_pos(bucket_pos as f64, interval, offset);
SegmentHistogramBucketEntry {
key,
doc_count: 0,
bucket_id: self.bucket_id_provider.next_bucket_id(),
}
});
bucket.doc_count += 1;
if let Some(sub_agg) = &mut self.sub_agg {
sub_agg.push(bucket.bucket_id, doc);
}
}
}
agg_data.put_back_histogram_req_data(self.accessor_idx, req);
// `checked_sub` is `None` when densifying shrank the accounted memory; only account growth.
if let Some(mem_delta) = self
.get_memory_consumption(parent_bucket_id)
.checked_sub(mem_pre)
{
let mem_delta = self.get_memory_consumption(parent_bucket_id) - mem_pre;
if mem_delta > 0 {
agg_data.context.limits.add_memory_consumed(mem_delta)?;
}
@@ -544,7 +388,9 @@ impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<
_agg_data: &AggregationsSegmentCtx,
) -> crate::Result<()> {
while self.parent_buckets.len() <= max_bucket as usize {
self.parent_buckets.push(HistogramBuckets::default());
self.parent_buckets.push(HistogramBuckets {
buckets: FxHashMap::default(),
});
}
Ok(())
}
@@ -561,7 +407,7 @@ impl<B: BucketIdSlot> SegmentAggregationCollector for SegmentHistogramCollector<
}
}
impl<B: BucketIdSlot> SegmentHistogramCollector<B> {
impl SegmentHistogramCollector {
fn get_memory_consumption(&self, parent_bucket_id: BucketId) -> u64 {
self.parent_buckets[parent_bucket_id as usize].memory_consumption()
}
@@ -570,19 +416,21 @@ impl<B: BucketIdSlot> SegmentHistogramCollector<B> {
fn add_intermediate_bucket_result(
&mut self,
agg_data: &AggregationsSegmentCtx,
histogram: HistogramBuckets<B>,
histogram: HistogramBuckets,
) -> crate::Result<IntermediateBucketResult> {
let filled = histogram.into_filled_entries();
let mut buckets = Vec::with_capacity(filled.len());
let mut buckets = Vec::with_capacity(histogram.buckets.len());
for bucket in filled {
for bucket in histogram.buckets.into_values() {
let bucket_res = bucket.into_intermediate_bucket_entry(&mut self.sub_agg, agg_data);
buckets.push(bucket_res?);
}
buckets.sort_unstable_by(|b1, b2| b1.key.total_cmp(&b2.key));
let is_date_agg = self.req_data.field_type == ColumnType::DateTime;
let is_date_agg = agg_data
.get_histogram_req_data(self.accessor_idx)
.field_type
== ColumnType::DateTime;
Ok(IntermediateBucketResult::Histogram {
buckets,
is_date_agg,
@@ -598,175 +446,32 @@ impl<B: BucketIdSlot> SegmentHistogramCollector<B> {
} else {
None
};
let mut req_data = agg_data.per_request.histogram_req_data[node.idx_in_req_data].clone();
normalize_histogram_req(&mut req_data)?;
agg_data
.context
.limits
.add_memory_consumed(req_data.get_memory_consumption() as u64)?;
let dense_range = compute_dense_range(
&req_data.accessor,
req_data.field_type,
req_data.req.interval,
req_data.offset,
req_data.bounds,
);
let req_data = agg_data.get_histogram_req_data_mut(node.idx_in_req_data);
req_data.req.validate()?;
if req_data.field_type == ColumnType::DateTime && !req_data.is_date_histogram {
req_data.req.normalize_date_time();
}
req_data.bounds = req_data.req.hard_bounds.unwrap_or(HistogramBounds {
min: f64::MIN,
max: f64::MAX,
});
req_data.offset = req_data.req.offset.unwrap_or(0.0);
let sub_agg = sub_agg.map(BufferedSubAggs::new);
Ok(Self {
parent_buckets: Default::default(),
sub_agg,
req_data,
accessor_idx: node.idx_in_req_data,
bucket_id_provider: BucketIdProvider::default(),
dense_range,
})
}
}
impl SegmentHistogramCollector<()> {
    /// Wraps a flat, row-major 2D counter grid into a histogram collector with one dense
    /// histogram per parent.
    ///
    /// Parent `t` is filled from `counts[t * num_time_buckets .. (t + 1) * num_time_buckets]`,
    /// and column `b` of a row becomes the bucket at position `base_pos + b`. The fused
    /// terms×histogram collector uses this to turn its counters into the regular intermediate
    /// result, so cross-segment merging is shared with the general path.
    ///
    /// A `num_time_buckets` of zero yields a collector without any parent bucket, and trailing
    /// counts that do not fill a complete row are ignored.
    pub(crate) fn from_dense_rows(
        req_data: HistogramAggReqData,
        base_pos: i64,
        num_time_buckets: usize,
        counts: &[u32],
    ) -> Self {
        let interval = req_data.req.interval;
        let offset = req_data.offset;

        // `chunks_exact` rejects a chunk size of zero, so the empty grid is handled up front.
        let parent_buckets: Vec<_> = if num_time_buckets == 0 {
            Vec::new()
        } else {
            counts
                .chunks_exact(num_time_buckets)
                .map(|row| {
                    let mut buckets = Vec::with_capacity(row.len());
                    for (column, &doc_count) in row.iter().enumerate() {
                        let bucket_pos = base_pos + column as i64;
                        buckets.push(SegmentHistogramBucketEntry {
                            key: get_bucket_key_from_pos(bucket_pos as f64, interval, offset),
                            doc_count: u64::from(doc_count),
                            bucket_id: (),
                        });
                    }
                    HistogramBuckets::Dense { base_pos, buckets }
                })
                .collect()
        };

        Self {
            parent_buckets,
            sub_agg: None,
            req_data,
            bucket_id_provider: BucketIdProvider::default(),
            dense_range: None,
        }
    }
}
/// Validates a histogram request and resolves its derived fields in place.
///
/// A plain `histogram` on a date column gets its date ns-normalization applied, and `offset` and
/// `bounds` are resolved from the request (`0.0` and the widest possible bounds when absent).
///
/// # Errors
///
/// Returns the error reported by the request's own `validate()`.
fn normalize_histogram_req(req_data: &mut HistogramAggReqData) -> crate::Result<()> {
    req_data.req.validate()?;

    let is_plain_histogram_on_dates =
        req_data.field_type == ColumnType::DateTime && !req_data.is_date_histogram;
    if is_plain_histogram_on_dates {
        req_data.req.normalize_date_time();
    }

    req_data.offset = req_data.req.offset.unwrap_or(0.0);

    let unbounded = HistogramBounds {
        min: f64::MIN,
        max: f64::MAX,
    };
    req_data.bounds = match req_data.req.hard_bounds {
        None => unbounded,
        Some(hard_bounds) => {
            // `hard_bounds` that already contain the whole column range cannot exclude any
            // value, so the per-doc `bounds.contains` check would be a no-op. Collapsing them to
            // the unbounded sentinel lets the histogram hot loop skip that check and lets the
            // fused term×histogram path derive per-term counts from the grid. Only this
            // collect-time filter changes: empty-bucket emission reads `req.hard_bounds`
            // directly (see `get_req_min_max`), and `hard_bounds` only ever clips that range, so
            // a wider-than-data bound leaves the result unchanged.
            let col_min =
                f64_from_fastfield_u64(req_data.accessor.min_value(), req_data.field_type);
            let col_max =
                f64_from_fastfield_u64(req_data.accessor.max_value(), req_data.field_type);
            let excludes_nothing = col_min >= hard_bounds.min && col_max <= hard_bounds.max;
            if excludes_nothing {
                unbounded
            } else {
                hard_bounds
            }
        }
    };
    Ok(())
}
/// Produces an owned, normalized copy of the histogram request at `node` together with its dense
/// bucket range.
///
/// The request is cloned, then normalized (interval/offset/bounds resolved). Returns `Ok(None)`
/// when the column has no usable range. Used by the fused terms×histogram collector, which then
/// owns the normalized request.
///
/// # Errors
///
/// Propagates the error from normalizing (validating) the request.
pub(crate) fn prepare_histogram_dense_range(
    agg_data: &AggregationsSegmentCtx,
    node: &AggRefNode,
) -> crate::Result<Option<(HistogramAggReqData, DenseRange)>> {
    let mut normalized_req =
        agg_data.per_request.histogram_req_data[node.idx_in_req_data].clone();
    normalize_histogram_req(&mut normalized_req)?;

    let Some(range) = compute_dense_range(
        &normalized_req.accessor,
        normalized_req.field_type,
        normalized_req.req.interval,
        normalized_req.offset,
        normalized_req.bounds,
    ) else {
        return Ok(None);
    };
    Ok(Some((normalized_req, range)))
}
/// Creates the boxed segment collector for a histogram (or date histogram) node.
///
/// The bucket-id storage depends on whether the node has sub aggregations: without children the
/// collector is instantiated with `()` (no id stored), otherwise with [`BucketId`].
///
/// # Errors
///
/// Propagates any error from `from_req_and_validate`.
pub(crate) fn build_segment_histogram_collector(
    agg_data: &mut AggregationsSegmentCtx,
    node: &AggRefNode,
) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
    let has_sub_aggregations = !node.children.is_empty();
    let collector: Box<dyn SegmentAggregationCollector> = if has_sub_aggregations {
        Box::new(SegmentHistogramCollector::<BucketId>::from_req_and_validate(agg_data, node)?)
    } else {
        Box::new(SegmentHistogramCollector::<()>::from_req_and_validate(agg_data, node)?)
    };
    Ok(collector)
}
#[inline]
pub(crate) fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
((val - offset) / interval).floor()
}
/// Derives the dense bucket range of a column from its min/max value, clamped to the histogram
/// bounds.
///
/// Returns `None` when no value can fall within the bounds, or when the number of bucket
/// positions cannot be represented (arithmetic overflow, or it does not fit in `usize`).
///
/// The range has no upper limit here: whether dense storage is actually used is decided later,
/// per parent bucket, by [`HistogramBuckets::maybe_densify`] based on the memory it would save.
///
/// Because the column min/max bound every value the collector can see, a `Vec` sized to this
/// range can be indexed by `bucket_pos - base_pos` without an out-of-bounds check on the hot path.
fn compute_dense_range(
    accessor: &Column<u64>,
    field_type: ColumnType,
    interval: f64,
    offset: f64,
    bounds: HistogramBounds,
) -> Option<DenseRange> {
    // Intersect the column's value range with the requested bounds.
    let lowest = f64_from_fastfield_u64(accessor.min_value(), field_type).max(bounds.min);
    let highest = f64_from_fastfield_u64(accessor.max_value(), field_type).min(bounds.max);
    if lowest > highest {
        return None;
    }

    let base_pos = get_bucket_pos_f64(lowest, interval, offset) as i64;
    let top_pos = get_bucket_pos_f64(highest, interval, offset) as i64;

    // Inclusive span of bucket positions, with every step checked for overflow.
    let span = top_pos.checked_sub(base_pos)?.checked_add(1)?;
    let len = usize::try_from(span).ok()?;
    if len == 0 {
        return None;
    }
    Some(DenseRange { base_pos, len })
}
#[inline]
fn get_bucket_key_from_pos(bucket_pos: f64, interval: f64, offset: f64) -> f64 {
bucket_pos * interval + offset
@@ -1071,62 +776,6 @@ mod tests {
Ok(())
}
/// Runs the dense-storage scenario both without and with a sub aggregation.
#[test]
fn histogram_dense_storage_test() -> crate::Result<()> {
    for with_sub_agg in [false, true] {
        histogram_dense_storage_test_with_opt(with_sub_agg)?;
    }
    Ok(())
}
/// Covers the transition from the sparse hash map to the dense `Vec` storage.
///
/// The transition happens at a block boundary (a block is `COLLECT_BLOCK_BUFFER_LEN` = 64 docs),
/// which requires many docs in one segment that densely cover the bucket range. `with_sub_agg`
/// selects between the `iter_vals` fast path and the `iter_docid_vals` path used when a sub
/// aggregation is present.
fn histogram_dense_storage_test_with_opt(with_sub_agg: bool) -> crate::Result<()> {
    let num_buckets = 50usize;
    let docs_per_bucket = 10usize;

    // The sequence 0, 1, .., num_buckets - 1 repeated `docs_per_bucket` times: every value in
    // bucket `k` equals `k`, so the per-bucket average is exactly `k`.
    let values: Vec<f64> = (0..docs_per_bucket)
        .flat_map(|_| (0..num_buckets).map(|k| k as f64))
        .collect();

    // `merge_segments = true` collapses the per-value segments into a single segment with all
    // the docs, which is collected in 64-doc blocks and therefore switches to dense storage.
    let index = get_test_index_from_values(true, &values)?;

    let request_json = if with_sub_agg {
        json!({
            "histogram": {
                "histogram": { "field": "score_f64", "interval": 1.0 },
                "aggs": { "avg": { "avg": { "field": "score_f64" } } }
            }
        })
    } else {
        json!({
            "histogram": {
                "histogram": { "field": "score_f64", "interval": 1.0 }
            }
        })
    };
    let agg_req: Aggregations = serde_json::from_value(request_json).unwrap();

    let res = exec_request(agg_req, &index)?;
    let buckets = &res["histogram"]["buckets"];
    for k in 0..num_buckets {
        let bucket = &buckets[k];
        assert_eq!(bucket["key"], k as f64);
        assert_eq!(bucket["doc_count"], docs_per_bucket as u64);
        if with_sub_agg {
            assert_eq!(bucket["avg"]["value"], k as f64);
        }
    }
    // No bucket exists past the last expected one.
    assert_eq!(buckets[num_buckets], Value::Null);
    Ok(())
}
#[test]
fn histogram_memory_limit() -> crate::Result<()> {
let index = get_test_index_with_num_docs(true, 100)?;
@@ -1421,55 +1070,6 @@ mod tests {
Ok(())
}
// Non-binding `hard_bounds` scenario, run with `merge_segments = false`.
#[test]
fn histogram_non_binding_hard_bounds_test_multi_segment() -> crate::Result<()> {
histogram_non_binding_hard_bounds_test_with_opt(false)
}
// Non-binding `hard_bounds` scenario, run with `merge_segments = true`.
#[test]
fn histogram_non_binding_hard_bounds_test_single_segment() -> crate::Result<()> {
histogram_non_binding_hard_bounds_test_with_opt(true)
}
/// Checks that `hard_bounds` which cannot exclude any value leave the result untouched.
///
/// The bounds used here are wider than the data and have mid-interval edges (covering the "bound
/// cuts a bucket" case), so the response must equal that of the same request without bounds.
/// Guards the normalization that collapses such bounds to the unbounded sentinel so the hot
/// loop / fused path can skip the per-doc bounds check.
fn histogram_non_binding_hard_bounds_test_with_opt(merge_segments: bool) -> crate::Result<()> {
    let values = vec![10.0, 12.0, 14.0, 16.0, 10.0, 13.0, 10.0, 12.0];
    let index = get_test_index_from_values(merge_segments, &values)?;

    // Mid-interval edges, but wider than the data range [10, 16] -> they exclude nothing.
    let bounded_json = json!({
        "histogram": {
            "histogram": {
                "field": "score_f64",
                "interval": 1.0,
                "hard_bounds": { "min": 9.5, "max": 16.5 }
            }
        }
    });
    let unbounded_json = json!({
        "histogram": {
            "histogram": { "field": "score_f64", "interval": 1.0 }
        }
    });
    let with_bounds: Aggregations = serde_json::from_value(bounded_json).unwrap();
    let no_bounds: Aggregations = serde_json::from_value(unbounded_json).unwrap();

    let res_bounds = exec_request(with_bounds, &index)?;
    let res_plain = exec_request(no_bounds, &index)?;

    // Dropping a non-binding bound must not change anything.
    assert_eq!(res_bounds, res_plain);

    // Sanity: buckets span the data range with gaps filled (min_doc_count defaults to 0).
    let buckets = &res_bounds["histogram"]["buckets"];
    assert_eq!(buckets[0]["key"], 10.0);
    assert_eq!(buckets[0]["doc_count"], 3);
    assert_eq!(buckets[6]["key"], 16.0);
    assert_eq!(buckets[6]["doc_count"], 1);
    assert_eq!(buckets[7], Value::Null);
    Ok(())
}
#[test]
fn histogram_empty_result_behaviour_test_single_segment() -> crate::Result<()> {
histogram_empty_result_behaviour_test_with_opt(true)

View File

@@ -23,7 +23,6 @@ use crate::TantivyError;
/// Contains all information required by the SegmentRangeCollector to perform the
/// range aggregation on a segment.
#[derive(Debug, Clone)]
pub struct RangeAggReqData {
/// The column accessor to access the fast field values.
pub accessor: Column<u64>,
@@ -162,7 +161,7 @@ pub struct SegmentRangeCollector<B: SubAggBuffer> {
/// One for each ParentBucketId
parent_buckets: Vec<Vec<SegmentRangeAndBucketEntry>>,
column_type: ColumnType,
pub(crate) req_data: RangeAggReqData,
pub(crate) accessor_idx: usize,
sub_agg: Option<BufferedSubAggs<B>>,
/// Here things get a bit weird. We need to assign unique bucket ids across all
/// parent buckets. So we keep track of the next available bucket id here.
@@ -185,7 +184,7 @@ impl<B: SubAggBuffer> Debug for SegmentRangeCollector<B> {
f.debug_struct("SegmentRangeCollector")
.field("parent_buckets_len", &self.parent_buckets.len())
.field("column_type", &self.column_type)
.field("name", &self.req_data.name)
.field("accessor_idx", &self.accessor_idx)
.field("has_sub_agg", &self.sub_agg.is_some())
.finish()
}
@@ -240,7 +239,10 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
) -> crate::Result<()> {
self.prepare_max_bucket(parent_bucket_id, agg_data)?;
let field_type = self.column_type;
let name = self.req_data.name.to_string();
let name = agg_data
.get_range_req_data(self.accessor_idx)
.name
.to_string();
let buckets = std::mem::take(&mut self.parent_buckets[parent_bucket_id as usize]);
@@ -279,15 +281,17 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
docs: &[crate::DocId],
agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
let req = agg_data.take_range_req_data(self.accessor_idx);
agg_data
.column_block_accessor
.fetch_block(docs, &self.req_data.accessor);
.fetch_block(docs, &req.accessor);
let buckets = &mut self.parent_buckets[parent_bucket_id as usize];
for (doc, val) in agg_data
.column_block_accessor
.iter_docid_vals(docs, &self.req_data.accessor)
.iter_docid_vals(docs, &req.accessor)
{
let bucket_pos = get_bucket_pos(val, buckets);
let bucket = &mut buckets[bucket_pos];
@@ -297,6 +301,7 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
}
}
agg_data.put_back_range_req_data(self.accessor_idx, req);
if let Some(sub_agg) = self.sub_agg.as_mut() {
sub_agg.check_flush_local(agg_data)?;
}
@@ -314,10 +319,10 @@ impl<B: SubAggBuffer> SegmentAggregationCollector for SegmentRangeCollector<B> {
fn prepare_max_bucket(
&mut self,
max_bucket: BucketId,
_agg_data: &AggregationsSegmentCtx,
agg_data: &AggregationsSegmentCtx,
) -> crate::Result<()> {
while self.parent_buckets.len() <= max_bucket as usize {
let new_buckets = self.create_new_buckets()?;
let new_buckets = self.create_new_buckets(agg_data)?;
self.parent_buckets.push(new_buckets);
}
@@ -341,11 +346,8 @@ pub(crate) fn build_segment_range_collector(
agg_data: &mut AggregationsSegmentCtx,
node: &AggRefNode,
) -> crate::Result<Box<dyn SegmentAggregationCollector>> {
let req_data = agg_data.per_request.range_req_data[node.idx_in_req_data].clone();
agg_data
.context
.limits
.add_memory_consumed(req_data.get_memory_consumption() as u64)?;
let accessor_idx = node.idx_in_req_data;
let req_data = agg_data.get_range_req_data(node.idx_in_req_data);
let field_type = req_data.field_type;
// TODO: A better metric instead of is_top_level would be the number of buckets expected.
@@ -363,7 +365,7 @@ pub(crate) fn build_segment_range_collector(
Ok(Box::new(SegmentRangeCollector::<LowCardSubAggBuffer> {
sub_agg: sub_agg.map(LowCardBufferedSubAggs::new),
column_type: field_type,
req_data,
accessor_idx,
parent_buckets: Vec::new(),
bucket_id_provider: BucketIdProvider::default(),
limits: agg_data.context.limits.clone(),
@@ -372,7 +374,7 @@ pub(crate) fn build_segment_range_collector(
Ok(Box::new(SegmentRangeCollector::<HighCardSubAggBuffer> {
sub_agg: sub_agg.map(BufferedSubAggs::new),
column_type: field_type,
req_data,
accessor_idx,
parent_buckets: Vec::new(),
bucket_id_provider: BucketIdProvider::default(),
limits: agg_data.context.limits.clone(),
@@ -381,9 +383,12 @@ pub(crate) fn build_segment_range_collector(
}
impl<B: SubAggBuffer> SegmentRangeCollector<B> {
pub(crate) fn create_new_buckets(&mut self) -> crate::Result<Vec<SegmentRangeAndBucketEntry>> {
pub(crate) fn create_new_buckets(
&mut self,
agg_data: &AggregationsSegmentCtx,
) -> crate::Result<Vec<SegmentRangeAndBucketEntry>> {
let field_type = self.column_type;
let req_data = &self.req_data;
let req_data = agg_data.get_range_req_data(self.accessor_idx);
// The range input on the request is f64.
// We need to convert to u64 ranges, because we read the values as u64.
// The mapping from the conversion is monotonic so ordering is preserved.
@@ -558,16 +563,17 @@ mod tests {
get_test_index_with_num_docs,
};
pub fn build_test_buckets(
ranges: &[RangeAggregationRange],
pub fn get_collector_from_ranges(
ranges: Vec<RangeAggregationRange>,
field_type: ColumnType,
) -> Vec<SegmentRangeAndBucketEntry> {
) -> SegmentRangeCollector<HighCardSubAggBuffer> {
let req = RangeAggregation {
field: "dummy".to_string(),
ranges: ranges.to_vec(),
ranges,
..Default::default()
};
extend_validate_ranges(&req.ranges, &field_type)
// Build buckets directly as in from_req_and_validate without AggregationsData
let buckets: Vec<_> = extend_validate_ranges(&req.ranges, &field_type)
.expect("unexpected error in extend_validate_ranges")
.iter()
.map(|range| {
@@ -598,7 +604,16 @@ mod tests {
},
}
})
.collect()
.collect();
SegmentRangeCollector {
parent_buckets: vec![buckets],
column_type: field_type,
accessor_idx: 0,
sub_agg: None,
bucket_id_provider: Default::default(),
limits: AggregationLimitsGuard::default(),
}
}
#[test]
@@ -841,10 +856,10 @@ mod tests {
#[test]
fn bucket_test_extend_range_hole() {
let buckets = [(10f64..20f64).into(), (30f64..40f64).into()];
let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
let buckets = vec![(10f64..20f64).into(), (30f64..40f64).into()];
let collector = get_collector_from_ranges(buckets, ColumnType::F64);
let buckets = parent_buckets[0].clone();
let buckets = collector.parent_buckets[0].clone();
assert_eq!(buckets[0].range.start, u64::MIN);
assert_eq!(buckets[0].range.end, 10f64.to_u64());
assert_eq!(buckets[1].range.start, 10f64.to_u64());
@@ -860,14 +875,14 @@ mod tests {
fn bucket_test_range_conversion_special_case() {
// the monotonic conversion between f64 and u64, does not map f64::MIN.to_u64() ==
// u64::MIN, but the into trait converts f64::MIN/MAX to None
let buckets = [
let buckets = vec![
(f64::MIN..10f64).into(),
(10f64..20f64).into(),
(20f64..f64::MAX).into(),
];
let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
let collector = get_collector_from_ranges(buckets, ColumnType::F64);
let buckets = parent_buckets[0].clone();
let buckets = collector.parent_buckets[0].clone();
assert_eq!(buckets[0].range.start, u64::MIN);
assert_eq!(buckets[0].range.end, 10f64.to_u64());
assert_eq!(buckets[1].range.start, 10f64.to_u64());
@@ -879,28 +894,28 @@ mod tests {
#[test]
fn bucket_range_test_negative_vals() {
let buckets = [(-10f64..-1f64).into()];
let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
let buckets = vec![(-10f64..-1f64).into()];
let collector = get_collector_from_ranges(buckets, ColumnType::F64);
let buckets = parent_buckets[0].clone();
let buckets = collector.parent_buckets[0].clone();
assert_eq!(&buckets[0].bucket.key.to_string(), "*--10");
assert_eq!(&buckets[buckets.len() - 1].bucket.key.to_string(), "-1-*");
}
#[test]
fn bucket_range_test_positive_vals() {
let buckets = [(0f64..10f64).into()];
let parent_buckets = [build_test_buckets(&buckets, ColumnType::F64)];
let buckets = vec![(0f64..10f64).into()];
let collector = get_collector_from_ranges(buckets, ColumnType::F64);
let buckets = parent_buckets[0].clone();
let buckets = collector.parent_buckets[0].clone();
assert_eq!(&buckets[0].bucket.key.to_string(), "*-0");
assert_eq!(&buckets[buckets.len() - 1].bucket.key.to_string(), "10-*");
}
#[test]
fn range_binary_search_test_u64() {
let check_ranges = |ranges: &[RangeAggregationRange]| {
let parent_buckets = [build_test_buckets(ranges, ColumnType::U64)];
let search = |val: u64| get_bucket_pos(val, &parent_buckets[0]);
let check_ranges = |ranges: Vec<RangeAggregationRange>| {
let collector = get_collector_from_ranges(ranges, ColumnType::U64);
let search = |val: u64| get_bucket_pos(val, &collector.parent_buckets[0]);
assert_eq!(search(u64::MIN), 0);
assert_eq!(search(9), 0);
@@ -913,7 +928,7 @@ mod tests {
};
let ranges = vec![(10.0..100.0).into()];
check_ranges(&ranges);
check_ranges(ranges);
let ranges = vec![
RangeAggregationRange {
@@ -923,7 +938,7 @@ mod tests {
},
(10.0..100.0).into(),
];
check_ranges(&ranges);
check_ranges(ranges);
let ranges = vec![
RangeAggregationRange {
@@ -938,15 +953,15 @@ mod tests {
from: Some(100.0),
},
];
check_ranges(&ranges);
check_ranges(ranges);
}
#[test]
fn range_binary_search_test_f64() {
let ranges = [(10.0..100.0).into()];
let ranges = vec![(10.0..100.0).into()];
let parent_buckets = [build_test_buckets(&ranges, ColumnType::F64)];
let search = |val: u64| get_bucket_pos(val, &parent_buckets[0]);
let collector = get_collector_from_ranges(ranges, ColumnType::F64);
let search = |val: u64| get_bucket_pos(val, &collector.parent_buckets[0]);
assert_eq!(search(u64::MIN), 0);
assert_eq!(search(9f64.to_u64()), 0);

View File

@@ -29,8 +29,6 @@ use crate::aggregation::{format_date, BucketId, Key};
use crate::error::DataCorruption;
use crate::TantivyError;
mod term_histogram;
/// Contains all information required by the SegmentTermCollector to perform the
/// terms aggregation on a segment.
#[derive(Debug, Clone)]
@@ -376,21 +374,9 @@ pub(crate) fn build_segment_term_collector(
// Let's see if we can use a vec to aggregate our data
// instead of a hashmap.
let col_max_value = terms_req_data.accessor.max_value();
let max_column_val: u64 =
let max_term_id: u64 =
col_max_value.max(terms_req_data.missing_value_for_accessor.unwrap_or(0u64));
// Fused fast path: low-cardinality terms × a single `histogram`/`date_histogram` leaf over full
// columns with a small enough bucket grid. Anything else falls through to the general path.
if let Some(collector) = term_histogram::maybe_build_collector(
req_data,
node,
&terms_req_data,
max_column_val,
is_top_level,
)? {
return Ok(collector);
}
let sub_agg_collector = if has_sub_aggregations {
Some(build_segment_agg_collectors(req_data, &node.children)?)
} else {
@@ -399,30 +385,30 @@ pub(crate) fn build_segment_term_collector(
let mut bucket_id_provider = BucketIdProvider::default();
// Decide which bucket storage is best suited for this aggregation.
if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
let term_buckets = VecTermBucketsNoAgg::new(max_column_val + 1, &mut bucket_id_provider);
if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
let term_buckets = VecTermBucketsNoAgg::new(max_term_id + 1, &mut bucket_id_provider);
let collector: SegmentTermCollector<_, HighCardSubAggBuffer> = SegmentTermCollector {
parent_buckets: vec![term_buckets],
sub_agg: None,
bucket_id_provider,
max_term_id: max_column_val,
max_term_id,
terms_req_data,
};
Ok(Box::new(collector))
} else if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC {
let term_buckets = VecTermBuckets::new(max_column_val + 1, &mut bucket_id_provider);
} else if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC {
let term_buckets = VecTermBuckets::new(max_term_id + 1, &mut bucket_id_provider);
let sub_agg = sub_agg_collector.map(LowCardBufferedSubAggs::new);
let collector: SegmentTermCollector<_, LowCardSubAggBuffer> = SegmentTermCollector {
parent_buckets: vec![term_buckets],
sub_agg,
bucket_id_provider,
max_term_id: max_column_val,
max_term_id,
terms_req_data,
};
Ok(Box::new(collector))
} else if max_column_val < 8_000_000 && is_top_level {
} else if max_term_id < 8_000_000 && is_top_level {
let term_buckets: PagedTermMap =
PagedTermMap::new(max_column_val + 1, &mut bucket_id_provider);
PagedTermMap::new(max_term_id + 1, &mut bucket_id_provider);
// Build sub-aggregation blueprint (flat pairs)
let sub_agg = sub_agg_collector.map(BufferedSubAggs::new);
let collector: SegmentTermCollector<PagedTermMap, HighCardSubAggBuffer> =
@@ -430,7 +416,7 @@ pub(crate) fn build_segment_term_collector(
parent_buckets: vec![term_buckets],
sub_agg,
bucket_id_provider,
max_term_id: max_column_val,
max_term_id,
terms_req_data,
};
Ok(Box::new(collector))
@@ -443,7 +429,7 @@ pub(crate) fn build_segment_term_collector(
parent_buckets: vec![term_buckets],
sub_agg,
bucket_id_provider,
max_term_id: max_column_val,
max_term_id,
terms_req_data,
};
Ok(Box::new(collector))

View File

@@ -1,585 +0,0 @@
//! Fused collector for the very common shape `terms` (low cardinality) × a single
//! `histogram`/`date_histogram` sub-aggregation with nothing nested below it.
//!
//! See [`SegmentTermHistogramCollector`] for the approach and [`maybe_build_collector`] for the
//! conditions under which it is used.
use columnar::ColumnBlockAccessor;
use super::{Bucket, SegmentTermCollector, TermsAggReqData, VecTermBuckets};
use crate::aggregation::agg_data::{AggKind, AggRefNode, AggregationsSegmentCtx};
use crate::aggregation::bucket::{
get_bucket_pos_f64, prepare_histogram_dense_range, HistogramAggReqData,
SegmentHistogramCollector,
};
use crate::aggregation::buffered_sub_aggs::LowCardSubAggBuffer;
use crate::aggregation::intermediate_agg_result::{
IntermediateAggregationResult, IntermediateAggregationResults,
};
use crate::aggregation::segment_agg_result::{BucketIdProvider, SegmentAggregationCollector};
use crate::aggregation::{f64_from_fastfield_u64, BucketId};
/// Maximum number of cells (`num_terms × num_time_buckets`) in the fused flat 2D grid. Above this
/// the grid would be too large/cache-unfriendly, so we fall back to the general buffered path.
/// `1 << 14` cells = 64 KB of `u32` counters (the grid is a `Vec<u32>`), comfortably L2-resident.
///
/// Since we are only at the top-level, this won't be multiplied by any parent buckets.
const MAX_FUSED_GRID_BUCKETS: usize = 16384;
/// Fused collector for `terms` (low cardinality) × a single `histogram`/`date_histogram` leaf with
/// nothing nested below it, when the resulting `num_terms × num_time_buckets` grid is small (see
/// [`MAX_FUSED_GRID_BUCKETS`]).
///
/// It keeps a flat, fully dense 2D counter grid (`counts[term * num_time_buckets + bucket]`) and,
/// only when hard bounds are set, a per-term count of the docs that fall outside those bounds
/// (`term_counts`). A single pass reads both the term and histogram columns in document order and
/// bumps the counters directly — no doc-id buffering, no per-term scattered re-fetch, no dynamic
/// dispatch on flush, no per-bucket key/id storage during collection (keys are derived from the
/// index at the end).
///
/// At result time the flat grid is expanded back into the regular term map + histogram storage and
/// handed to the shared intermediate-result builders, so cross-segment merging is identical to the
/// general path.
#[derive(Debug)]
pub(crate) struct SegmentTermHistogramCollector {
/// Per-term count of docs *outside* `hard_bounds` (they still count towards the term's
/// `doc_count`, but land in no histogram bucket).
/// Per-term total = this + the term's `counts` row-sum. Left empty when there are no hard
/// bounds (every doc is in-bounds, so there's no remainder to track).
term_counts: Vec<u32>,
/// Flattened `[num_terms * num_time_buckets]` histogram counters, stored as `u32`.
///
/// Each term id gets its own contiguous slice of `num_time_buckets` histogram counters, i.e.
/// the cell for a doc is `counts[term_id * num_time_buckets + bucket]`.
/// When there are no hard bounds, the per-term total is simply the sum over that term's slice.
counts: Vec<u32>,
/// Histogram buckets per term (the dense time-range length), i.e. the row width of `counts`.
num_time_buckets: usize,
/// The histogram bucket position that maps to time-bucket index 0; `collect` subtracts it from
/// each doc's bucket position to get the column inside the term's row.
base_pos: i64,
/// The (cloned) terms request: provides the term column accessor and the result name.
terms_req_data: TermsAggReqData,
/// The (cloned, normalized) histogram request: its column + interval/offset/bounds.
hist_req_data: HistogramAggReqData,
/// Private block accessor for the term column. The two columns are read together, so each
/// needs its own accessor (the shared `agg_data` scratch accessor only holds one block at a
/// time). Owning them keeps `collect` independent of `agg_data`.
term_block: ColumnBlockAccessor<u64>,
/// Private block accessor for the histogram column (see `term_block`).
hist_block: ColumnBlockAccessor<u64>,
/// No hard bounds (bounds are `[f64::MIN, f64::MAX]`), so every doc is in-bounds and `collect`
/// can skip the per-doc bounds check.
all_docs_in_bounds: bool,
/// Both columns are full (fused-path precondition); cached and passed to
/// `fetch_block_with_is_full` so `collect` skips the per-block cardinality lookup.
is_full: bool,
}
/// [`SegmentAggregationCollector`] implementation for the fused terms × histogram collector.
///
/// `collect` writes straight into the flat counter grid, so `flush` and `prepare_max_bucket` have
/// nothing to do; all result building happens in `add_intermediate_aggregation_result`.
impl SegmentAggregationCollector for SegmentTermHistogramCollector {
/// Expands the flat grid into a terms bucket result (one term bucket per grid row, each carrying
/// its histogram) and pushes it into `results` under the terms aggregation's name.
///
/// `parent_bucket_id` is expected to be 0 (checked in debug builds only), since this collector is
/// only built at the top level.
///
/// # Errors
/// Propagates errors from `into_intermediate_bucket_result` and from `results.push`.
fn add_intermediate_aggregation_result(
&mut self,
agg_data: &AggregationsSegmentCtx,
results: &mut IntermediateAggregationResults,
parent_bucket_id: BucketId,
) -> crate::Result<()> {
debug_assert_eq!(
parent_bucket_id, 0,
"fused term-histogram collector is top-level only"
);
// Expand the flat grid back into the regular structures and reuse the shared builders, so
// ordering/cut-off/dict handling and cross-segment merging match the general path exactly.
let mut bucket_id_provider = BucketIdProvider::default();
// Per-term total = histogram row-sum (in-bounds) + `term_counts` (out-of-bounds remainder,
// empty when there are no hard bounds).
// NOTE(review): `chunks_exact` panics on a chunk size of 0, so this relies on
// `num_time_buckets >= 1` — confirm the builder never produces an empty dense range.
let term_buckets = VecTermBuckets {
buckets: self
.counts
.chunks_exact(self.num_time_buckets)
.enumerate()
.map(|(term_id, row)| {
let in_bounds: u32 = row.iter().sum();
// `term_counts` is empty without hard bounds, hence the `get(..)` + default of 0.
let out_of_bounds = self.term_counts.get(term_id).copied().unwrap_or(0);
Bucket {
count: in_bounds + out_of_bounds,
bucket_id: bucket_id_provider.next_bucket_id(),
}
})
.collect(),
};
// Rebuild a regular histogram collector from the dense rows so it can be handed to the shared
// terms result builder as the (single) sub-aggregation.
let mut histogram = SegmentHistogramCollector::<()>::from_dense_rows(
self.hist_req_data.clone(),
self.base_pos,
self.num_time_buckets,
&self.counts,
);
let name = self.terms_req_data.name.clone();
let bucket = SegmentTermCollector::<VecTermBuckets, LowCardSubAggBuffer>::into_intermediate_bucket_result(
&self.terms_req_data,
Some(&mut histogram as &mut dyn SegmentAggregationCollector),
term_buckets,
agg_data,
)?;
results.push(name, IntermediateAggregationResult::Bucket(bucket))?;
Ok(())
}
/// Counts one block of `docs`: for every doc, reads its term id and histogram value and bumps
/// either the grid cell for (term, histogram bucket) or, when the value is outside the hard
/// bounds, the term's out-of-bounds counter.
///
/// `_agg_data` is unused: the collector owns its column accessors and request data.
#[inline]
fn collect(
&mut self,
parent_bucket_id: BucketId,
docs: &[crate::DocId],
_agg_data: &mut AggregationsSegmentCtx,
) -> crate::Result<()> {
debug_assert_eq!(
parent_bucket_id, 0,
"fused term-histogram collector is top-level only"
);
// Fetch both columns into our own accessors (we read them together, so they can't share the
// single `agg_data` scratch accessor). The collector owns all its inputs, so `collect`
// doesn't touch `agg_data`.
self.term_block
.fetch_block_with_is_full(docs, &self.terms_req_data.accessor, self.is_full);
self.hist_block
.fetch_block_with_is_full(docs, &self.hist_req_data.accessor, self.is_full);
// Hoist the loop-invariant fields into locals: the optimizer can't prove the
// `self.counts`/`self.term_counts` writes don't alias these `self` fields, so it can't keep
// them in registers and re-reads them from memory every iteration — ~15% slower on
// `terms_status_with_date_histogram` when read straight from `self`.
// TODO: measure which of these hoists are actually relevant.
let field_type = self.hist_req_data.field_type;
let bounds = self.hist_req_data.bounds;
let interval = self.hist_req_data.req.interval;
let offset = self.hist_req_data.offset;
let base_pos = self.base_pos;
let num_time_buckets = self.num_time_buckets;
let all_docs_in_bounds = self.all_docs_in_bounds;
let term_counts = &mut self.term_counts;
let counts = &mut self.counts;
// Both columns are full (checked at construction), so values align with `docs` positionally
// and are read together in one pass.
// In-bounds docs bump the `counts` grid, out-of-bounds bump `term_counts`; deriving the
// total at flush avoids a per-doc `term_counts` RMW that serializes on
// store-to-load forwarding.
for (term_id, hist_raw) in self.term_block.iter_vals().zip(self.hist_block.iter_vals()) {
let term_id = term_id as usize;
let val = f64_from_fastfield_u64(hist_raw, field_type);
if all_docs_in_bounds || bounds.contains(val) {
// Column inside the term's row: bucket position relative to the dense range start.
let bucket = (get_bucket_pos_f64(val, interval, offset) as i64 - base_pos) as usize;
// NOTE(review): only verified in debug builds; in release an out-of-range `bucket` would
// either index into another term's row or hit the slice bounds check below.
debug_assert!(
bucket < num_time_buckets,
"histogram bucket outside dense range"
);
counts[term_id * num_time_buckets + bucket] += 1;
} else {
// Only reachable with hard bounds, in which case `term_counts` has one slot per term.
term_counts[term_id] += 1;
}
}
Ok(())
}
/// No-op: there is no buffered state to flush.
fn flush(&mut self, _agg_data: &mut AggregationsSegmentCtx) -> crate::Result<()> {
// Nothing is buffered: `collect` writes the flat grid directly.
Ok(())
}
/// No-op: ignores `_max_bucket`, there is nothing to grow per parent bucket.
fn prepare_max_bucket(
&mut self,
_max_bucket: BucketId,
_agg_data: &AggregationsSegmentCtx,
) -> crate::Result<()> {
// Top-level: the flat grid is allocated up front.
Ok(())
}
/// Always returns `None`: this collector exposes no metric value.
fn compute_metric_value(
&self,
_bucket_id: BucketId,
_sub_agg_name: &str,
_sub_agg_property: &str,
_agg_data: &AggregationsSegmentCtx,
) -> Option<f64> {
None
}
}
/// Builds the fused terms×histogram collector for a single top-level parent, when the shape is
/// eligible. Returns `Ok(None)` to fall back to the general buffered terms path.
///
/// Eligibility: top-level, low-cardinality terms over a full column with no include/exclude
/// handling; a single `histogram`/`date_histogram` leaf (no nesting below it) over a full column;
/// and a non-empty `num_terms × num_time_buckets` grid no larger than
/// [`MAX_FUSED_GRID_BUCKETS`].
///
/// # Arguments
/// * `agg_data` - segment aggregation context; provides the histogram request data and the memory
///   limit the grids are charged to.
/// * `node` - the terms aggregation node; its only child must be the histogram leaf.
/// * `terms_req_data` - the terms request (cloned into the collector on success).
/// * `col_max_val` - largest term id the collector must be able to index; the grid gets
///   `col_max_val + 1` rows.
/// * `is_top_level` - whether the terms aggregation sits at the top level of the request.
///
/// # Errors
/// Propagates errors from `prepare_histogram_dense_range` and from the memory-limit accounting
/// (the latter *before* any grid memory is allocated).
pub(super) fn maybe_build_collector(
    agg_data: &mut AggregationsSegmentCtx,
    node: &AggRefNode,
    terms_req_data: &TermsAggReqData,
    col_max_val: u64,
    is_top_level: bool,
) -> crate::Result<Option<Box<dyn SegmentAggregationCollector>>> {
    // Both columns must be full (one value per doc) so their values align positionally with `docs`
    // and we can zip them. Requiring full columns also makes the terms agg's `missing` config a
    // no-op (`fetch_block_with_missing` early-returns on full columns), so we needn't check for it.
    //
    // We don't cap the term cardinality here: the flat grid is bounded by the total cell count
    // (`num_terms * num_time_buckets <= MAX_FUSED_GRID_BUCKETS`) checked below, which subsumes it.
    //
    // We only allow this at the top-level, since we don't know how many buckets are created. We
    // are less likely to get enough docs for the preallocation to be worth it and there's a risk
    // of using too much memory. We could check the maximum theoretical buckets up-front and pass
    // them down.
    let fuseable = is_top_level
        // TODO: We can easily support this
        && terms_req_data.allowed_term_ids.is_none()
        && terms_req_data.accessor.get_cardinality().is_full()
        // The flat counters are `u32`, bumped once per value, so no count can exceed the column's
        // value count. (Essentially always true here: the column is full, so its value count
        // equals the doc count, and `DocId` is `u32`.)
        && terms_req_data.accessor.values.num_vals() < u32::MAX
        && node.children.len() == 1
        && matches!(
            node.children[0].kind,
            AggKind::Histogram | AggKind::DateHistogram
        )
        && node.children[0].children.is_empty()
        && agg_data.per_request.histogram_req_data[node.children[0].idx_in_req_data]
            .accessor
            .get_cardinality()
            .is_full();
    if !fuseable {
        return Ok(None);
    }

    // Clone + normalize the histogram request and get its dense bucket range; only take the fused
    // path when the flat `num_terms × num_time_buckets` grid is small enough.
    let Some((hist_req_data, range)) = prepare_histogram_dense_range(agg_data, &node.children[0])?
    else {
        return Ok(None);
    };

    // An empty dense range cannot be represented by the grid: the result builder splits `counts`
    // into rows with `chunks_exact(num_time_buckets)`, which panics on a chunk size of 0. Let the
    // general path handle it.
    if range.len == 0 {
        return Ok(None);
    }

    // Reject oversized term id spaces while still in `u64`, so the `usize` conversion below can
    // never truncate (relevant on 32-bit targets). With at least one time bucket per term, more
    // than `MAX_FUSED_GRID_BUCKETS` terms can never fit the grid anyway.
    if col_max_val >= MAX_FUSED_GRID_BUCKETS as u64 {
        return Ok(None);
    }
    let num_terms = col_max_val as usize + 1;
    if num_terms.saturating_mul(range.len) > MAX_FUSED_GRID_BUCKETS {
        return Ok(None);
    }
    // Cannot overflow: bounded by `MAX_FUSED_GRID_BUCKETS` just above.
    let num_cells = num_terms * range.len;

    // No hard bounds means every doc is in-bounds, letting `collect` short-circuit the bounds
    // check — and leaving `term_counts` (the out-of-bounds remainder) unused, so we skip allocating
    // it.
    let all_docs_in_bounds =
        hist_req_data.bounds.min == f64::MIN && hist_req_data.bounds.max == f64::MAX;
    let num_remainders = if all_docs_in_bounds { 0 } else { num_terms };

    // Charge both grids to the aggregation memory limit *before* allocating them, so a request
    // over the limit is rejected without first committing the memory.
    agg_data
        .context
        .limits
        .add_memory_consumed(((num_cells + num_remainders) * std::mem::size_of::<u32>()) as u64)?;

    let counts = vec![0u32; num_cells];
    // A zero-length `vec!` does not allocate, so this is free when there are no hard bounds.
    let term_counts = vec![0u32; num_remainders];

    Ok(Some(Box::new(SegmentTermHistogramCollector {
        term_counts,
        counts,
        num_time_buckets: range.len,
        base_pos: range.base_pos,
        terms_req_data: terms_req_data.clone(),
        hist_req_data,
        term_block: ColumnBlockAccessor::default(),
        hist_block: ColumnBlockAccessor::default(),
        all_docs_in_bounds,
        // Both columns were verified to be full in the `fuseable` check above.
        is_full: true,
    })))
}
#[cfg(test)]
mod tests {
    use crate::aggregation::agg_req::Aggregations;
    use crate::aggregation::tests::{
        exec_request, exec_request_with_query_and_memory_limit,
        get_test_index_from_values_and_terms,
    };
    use crate::aggregation::AggregationLimitsGuard;

    /// The three term values used by most fixtures, in ascending key order.
    const TERMS: [&str; 3] = ["a", "b", "c"];

    /// Shared fixture: 300 docs with term = `TERMS[i % 3]` and histogram value = `i % 20`.
    ///
    /// gcd(3, 20) = 1, so every (term, value) pair occurs exactly 300 / 60 = 5 times, and each
    /// term has 100 docs.
    fn cyclic_docs() -> Vec<(f64, String)> {
        let mut docs = Vec::with_capacity(300);
        for i in 0..300u64 {
            let value = (i % 20) as f64;
            let term = TERMS[(i % 3) as usize].to_string();
            docs.push((value, term));
        }
        docs
    }

    /// Hand-computed correctness check for the fused terms×histogram fast path
    /// ([`super::SegmentTermHistogramCollector`]): low-cardinality terms × a histogram leaf over
    /// full columns, run once with separate segments and once with merged segments.
    #[test]
    fn fused_term_histogram_test() -> crate::Result<()> {
        for merge_segments in [false, true] {
            fused_term_histogram_with_opt(merge_segments)?;
        }
        Ok(())
    }

    fn fused_term_histogram_with_opt(merge_segments: bool) -> crate::Result<()> {
        // Interval 1 over values 0..=19 => histogram buckets 0..19, 5 docs in each per term.
        let docs = cyclic_docs();
        // Two segments, to also exercise cross-segment merging of the fused per-term histograms.
        let (first_half, second_half) = docs.split_at(150);
        let segments = vec![first_half.to_vec(), second_half.to_vec()];
        let index = get_test_index_from_values_and_terms(merge_segments, &segments)?;

        let request = serde_json::json!({
            "by_term": {
                "terms": { "field": "string_id", "order": { "_key": "asc" } },
                "aggs": {
                    "histo": { "histogram": { "field": "score_f64", "interval": 1.0 } }
                }
            }
        });
        let agg_req: Aggregations = serde_json::from_value(request).unwrap();
        let res = exec_request(agg_req, &index)?;

        let term_buckets = &res["by_term"]["buckets"];
        for (term_idx, term) in TERMS.into_iter().enumerate() {
            let term_bucket = &term_buckets[term_idx];
            assert_eq!(term_bucket["key"], term);
            assert_eq!(term_bucket["doc_count"], 100);
            let histo = &term_bucket["histo"]["buckets"];
            for b in 0..20usize {
                assert_eq!(histo[b]["key"], b as f64, "term {term} bucket {b}");
                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {b}");
            }
            assert_eq!(histo[20], serde_json::Value::Null);
        }
        assert_eq!(term_buckets[3], serde_json::Value::Null);
        Ok(())
    }

    /// A `missing` config on a *full* term column still takes the fused path (the string sentinel
    /// is just `col_max + 1`, so the column stays low-cardinality). Since no doc is missing, the
    /// real term buckets must be exactly as without `missing`.
    #[test]
    fn fused_term_histogram_with_missing_on_full_column() -> crate::Result<()> {
        let index = get_test_index_from_values_and_terms(true, &[cyclic_docs()])?;

        let request = serde_json::json!({
            "by_term": {
                "terms": { "field": "string_id", "missing": "MISSING", "order": { "_key": "asc" } },
                "aggs": {
                    "histo": { "histogram": { "field": "score_f64", "interval": 1.0 } }
                }
            }
        });
        let agg_req: Aggregations = serde_json::from_value(request).unwrap();
        let res = exec_request(agg_req, &index)?;

        // Column is full, so "MISSING" never applies: a, b, c are unchanged (100 docs, 5 per
        // bucket).
        for (term_idx, term) in TERMS.into_iter().enumerate() {
            let term_bucket = &res["by_term"]["buckets"][term_idx];
            assert_eq!(term_bucket["key"], term);
            assert_eq!(term_bucket["doc_count"], 100);
            let histo = &term_bucket["histo"]["buckets"];
            for b in 0..20usize {
                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {b}");
            }
        }
        Ok(())
    }

    /// Term cardinality above the general path's `MAX_NUM_TERMS_FOR_VEC` (100) still fuses: the
    /// flat grid is bounded by the total cell count (`num_terms * num_time_buckets`), not the
    /// term count.
    #[test]
    fn fused_term_histogram_many_terms() -> crate::Result<()> {
        let num_terms = 150usize;
        let docs_per_term = 2usize;
        // All docs share histogram value 0 (a single bucket), so the grid is 150 x 1 = 150 cells.
        let total_docs = num_terms * docs_per_term;
        let mut docs: Vec<(f64, String)> = Vec::with_capacity(total_docs);
        for i in 0..total_docs {
            docs.push((0.0, format!("t{:03}", i % num_terms)));
        }
        let index = get_test_index_from_values_and_terms(true, &[docs])?;

        let request = serde_json::json!({
            "by_term": {
                "terms": { "field": "string_id", "size": 1000, "order": { "_key": "asc" } },
                "aggs": {
                    "histo": { "histogram": { "field": "score_f64", "interval": 1.0 } }
                }
            }
        });
        let agg_req: Aggregations = serde_json::from_value(request).unwrap();
        let res = exec_request(agg_req, &index)?;

        let buckets = res["by_term"]["buckets"].as_array().unwrap();
        assert_eq!(buckets.len(), num_terms);
        for (i, bucket) in buckets.iter().enumerate() {
            assert_eq!(bucket["key"], format!("t{i:03}"));
            assert_eq!(bucket["doc_count"], docs_per_term as u64);
            let only_histo_bucket = &bucket["histo"]["buckets"][0];
            assert_eq!(only_histo_bucket["key"], 0.0);
            assert_eq!(only_histo_bucket["doc_count"], docs_per_term as u64);
        }
        Ok(())
    }

    /// `hard_bounds` exercises the non-derived `term_counts` branch: a term's `doc_count` must
    /// count *every* doc with that term, including docs whose histogram value is outside the
    /// bounds (those are excluded from the histogram buckets but still counted for the term). This
    /// is the case where the per-doc `term_counts` increment cannot be replaced by the grid
    /// row-sum.
    #[test]
    fn fused_term_histogram_with_hard_bounds() -> crate::Result<()> {
        // Per term: 100 docs, each value in 0..=19 occurring 5 times.
        let index = get_test_index_from_values_and_terms(true, &[cyclic_docs()])?;

        // hard_bounds [5, 14] (inclusive) keeps only values 5..=14 in the histogram (10 buckets);
        // values 0..=4 and 15..=19 are out of bounds.
        let request = serde_json::json!({
            "by_term": {
                "terms": { "field": "string_id", "order": { "_key": "asc" } },
                "aggs": {
                    "histo": {
                        "histogram": {
                            "field": "score_f64",
                            "interval": 1.0,
                            "hard_bounds": { "min": 5.0, "max": 14.0 }
                        }
                    }
                }
            }
        });
        let agg_req: Aggregations = serde_json::from_value(request).unwrap();
        let res = exec_request(agg_req, &index)?;

        for (term_idx, term) in TERMS.into_iter().enumerate() {
            let term_bucket = &res["by_term"]["buckets"][term_idx];
            assert_eq!(term_bucket["key"], term);
            // doc_count includes the 50 per-term docs whose value is outside [5, 14].
            assert_eq!(term_bucket["doc_count"], 100);
            let histo = &term_bucket["histo"]["buckets"];
            for b in 0..10usize {
                let key = 5 + b;
                assert_eq!(histo[b]["key"], key as f64, "term {term} bucket key {key}");
                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {key}");
            }
            // Only the 10 in-bounds buckets exist.
            assert_eq!(histo[10], serde_json::Value::Null);
        }
        Ok(())
    }

    /// Non-binding `hard_bounds` (wider than the data, with mid-interval edges) must still produce
    /// exact results via the derive-from-grid path: since no doc is out of bounds, normalization
    /// drops the bound, every doc lands in the dense range, and each term's total equals its
    /// histogram row-sum. This is the case that previously fell back to the per-doc counter only
    /// because `bounds != [MIN, MAX]`.
    #[test]
    fn fused_term_histogram_with_non_binding_hard_bounds() -> crate::Result<()> {
        // Data values span [0, 19].
        let index = get_test_index_from_values_and_terms(true, &[cyclic_docs()])?;

        // Bounds wider than [0, 19], with mid-interval edges -> they exclude nothing.
        let request = serde_json::json!({
            "by_term": {
                "terms": { "field": "string_id", "order": { "_key": "asc" } },
                "aggs": {
                    "histo": {
                        "histogram": {
                            "field": "score_f64",
                            "interval": 1.0,
                            "hard_bounds": { "min": -0.5, "max": 19.5 }
                        }
                    }
                }
            }
        });
        let agg_req: Aggregations = serde_json::from_value(request).unwrap();
        let res = exec_request(agg_req, &index)?;

        for (term_idx, term) in TERMS.into_iter().enumerate() {
            let term_bucket = &res["by_term"]["buckets"][term_idx];
            assert_eq!(term_bucket["key"], term);
            // Every doc is in-bounds, so the per-term total is the full 100 (as without bounds).
            assert_eq!(term_bucket["doc_count"], 100);
            let histo = &term_bucket["histo"]["buckets"];
            for b in 0..20usize {
                assert_eq!(histo[b]["key"], b as f64, "term {term} bucket {b}");
                assert_eq!(histo[b]["doc_count"], 5, "term {term} bucket {b}");
            }
            assert_eq!(histo[20], serde_json::Value::Null);
        }
        Ok(())
    }

    /// Regression: with hard bounds the fused path allocates `term_counts` (one `u32`/term) on top
    /// of the grid, and that allocation must be charged to the memory limit. With many terms and a
    /// single time bucket the two are equal in size, so a limit admitting the grid alone but not
    /// grid + `term_counts` must fail.
    #[test]
    fn fused_term_histogram_hard_bounds_charges_term_counts() -> crate::Result<()> {
        // 16k distinct terms, one doc each; values alternate in/out of the single-bucket bounds
        // [5, 5] so the bounds bind and `term_counts` is allocated. num_terms=16000,
        // num_time_buckets=1 => `counts` and `term_counts` are ~64 KB each.
        let mut docs: Vec<(f64, String)> = Vec::with_capacity(16_000);
        for i in 0..16_000u64 {
            let value = if i % 2 == 0 { 5.0 } else { 10.0 };
            docs.push((value, format!("t{i:05}")));
        }
        let index = get_test_index_from_values_and_terms(true, &[docs])?;

        let request = serde_json::json!({
            "by_term": {
                "terms": { "field": "string_id" },
                "aggs": {
                    "histo": {
                        "histogram": {
                            "field": "score_f64",
                            "interval": 1.0,
                            "hard_bounds": { "min": 5.0, "max": 5.0 }
                        }
                    }
                }
            }
        });
        let agg_req: Aggregations = serde_json::from_value(request).unwrap();

        // ~96 KB admits the grid (~64 KB) but not grid + `term_counts` (~128 KB).
        let limits = AggregationLimitsGuard::new(Some(96_000), None);
        let err =
            exec_request_with_query_and_memory_limit(agg_req, &index, None, limits).unwrap_err();
        assert!(
            err.to_string().contains("memory limit was exceeded"),
            "expected a memory-limit error, got: {err}"
        );
        Ok(())
    }
}

View File

@@ -138,7 +138,6 @@ impl SubAggBuffer for HighCardSubAggBuffer {
}
}
#[inline]
fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
let idx = bucket_id % NUM_PARTITIONS as u32;
let slot = &mut self.partitions[idx as usize];
@@ -197,7 +196,6 @@ impl SubAggBuffer for LowCardSubAggBuffer {
}
}
#[inline]
fn push(&mut self, bucket_id: BucketId, doc_id: DocId) {
let idx = bucket_id as usize;
if self.per_bucket_docs.len() <= idx {

View File

@@ -377,22 +377,7 @@ impl IntermediateMetricResult {
MetricResult::ExtendedStats(intermediate_stats.finalize())
}
IntermediateMetricResult::Sum(intermediate_sum) => {
// By default match Elasticsearch: empty / all-missing sum
// buckets serialize as `"value": 0`, not `"value": null`.
// The non-ES `none_if_no_match` flag on `SumAggregation`
// opts into SQL-style `null` for downstream consumers.
let none_if_no_match = req
.agg
.as_sum()
.and_then(|sum| sum.none_if_no_match)
.unwrap_or(false);
let value = intermediate_sum.finalize();
if none_if_no_match {
MetricResult::Sum(value.into())
} else {
let value = Some(value.unwrap_or(0.0));
MetricResult::Sum(value.into())
}
MetricResult::Sum(intermediate_sum.finalize().into())
}
IntermediateMetricResult::Percentiles(percentiles) => MetricResult::Percentiles(
percentiles

View File

@@ -171,7 +171,6 @@ impl CouponCache {
let uninitialized_coupon = Coupon::from_hash(0);
let mut coupon_map: Vec<Coupon> =
vec![uninitialized_coupon; highest_term_ord as usize + 1];
for (term_ord, coupon) in term_ords.into_iter().zip(coupons) {
coupon_map[term_ord as usize] = coupon;
}

View File

@@ -27,16 +27,6 @@ pub struct SumAggregation {
/// { "field": "my_numbers", "missing": "10.0" }
#[serde(default, deserialize_with = "deserialize_option_f64")]
pub missing: Option<f64>,
/// Non-Elasticsearch extension. When `Some(true)`, the serialized result
/// returns `"value": null` if no values were collected (all documents had
/// missing/NULL values for the field), matching the behavior of `min`,
/// `max`, and `avg`. When `None` or `Some(false)` (the default) the
/// result returns `"value": 0`, matching Elasticsearch.
///
/// Intended for SQL-style consumers where `SUM` of zero rows is `NULL`
/// and must be distinguishable from a bucket that genuinely sums to `0`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub none_if_no_match: Option<bool>,
}
impl SumAggregation {
@@ -45,7 +35,6 @@ impl SumAggregation {
Self {
field: field_name,
missing: None,
none_if_no_match: None,
}
}
/// Returns the field name the aggregation is computed on.
@@ -70,104 +59,8 @@ impl IntermediateSum {
pub fn merge_fruits(&mut self, other: IntermediateSum) {
self.stats.merge_fruits(other.stats);
}
/// Computes the final sum value.
///
/// Returns `None` when no values were collected, matching the Rust-side
/// behavior of `IntermediateMin`, `IntermediateMax`, and
/// `IntermediateAvg`. The Elasticsearch-vs-SQL choice for the
/// user-visible result is made at the boundary in
/// [`IntermediateMetricResult::into_final_metric_result`]: by default
/// `None` is coerced to `Some(0.0)` to match Elasticsearch
/// (`"value": 0`), and the [`SumAggregation::none_if_no_match`] flag
/// opts out of that coercion for SQL-style consumers.
/// Computes the final sum value.
pub fn finalize(&self) -> Option<f64> {
let stats = self.stats.finalize();
if stats.count == 0 {
None
} else {
Some(stats.sum)
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sum_finalize_returns_none_when_no_values() {
// Default IntermediateSum has count=0 — finalize should return None,
// matching MIN/MAX/AVG behavior for all-NULL groups.
let sum = IntermediateSum::default();
assert_eq!(sum.finalize(), None);
}
#[test]
fn test_sum_finalize_returns_value_when_has_values() {
let mut sum = IntermediateSum::default();
// Merge in a result that has actual values
let stats = IntermediateStats {
count: 3,
sum: 42.0,
min: 10.0,
max: 20.0,
..Default::default()
};
let other = IntermediateSum::from_stats(stats);
sum.merge_fruits(other);
assert_eq!(sum.finalize(), Some(42.0));
}
#[test]
fn test_sum_merge_two_empty_still_none() {
let mut a = IntermediateSum::default();
let b = IntermediateSum::default();
a.merge_fruits(b);
assert_eq!(a.finalize(), None);
}
#[test]
fn test_sum_aggregation_empty_index_default_matches_es() -> crate::Result<()> {
use serde_json::json;
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::tests::{exec_request, get_test_index_from_terms};
// Empty index — sum has no values to collect.
let values: Vec<Vec<&str>> = vec![];
let index = get_test_index_from_terms(false, &values)?;
let agg_req: Aggregations = serde_json::from_value(json!({
"score_sum": { "sum": { "field": "score" } }
}))
.unwrap();
let res = exec_request(agg_req, &index)?;
// Default: match Elasticsearch — empty sum serializes as 0, not null.
assert_eq!(res["score_sum"]["value"], 0.0);
Ok(())
}
#[test]
fn test_sum_aggregation_empty_index_none_if_no_match_opt_in() -> crate::Result<()> {
use serde_json::json;
use crate::aggregation::agg_req::Aggregations;
use crate::aggregation::tests::{exec_request, get_test_index_from_terms};
let values: Vec<Vec<&str>> = vec![];
let index = get_test_index_from_terms(false, &values)?;
let agg_req: Aggregations = serde_json::from_value(json!({
"score_sum": { "sum": { "field": "score", "none_if_no_match": true } }
}))
.unwrap();
let res = exec_request(agg_req, &index)?;
// Opt-in non-ES extension — empty sum serializes as null.
assert!(
res["score_sum"]["value"].is_null(),
"expected null, got {:?}",
res["score_sum"]["value"]
);
Ok(())
Some(self.stats.finalize().sum)
}
}

View File

@@ -301,11 +301,14 @@ pub trait SegmentCollector: 'static {
/// The query pushes the scored document to the collector via this method.
fn collect(&mut self, doc: DocId, score: Score);
/// The query pushes the scored document to the collector via this method.
/// The query pushes the matched documents to the collector via this method.
/// This method is used when the collector does not require scoring.
///
/// See [`COLLECT_BLOCK_BUFFER_LEN`](crate::COLLECT_BLOCK_BUFFER_LEN) for the
/// buffer size passed to the collector.
/// `docs` is a block of matched doc ids. Doc ids are produced in increasing
/// order, in windows of [`COLLECT_BLOCK_BUFFER_LEN`](crate::COLLECT_BLOCK_BUFFER_LEN),
/// but several windows are accumulated before being flushed here, so the
/// block may be larger than `COLLECT_BLOCK_BUFFER_LEN`. Implementations must
/// not assume any particular maximum length.
fn collect_block(&mut self, docs: &[DocId]) {
for doc in docs {
self.collect(*doc, 0.0);

View File

@@ -11,9 +11,14 @@ use crate::DocId;
/// to compare `[u32; 4]`.
pub const TERMINATED: DocId = i32::MAX as u32;
/// The collect_block method on `SegmentCollector` uses a buffer of this size.
/// Passed results to `collect_block` will not exceed this size and will be
/// exactly this size as long as we can fill the buffer.
/// Window size used by [`DocSet::fill_buffer`]: a single `fill_buffer` call
/// writes at most this many doc ids, and exactly this many as long as the
/// `DocSet` is not exhausted.
///
/// Note that this is *not* the maximum length of the slice passed to
/// `SegmentCollector::collect_block`: the collection loop accumulates several
/// such windows into a larger buffer before flushing it, so `collect_block`
/// may receive a block larger than `COLLECT_BLOCK_BUFFER_LEN`.
pub const COLLECT_BLOCK_BUFFER_LEN: usize = 64;
/// Number of `TinySet` (64-bit) buckets in a block used by [`DocSet::fill_bitset_block`].

View File

@@ -287,33 +287,6 @@ impl BlockSegmentPostings {
doc
}
/// Returns the number of documents with a doc id strictly smaller than `target`
/// (i.e. the *rank* of `target` in this posting list).
///
/// This jumps to the block that may contain `target` through the skip list, so no
/// skipped block is decoded; a single block is then decoded to locate `target`
/// within it. The cost is therefore `O(number_of_skip_list_entries)` plus one block
/// decode, rather than `O(doc_freq)`.
///
/// Like [`Self::seek`], the underlying cursor only ever moves forward. This method
/// must be called with **non-decreasing** `target` values (galloping); calling it
/// with a `target` smaller than a previous one yields an incorrect result. `target`
/// must be a valid doc id (i.e. `target <= TERMINATED`), exactly as for `seek`.
///
/// Edge cases: returns `0` when `target` is smaller than every doc id, and
/// `doc_freq()` when `target` is larger than every doc id.
pub fn rank(&mut self, target: DocId) -> u32 {
    // An empty posting list has no doc below any target.
    if self.doc_freq == 0 {
        return 0;
    }
    // Move the cursor to the block that may hold `target`. The returned offset is the
    // number of docs of that block whose doc id is strictly below `target`.
    let offset_in_block = self.seek(target) as u32;
    // `remaining_docs` covers the landed block plus every later block, so subtracting it
    // from `doc_freq` leaves the docs stored in the blocks strictly before the landed one.
    let preceding_docs = self.doc_freq - self.skip_reader.remaining_docs();
    preceding_docs + offset_in_block
}
/// Returns the position offset currently reported by the underlying skip reader.
pub(crate) fn position_offset(&self) -> u64 {
self.skip_reader.position_offset()
}
@@ -595,38 +568,4 @@ mod tests {
assert_eq!(block_segments.docs(), &[1, 3, 5]);
Ok(())
}
#[test]
fn test_block_segment_postings_rank() -> crate::Result<()> {
    // ~8 blocks worth of docs so the skip list is actually exercised.
    let doc_ids: Vec<DocId> = (0..1000u32).map(|i| i * 3).collect();
    let mut postings = build_block_postings(&doc_ids[..])?;
    let total_docs = postings.doc_freq();

    // Reference implementation: number of docs strictly below `target`.
    let docs_below = |target: DocId| doc_ids.iter().filter(|&&doc| doc < target).count() as u32;

    // The cursor only moves forward, so targets are queried in non-decreasing order.
    // Every target is a valid doc id (<= TERMINATED).
    let ascending_targets = [
        0u32, 1, 2, 3, 4, 299, 300, 301, 1500, 2996, 2997, 3000, 10_000,
    ];
    for &target in &ascending_targets {
        let expected = docs_below(target);
        assert_eq!(postings.rank(target), expected, "rank({target}) mismatch");
    }

    // Below the first doc: nothing precedes the target.
    let mut below_first = build_block_postings(&doc_ids[..])?;
    assert_eq!(below_first.rank(0), 0);

    // Above the last doc: every doc precedes the target.
    let mut above_last = build_block_postings(&doc_ids[..])?;
    assert_eq!(above_last.rank(1_000_000), total_docs);

    // Empty postings always rank to 0.
    let mut no_postings = BlockSegmentPostings::empty();
    assert_eq!(no_postings.rank(42), 0);

    Ok(())
}
}

View File

@@ -275,9 +275,8 @@ impl Recorder for TfAndPositionRecorder {
mod tests {
use common::write_u32_vint;
use stacker::MemoryArena;
use super::{BufferLender, Recorder, TermFrequencyRecorder, VInt32Reader};
use super::{BufferLender, VInt32Reader};
#[test]
fn test_buffer_lender() {
@@ -315,98 +314,4 @@ mod tests {
let res: Vec<u32> = VInt32Reader::new(&buffer[..]).collect();
assert_eq!(&res[..], &vals[..]);
}
// ── TermFrequencyRecorder ─────────────────────────────────────────────────
#[test]
fn term_frequency_recorder_has_term_freq() {
    // A freshly built recorder must already report that it tracks term frequencies.
    let recorder = TermFrequencyRecorder::default();
    let advertises_term_freq = recorder.has_term_freq();
    assert!(
        advertises_term_freq,
        "TermFrequencyRecorder must advertise term-frequency support"
    );
}
#[test]
fn term_frequency_recorder_term_doc_freq_single_doc() {
    let mut arena = MemoryArena::default();
    let mut recorder = TermFrequencyRecorder::default();

    // A single document (id 0) in which the term occurs at positions 0 and 1.
    recorder.new_doc(0, &mut arena);
    for position in [0u32, 1] {
        recorder.record_position(position, &mut arena);
    }
    recorder.close_doc(&mut arena);

    let doc_freq = recorder.term_doc_freq();
    assert_eq!(
        doc_freq,
        Some(1),
        "term_doc_freq should be 1 after recording one document"
    );
}
#[test]
fn term_frequency_recorder_term_doc_freq_multiple_docs() {
    let mut arena = MemoryArena::default();
    let mut recorder = TermFrequencyRecorder::default();

    // (doc id, number of occurrences): three documents with 1, 3 and 2 occurrences.
    let docs_with_term_freq = [(0u32, 1u32), (5, 3), (10, 2)];
    for (doc_id, term_freq) in docs_with_term_freq {
        recorder.new_doc(doc_id, &mut arena);
        for position in 0..term_freq {
            recorder.record_position(position, &mut arena);
        }
        recorder.close_doc(&mut arena);
    }

    let doc_freq = recorder.term_doc_freq();
    assert_eq!(
        doc_freq,
        Some(3),
        "term_doc_freq should equal the number of documents recorded"
    );
}
#[test]
fn term_frequency_recorder_zero_docs() {
    // Nothing has been recorded yet, so the document frequency is expected to be zero.
    let untouched_recorder = TermFrequencyRecorder::default();
    let doc_freq = untouched_recorder.term_doc_freq();
    assert_eq!(
        doc_freq,
        Some(0),
        "term_doc_freq should be 0 before any document is recorded"
    );
}
#[test]
fn term_frequency_recorder_single_occurrence_per_doc() {
    let mut arena = MemoryArena::default();
    let mut recorder = TermFrequencyRecorder::default();

    // Minimum non-trivial case: the term occurs exactly once in each of three documents.
    let doc_ids = [1u32, 2, 100];
    for doc_id in doc_ids {
        recorder.new_doc(doc_id, &mut arena);
        recorder.record_position(0, &mut arena);
        recorder.close_doc(&mut arena);
    }

    let doc_freq = recorder.term_doc_freq();
    assert_eq!(doc_freq, Some(3));
}
#[test]
fn term_frequency_recorder_high_frequency_doc() {
    let mut arena = MemoryArena::default();
    let mut recorder = TermFrequencyRecorder::default();

    // One document (id 42) in which the term occurs 1000 times.
    recorder.new_doc(42, &mut arena);
    for position in 0..1000u32 {
        recorder.record_position(position, &mut arena);
    }
    recorder.close_doc(&mut arena);

    let doc_freq = recorder.term_doc_freq();
    assert_eq!(
        doc_freq,
        Some(1),
        "term_doc_freq counts documents, not occurrences"
    );
}
}

View File

@@ -187,12 +187,6 @@ impl SkipReader {
self.last_doc_in_block
}
/// Number of docs from the start of the current block to the end of the postings
/// (i.e. the current block plus every block after it).
///
/// This is a plain accessor for the `remaining_docs` field; it does not move the reader.
pub(crate) fn remaining_docs(&self) -> u32 {
self.remaining_docs
}
/// Returns the current value of the reader's `position_offset` field.
pub fn position_offset(&self) -> u64 {
self.position_offset
}

View File

@@ -1,6 +1,5 @@
use std::collections::HashMap;
use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
use crate::index::SegmentReader;
use crate::postings::FreqReadingOption;
use crate::query::disjunction::Disjunction;
@@ -531,13 +530,12 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
) -> crate::Result<()> {
let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
let num_docs = reader.num_docs();
let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
match scorer {
SpecializedScorer::TermUnion(term_scorers) => {
let mut union_scorer =
BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
for_each_docset_buffered(&mut union_scorer, callback);
}
SpecializedScorer::TermIntersection(term_scorers) => {
let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
@@ -545,10 +543,10 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
.map(|term_scorer| Box::new(term_scorer) as Box<dyn Scorer>)
.collect();
let mut intersection = intersect_scorers(boxed_scorers, num_docs);
for_each_docset_buffered(intersection.as_mut(), &mut buffer, callback);
for_each_docset_buffered(intersection.as_mut(), callback);
}
SpecializedScorer::Other(mut scorer) => {
for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
for_each_docset_buffered(scorer.as_mut(), callback);
}
}
Ok(())

View File

@@ -1,6 +1,6 @@
use std::fmt;
use crate::docset::{SeekDangerResult, COLLECT_BLOCK_BUFFER_LEN};
use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};
@@ -119,10 +119,6 @@ impl<TDocSet: DocSet> DocSet for ConstScorer<TDocSet> {
self.docset.seek(target)
}
/// Forwards `seek_danger` to the wrapped docset and returns its result unchanged.
fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
self.docset.seek_danger(target)
}
/// Forwards `fill_buffer` to the wrapped docset, which writes into the
/// `COLLECT_BLOCK_BUFFER_LEN`-sized `buffer`, and returns the count it reports.
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
self.docset.fill_buffer(buffer)
}

View File

@@ -3,7 +3,6 @@ use std::ops::RangeInclusive;
use columnar::Column;
use crate::docset::SeekDangerResult;
use crate::{DocId, DocSet, TERMINATED};
/// Helper to have a cursor over a vec of docids
@@ -185,37 +184,6 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
doc
}
/// Answers whether `target` itself belongs to the docset.
///
/// Unlike a regular `seek`, this never scans forward to materialize the next match (the
/// expensive part of `seek`); it performs a single point lookup of `target` in the column.
fn seek_danger(&mut self, target: DocId) -> SeekDangerResult {
    // Covers `target == TERMINATED` and any target past the last doc: no match is possible.
    if target >= self.column.num_docs() {
        return SeekDangerResult::SeekLowerBound(TERMINATED);
    }
    if self.is_last_seek_distance_large(target) {
        self.reset_fetch_range();
    }
    self.last_seek_pos_opt = Some(target);

    let target_in_range = self
        .column
        .values_for_doc(target)
        .any(|value| self.value_range.contains(&value));
    if !target_in_range {
        // `target` is not in the docset, so the next match is strictly greater than it and
        // `target + 1` is a valid lower bound. The docset may be left in an invalid state.
        return SeekDangerResult::SeekLowerBound(target + 1);
    }

    // Leave the docset in a valid state positioned on `target`: `doc()` returns it and a
    // following `advance()` resumes the scan right after it.
    self.loaded_docs.get_cleared_data().push(target);
    self.next_fetch_start = target + 1;
    SeekDangerResult::Found
}
fn size_hint(&self) -> u32 {
// TODO: Implement a better size hint
self.column.num_docs() / 10
@@ -241,148 +209,12 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe
#[cfg(test)]
mod tests {
use std::ops::{Bound, RangeInclusive};
use std::ops::Bound;
use columnar::Column;
use super::RangeDocSet;
use crate::collector::Count;
use crate::directory::RamDirectory;
use crate::docset::{SeekDangerResult, TERMINATED};
use crate::query::RangeQuery;
use crate::{schema, DocSet, Index, IndexBuilder, TantivyDocument, Term};
/// Creates an in-RAM, single-segment index in which doc `i` stores `values_for_doc(i)` in the
/// u64 fast field `"value"`, and returns that field's column so that a `RangeDocSet` can be
/// driven directly.
fn build_u64_column(
    num_docs: usize,
    values_for_doc: impl Fn(usize) -> Vec<u64>,
) -> Column<u64> {
    let mut schema_builder = schema::SchemaBuilder::new();
    let value_field = schema_builder.add_u64_field("value", schema::FAST);
    let index = Index::create_in_ram(schema_builder.build());

    // Index every doc, commit once, and release the writer before opening a reader.
    let mut index_writer = index.writer_for_tests().unwrap();
    for doc_id in 0..num_docs {
        let mut document = TantivyDocument::new();
        for value in values_for_doc(doc_id) {
            document.add_u64(value_field, value);
        }
        index_writer.add_document(document).unwrap();
    }
    index_writer.commit().unwrap();
    drop(index_writer);

    let searcher = index.reader().unwrap().searcher();
    // The helper relies on all docs living in one segment.
    assert_eq!(searcher.segment_readers().len(), 1);
    let segment_reader = searcher.segment_reader(0);
    segment_reader.fast_fields().u64("value").unwrap()
}
/// Builds the test column via `build_u64_column` and wraps it in a `RangeDocSet` that is
/// restricted to `value_range`.
fn range_docset(
    value_range: RangeInclusive<u64>,
    num_docs: usize,
    values_for_doc: impl Fn(usize) -> Vec<u64>,
) -> RangeDocSet<u64> {
    let column = build_u64_column(num_docs, values_for_doc);
    RangeDocSet::new(value_range, column)
}
#[test]
fn seek_danger_found_leaves_valid_state() {
    // Docs with an even id hold 0 (inside the range); docs with an odd id hold 1 (outside).
    let mut even_docs = range_docset(0..=0, 100, |doc| vec![(doc % 2) as u64]);

    // A matching target yields `Found` and positions the docset exactly on it.
    let outcome = even_docs.seek_danger(10);
    assert_eq!(outcome, SeekDangerResult::Found);
    assert_eq!(even_docs.doc(), 10);

    // Advancing afterwards continues the scan right after the found doc.
    let next_doc = even_docs.advance();
    assert_eq!(next_doc, 12);
    assert_eq!(even_docs.doc(), 12);
}
#[test]
fn seek_danger_miss_returns_lower_bound() {
    let mut even_docs = range_docset(0..=0, 100, |doc| vec![(doc % 2) as u64]);

    // Doc 11 is odd and therefore not a match. The reported lower bound has to be strictly
    // above the target without jumping over the next real match (doc 12).
    let SeekDangerResult::SeekLowerBound(lower_bound) = even_docs.seek_danger(11) else {
        panic!("11 should not match")
    };
    assert!(lower_bound > 11);
    assert!(lower_bound <= 12);

    // A miss may leave the docset in an invalid state; a further seek_danger recovers it.
    let outcome = even_docs.seek_danger(12);
    assert_eq!(outcome, SeekDangerResult::Found);
    assert_eq!(even_docs.doc(), 12);
}
#[test]
fn seek_danger_terminated_and_out_of_bounds() {
    let mut even_docs = range_docset(0..=0, 10, |doc| vec![(doc % 2) as u64]);
    let no_match_possible = SeekDangerResult::SeekLowerBound(TERMINATED);

    // `TERMINATED` itself can never match.
    let on_terminated = even_docs.seek_danger(TERMINATED);
    assert_eq!(on_terminated, no_match_possible);

    // Doc 10 lies just past the last doc (ids 0..=9), so no match is possible there either.
    let past_last_doc = even_docs.seek_danger(10);
    assert_eq!(past_last_doc, no_match_possible);
}
#[test]
fn seek_danger_multivalued() {
    // Each doc `i` carries the two values [i, i + 1], so only docs 4 and 5 contain the value 5.
    let mut docs_with_five = range_docset(5..=5, 20, |doc| vec![doc as u64, doc as u64 + 1]);

    let outcome = docs_with_five.seek_danger(4);
    assert_eq!(outcome, SeekDangerResult::Found);
    assert_eq!(docs_with_five.doc(), 4);

    let second_match = docs_with_five.advance();
    assert_eq!(second_match, 5);

    // Doc 5 is the last match: the next advance exhausts the docset.
    let after_last_match = docs_with_five.advance();
    assert_eq!(after_last_match, TERMINATED);
}
#[test]
fn seek_danger_matches_seek() {
    // For every possible target, compare `seek_danger` on a fresh docset with the true next
    // match, using a column that only has a handful of sparse matches.
    let matching_docs = [3u32, 7, 50, 51, 99];
    let num_docs = 100;
    // Matching docs hold the value 1, every other doc holds 0.
    let values_for_doc = |doc: usize| {
        let value = if matching_docs.contains(&(doc as u32)) {
            1u64
        } else {
            0u64
        };
        vec![value]
    };

    for target in 0..num_docs as u32 {
        // What a regular `seek(target)` would land on: the first match at or after `target`.
        let next_match = matching_docs
            .iter()
            .copied()
            .find(|&doc| doc >= target)
            .unwrap_or(TERMINATED);

        let mut docset = range_docset(1..=1, num_docs, values_for_doc);
        let outcome = docset.seek_danger(target);
        match outcome {
            SeekDangerResult::SeekLowerBound(lower_bound) => {
                assert_ne!(next_match, target, "target {target} should have been Found");
                assert!(lower_bound > target);
                // The lower bound must never jump over the true next match.
                assert!(lower_bound <= next_match);
            }
            SeekDangerResult::Found => {
                assert_eq!(next_match, target, "target {target} reported Found");
                assert_eq!(docset.doc(), target);
            }
        }
    }
}
use crate::{schema, IndexBuilder, TantivyDocument, Term};
#[test]
fn range_query_fast_optional_field_minimum() {

View File

@@ -1,5 +1,5 @@
use super::term_scorer::TermScorer;
use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN};
use crate::docset::DocSet;
use crate::fieldnorm::FieldNormReader;
use crate::index::SegmentReader;
use crate::postings::SegmentPostings;
@@ -92,13 +92,11 @@ impl Weight for TermWeight {
) -> crate::Result<()> {
match self.specialized_scorer(reader, 1.0)? {
TermOrEmptyOrAllScorer::TermScorer(mut term_scorer) => {
let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
for_each_docset_buffered(&mut term_scorer, &mut buffer, callback);
for_each_docset_buffered(&mut term_scorer, callback);
}
TermOrEmptyOrAllScorer::Empty => {}
TermOrEmptyOrAllScorer::AllMatch(mut all_scorer) => {
let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
for_each_docset_buffered(&mut all_scorer, &mut buffer, callback);
for_each_docset_buffered(&mut all_scorer, callback);
}
};

View File

@@ -17,18 +17,56 @@ pub(crate) fn for_each_scorer<TScorer: Scorer + ?Sized>(
}
}
/// Iterates through all of the documents matched by the DocSet
/// `DocSet`.
/// Number of `COLLECT_BLOCK_BUFFER_LEN`-sized windows accumulated into the large
/// buffer before it is flushed to the collector via `collect_block`.
const NUM_WINDOWS_PER_BLOCK: usize = 32;
/// Size of the buffer accumulated before invoking the callback (2_048 = 32 * 64).
/// `fill_buffer` keeps writing `COLLECT_BLOCK_BUFFER_LEN`-sized windows; this only
/// changes how much we accumulate before flushing.
const LARGE_COLLECT_BUFFER_LEN: usize = COLLECT_BLOCK_BUFFER_LEN * NUM_WINDOWS_PER_BLOCK;
/// Iterates through all of the documents matched by the `DocSet`, flushing
/// blocks of up to `LARGE_COLLECT_BUFFER_LEN` doc ids to `callback`.
///
/// `fill_buffer` only ever writes `COLLECT_BLOCK_BUFFER_LEN` doc ids at a time,
/// so we accumulate several such windows into a single larger buffer before
/// handing it to the collector. This amortizes the per-`collect_block` overhead
/// (virtual dispatch, aggregation setup) over more documents.
#[inline]
pub(crate) fn for_each_docset_buffered<T: DocSet + ?Sized>(
docset: &mut T,
buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN],
mut callback: impl FnMut(&[DocId]),
) {
// Heap-allocated once per call (i.e. once per segment in the no-score path).
// `new_zeroed_slice` zeroes directly on the heap, avoiding a 2_048-element
// stack temporary.
// SAFETY: an all-zero bit pattern is a valid value for every `DocId` (u32),
// so the zeroed slice is fully initialized.
let mut buffer: Box<[DocId]> =
unsafe { Box::new_zeroed_slice(LARGE_COLLECT_BUFFER_LEN).assume_init() };
loop {
let num_items = docset.fill_buffer(buffer);
callback(&buffer[..num_items]);
if num_items != buffer.len() {
let mut filled = 0;
let mut reached_end = false;
// Fill the large buffer one `COLLECT_BLOCK_BUFFER_LEN` window at a time.
// `chunks_exact_mut` yields windows of exactly `COLLECT_BLOCK_BUFFER_LEN`
// because `LARGE_COLLECT_BUFFER_LEN` is a multiple of it (empty remainder).
// The windows are contiguous and filled in order, so the doc ids always
// occupy the contiguous prefix `buffer[..filled]`.
for window in buffer.chunks_exact_mut(COLLECT_BLOCK_BUFFER_LEN) {
// SAFETY: each `window` is a slice of exactly `COLLECT_BLOCK_BUFFER_LEN`
// elements, so reinterpreting its start pointer as a fixed-size array
// reference of that length is valid.
let window: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN] =
unsafe { &mut *window.as_mut_ptr().cast::<[DocId; COLLECT_BLOCK_BUFFER_LEN]>() };
let num_items = docset.fill_buffer(window);
filled += num_items;
if num_items != COLLECT_BLOCK_BUFFER_LEN {
reached_end = true;
break;
}
}
callback(&buffer[..filled]);
if reached_end {
break;
}
}
@@ -104,9 +142,7 @@ pub trait Weight: Send + Sync + 'static {
callback: &mut dyn FnMut(&[DocId]),
) -> crate::Result<()> {
let mut docset = self.scorer(reader, 1.0)?;
let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
for_each_docset_buffered(&mut docset, &mut buffer, callback);
for_each_docset_buffered(&mut docset, callback);
Ok(())
}

View File

@@ -8,7 +8,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
description = "term hashmap used for indexing"
[dependencies]
murmurhash32 = "0.4"
murmurhash32 = "0.3"
common = { version = "0.11", path = "../common/", package = "tantivy-common" }
ahash = { version = "0.8.11", default-features = false, optional = true }