mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-22 10:20:43 +00:00
Compare commits
4 Commits
mallets/so
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
195e69dac4 | ||
|
|
1e859fd78d | ||
|
|
f451fa938f | ||
|
|
2a82dd6f64 |
2
.github/workflows/coverage.yml
vendored
2
.github/workflows/coverage.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
||||
- name: Install Rust
|
||||
run: rustup toolchain install nightly-2025-12-01 --profile minimal --component llvm-tools-preview
|
||||
- uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
|
||||
|
||||
2
.github/workflows/long_running.yml
vendored
2
.github/workflows/long_running.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
||||
- name: Install stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
|
||||
with:
|
||||
|
||||
2
.github/workflows/scorecard.yml
vendored
2
.github/workflows/scorecard.yml
vendored
@@ -22,7 +22,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: 'Checkout code'
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
checks: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
||||
|
||||
- name: Install nightly
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
name: test-${{ matrix.features.label}}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
||||
|
||||
- name: Install stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
|
||||
|
||||
@@ -275,7 +275,7 @@ impl SegmentCompositeCollector {
|
||||
dict.insert(
|
||||
key,
|
||||
IntermediateCompositeBucketEntry {
|
||||
doc_count: agg.count,
|
||||
doc_count: agg.count as u64,
|
||||
sub_aggregation: sub_aggregation_res,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -957,7 +957,7 @@ fn into_intermediate_bucket_entry(
|
||||
)?;
|
||||
}
|
||||
Ok(IntermediateTermBucketEntry {
|
||||
doc_count: bucket.count,
|
||||
doc_count: bucket.count as u64,
|
||||
sub_aggregation: sub_aggregation_res,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -98,7 +98,7 @@ impl SegmentAggregationCollector for TermMissingAgg {
|
||||
|
||||
let missing_count = &self.missing_count_per_bucket[parent_bucket_id as usize];
|
||||
let mut missing_entry = IntermediateTermBucketEntry {
|
||||
doc_count: missing_count.missing_count,
|
||||
doc_count: missing_count.missing_count as u64,
|
||||
sub_aggregation: Default::default(),
|
||||
};
|
||||
if let Some(sub_agg) = &mut self.sub_agg {
|
||||
|
||||
@@ -930,7 +930,7 @@ impl IntermediateRangeBucketEntry {
|
||||
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct IntermediateTermBucketEntry {
|
||||
/// The number of documents in the bucket.
|
||||
pub doc_count: u32,
|
||||
pub doc_count: u64,
|
||||
/// The sub_aggregation in this bucket.
|
||||
pub sub_aggregation: IntermediateAggregationResults,
|
||||
}
|
||||
@@ -1240,6 +1240,24 @@ mod tests {
|
||||
assert_eq!(tree_left, tree_expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_term_bucket_doc_count_no_u32_overflow() {
|
||||
// Two segments each contributing (u32::MAX - 100) docs to the same term. Summing them
|
||||
// overflowed when doc_count was u32.
|
||||
let per_segment = u32::MAX as u64 - 100;
|
||||
let mut entry = IntermediateTermBucketEntry {
|
||||
doc_count: per_segment,
|
||||
sub_aggregation: Default::default(),
|
||||
};
|
||||
entry
|
||||
.merge_fruits(IntermediateTermBucketEntry {
|
||||
doc_count: per_segment,
|
||||
sub_aggregation: Default::default(),
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(entry.doc_count, per_segment * 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_fruits_tree_empty() {
|
||||
let mut tree_left = get_intermediate_tree_with_ranges(&[
|
||||
|
||||
@@ -273,8 +273,14 @@ mod tests {
|
||||
}
|
||||
|
||||
if all_match {
|
||||
let score: Score =
|
||||
leader.score() + secondaries.iter_mut().map(|s| s.score()).sum::<Score>();
|
||||
// Accumulate in the same left-to-right order as the WAND implementation
|
||||
// (leader first, then each secondary in turn). Float addition is not
|
||||
// associative, so `leader + secondaries.sum()` gives a different bit
|
||||
// pattern and can cause spurious nearly_equals failures.
|
||||
let mut score: Score = leader.score();
|
||||
for secondary in secondaries.iter_mut() {
|
||||
score += secondary.score();
|
||||
}
|
||||
|
||||
if score > limit {
|
||||
heap.push(Float(score));
|
||||
@@ -417,6 +423,198 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_block_wand_intersection_three_scorers_regression() {
|
||||
// Minimal failing case found by proptest (CI run 27557430583, job 81460063906).
|
||||
// Posting list 0 spans docs 0–63 (all present except doc 16; doc 8 has tf=80, doc 26 tf=4, rest tf=1).
|
||||
// Posting lists 1 and 2 are sparse with varying term freqs, and doc 16/64 appear only
|
||||
// in lists 1/2 but not list 0. The high tf=80 on doc 8 of list 0 makes the WAND
|
||||
// upper-bound estimation skip documents that the naive intersection would score.
|
||||
let posting_lists: &[&[(DocId, u32)]] = &[
|
||||
&[
|
||||
(0, 1),
|
||||
(1, 1),
|
||||
(2, 1),
|
||||
(3, 1),
|
||||
(4, 1),
|
||||
(5, 1),
|
||||
(6, 1),
|
||||
(7, 1),
|
||||
(8, 80),
|
||||
(9, 1),
|
||||
(10, 1),
|
||||
(11, 1),
|
||||
(12, 1),
|
||||
(13, 1),
|
||||
(14, 1),
|
||||
(15, 1),
|
||||
(17, 1),
|
||||
(18, 1),
|
||||
(19, 1),
|
||||
(20, 1),
|
||||
(21, 1),
|
||||
(22, 1),
|
||||
(23, 1),
|
||||
(24, 1),
|
||||
(25, 1),
|
||||
(26, 4),
|
||||
(27, 1),
|
||||
(28, 1),
|
||||
(29, 1),
|
||||
(30, 1),
|
||||
(31, 1),
|
||||
(32, 1),
|
||||
(33, 1),
|
||||
(34, 1),
|
||||
(35, 1),
|
||||
(36, 1),
|
||||
(37, 1),
|
||||
(38, 1),
|
||||
(39, 1),
|
||||
(40, 1),
|
||||
(41, 1),
|
||||
(42, 1),
|
||||
(43, 1),
|
||||
(44, 1),
|
||||
(45, 1),
|
||||
(46, 1),
|
||||
(47, 1),
|
||||
(48, 1),
|
||||
(49, 1),
|
||||
(50, 1),
|
||||
(51, 1),
|
||||
(52, 1),
|
||||
(53, 1),
|
||||
(54, 1),
|
||||
(55, 1),
|
||||
(56, 1),
|
||||
(57, 1),
|
||||
(58, 1),
|
||||
(59, 1),
|
||||
(60, 1),
|
||||
(61, 1),
|
||||
(62, 1),
|
||||
(63, 1),
|
||||
],
|
||||
&[
|
||||
(0, 2),
|
||||
(3, 98),
|
||||
(7, 93),
|
||||
(8, 87),
|
||||
(9, 39),
|
||||
(10, 2),
|
||||
(12, 71),
|
||||
(14, 47),
|
||||
(15, 76),
|
||||
(16, 6),
|
||||
(17, 38),
|
||||
(19, 61),
|
||||
(20, 87),
|
||||
(21, 1),
|
||||
(22, 5),
|
||||
(23, 43),
|
||||
(25, 48),
|
||||
(26, 87),
|
||||
(28, 81),
|
||||
(29, 69),
|
||||
(30, 7),
|
||||
(31, 47),
|
||||
(32, 32),
|
||||
(33, 38),
|
||||
(35, 39),
|
||||
(38, 65),
|
||||
(39, 98),
|
||||
(42, 43),
|
||||
(43, 52),
|
||||
(44, 99),
|
||||
(45, 88),
|
||||
(48, 24),
|
||||
(51, 61),
|
||||
(52, 22),
|
||||
(53, 58),
|
||||
(55, 26),
|
||||
(56, 32),
|
||||
(58, 57),
|
||||
(60, 29),
|
||||
(61, 78),
|
||||
(62, 9),
|
||||
(63, 44),
|
||||
(64, 29),
|
||||
],
|
||||
&[
|
||||
(0, 94),
|
||||
(2, 49),
|
||||
(3, 63),
|
||||
(4, 7),
|
||||
(6, 93),
|
||||
(7, 17),
|
||||
(8, 91),
|
||||
(9, 18),
|
||||
(10, 85),
|
||||
(11, 11),
|
||||
(12, 45),
|
||||
(13, 42),
|
||||
(15, 91),
|
||||
(16, 44),
|
||||
(17, 36),
|
||||
(18, 68),
|
||||
(19, 24),
|
||||
(20, 17),
|
||||
(21, 59),
|
||||
(22, 97),
|
||||
(24, 20),
|
||||
(25, 7),
|
||||
(26, 85),
|
||||
(27, 69),
|
||||
(28, 78),
|
||||
(29, 84),
|
||||
(30, 35),
|
||||
(31, 49),
|
||||
(33, 83),
|
||||
(34, 97),
|
||||
(35, 29),
|
||||
(36, 43),
|
||||
(37, 59),
|
||||
(38, 79),
|
||||
(39, 74),
|
||||
(40, 21),
|
||||
(41, 5),
|
||||
(42, 47),
|
||||
(43, 27),
|
||||
(44, 59),
|
||||
(45, 97),
|
||||
(46, 91),
|
||||
(47, 81),
|
||||
(48, 57),
|
||||
(49, 47),
|
||||
(50, 64),
|
||||
(51, 86),
|
||||
(52, 60),
|
||||
(53, 52),
|
||||
(54, 14),
|
||||
(55, 23),
|
||||
(56, 64),
|
||||
(57, 40),
|
||||
(58, 5),
|
||||
(59, 30),
|
||||
(60, 81),
|
||||
(61, 62),
|
||||
(62, 39),
|
||||
(63, 93),
|
||||
(64, 82),
|
||||
],
|
||||
];
|
||||
let fieldnorms: &[u32] = &[
|
||||
624, 668, 725, 670, 851, 169, 537, 627, 200, 757, 51, 272, 835, 89, 750, 63, 272, 406,
|
||||
394, 390, 822, 449, 257, 571, 527, 855, 4, 98, 548, 413, 539, 351, 596, 151, 728, 152,
|
||||
766, 829, 20, 828, 477, 251, 743, 646, 136, 477, 909, 907, 266, 341, 676, 161, 40, 384,
|
||||
347, 707, 42, 397, 482, 814, 801, 528, 465, 410, 171,
|
||||
];
|
||||
let posting_lists_owned: Vec<Vec<(DocId, u32)>> =
|
||||
posting_lists.iter().map(|pl| pl.to_vec()).collect();
|
||||
test_block_wand_intersection_aux(&posting_lists_owned, fieldnorms);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_block_wand_intersection_disjoint() {
|
||||
// Two posting lists with no overlap — intersection is empty.
|
||||
|
||||
Reference in New Issue
Block a user