Compare commits

..

4 Commits

Author SHA1 Message Date
Ming
384f31d350 feat: Restore index sorting (#2959)
We ([ParadeDB](https://github.com/paradedb/paradedb)) have restored and been using the removed [index sorting](https://github.com/quickwit-oss/tantivy/issues/2352) feature in our Tantivy fork.

Our use case is sorting the index by Postgres' internal `ctid` identifier. Results returned from Tantivy must be checked against Postgres' visibility map, and checking them in ctid order is much more cache friendly, resulting in up to 80% speedups for certain queries.

This PR is split into 5 commits, corresponding to the index sorting reversal plus bug fixes we uncovered during our usage of index sorting.

| Commit | Maps to | What it does |
|---|---|---|
| `2aea0ad9f` | foundation ([#104](https://github.com/paradedb/tantivy/pull/104)) | Restore `SegmentComponent::TempStore` (revert of upstream #2815). Subsumes fork PR [#104](https://github.com/paradedb/tantivy/pull/104)'s CI fix. |
| `9205bcb0c` | [#92](https://github.com/paradedb/tantivy/pull/92) | Restore sort-by-field (single-segment + merge paths). |
| `39c790f0f` | [#101](https://github.com/paradedb/tantivy/pull/101) | Enable `sort_by` for `Str`/`Bytes` fast fields. |
| `9c4341a87` | [#105](https://github.com/paradedb/tantivy/pull/105) | Native typed numeric sort-key comparison (precision/NULL fix). |
| `2d9ba2418` | [#106](https://github.com/paradedb/tantivy/pull/106) | Preserve NULL ordering in numeric segment merges. |

We have discussed with the Tantivy maintainers and they indicated they would be open to this PR. Another motivation for landing this PR is we are planning on contributing a significant refactor that makes Tantivy's segment components extensible, and landing that without index sorting leads to too many conflicts.
2026-06-22 11:22:25 -07:00
Pascal Seitz
1e859fd78d fix term aggregation u32::MAX overflow issue 2026-06-18 17:07:43 +08:00
Pascal Seitz
f451fa938f explain why naive scorer must accumulate scores in WAND order 2026-06-17 18:58:58 +08:00
Pascal Seitz
2a82dd6f64 fix flaky test 2026-06-17 18:58:58 +08:00
12 changed files with 324 additions and 202 deletions

View File

@@ -54,6 +54,6 @@ pub fn generate_columnar_with_name(card: Card, num_docs: u32, column_name: &str)
}
let mut wrt: Vec<u8> = Vec::new();
columnar_writer.serialize(num_docs, None, &mut wrt).unwrap();
columnar_writer.serialize(num_docs, &mut wrt).unwrap();
ColumnarReader::open(wrt).unwrap()
}

View File

@@ -275,7 +275,7 @@ impl SegmentCompositeCollector {
dict.insert(
key,
IntermediateCompositeBucketEntry {
doc_count: agg.count,
doc_count: agg.count as u64,
sub_aggregation: sub_aggregation_res,
},
);

View File

@@ -957,7 +957,7 @@ fn into_intermediate_bucket_entry(
)?;
}
Ok(IntermediateTermBucketEntry {
doc_count: bucket.count,
doc_count: bucket.count as u64,
sub_aggregation: sub_aggregation_res,
})
}

View File

@@ -98,7 +98,7 @@ impl SegmentAggregationCollector for TermMissingAgg {
let missing_count = &self.missing_count_per_bucket[parent_bucket_id as usize];
let mut missing_entry = IntermediateTermBucketEntry {
doc_count: missing_count.missing_count,
doc_count: missing_count.missing_count as u64,
sub_aggregation: Default::default(),
};
if let Some(sub_agg) = &mut self.sub_agg {

View File

@@ -930,7 +930,7 @@ impl IntermediateRangeBucketEntry {
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateTermBucketEntry {
/// The number of documents in the bucket.
pub doc_count: u32,
pub doc_count: u64,
/// The sub_aggregation in this bucket.
pub sub_aggregation: IntermediateAggregationResults,
}
@@ -1240,6 +1240,24 @@ mod tests {
assert_eq!(tree_left, tree_expected);
}
#[test]
fn test_term_bucket_doc_count_no_u32_overflow() {
// Two segments each contributing (u32::MAX - 100) docs to the same term. Summing them
// overflowed when doc_count was u32.
let per_segment = u32::MAX as u64 - 100;
let mut entry = IntermediateTermBucketEntry {
doc_count: per_segment,
sub_aggregation: Default::default(),
};
entry
.merge_fruits(IntermediateTermBucketEntry {
doc_count: per_segment,
sub_aggregation: Default::default(),
})
.unwrap();
assert_eq!(entry.doc_count, per_segment * 2);
}
#[test]
fn test_merge_fruits_tree_empty() {
let mut tree_left = get_intermediate_tree_with_ranges(&[

View File

@@ -7,24 +7,16 @@ use super::SegmentWriter;
use crate::schema::{Field, Schema};
use crate::{DocAddress, DocId, IndexSortByField, TantivyError};
/// Describes how the document ID mapping was produced during a merge.
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum MappingType {
/// Segments are concatenated in order with no deletes; doc IDs are contiguous ranges.
Stacked,
/// Segments are concatenated in order but some documents have been deleted and are skipped.
StackedWithDeletes,
/// Documents have been reordered (e.g. sorted by a field or externally shuffled).
Shuffled,
}
/// Struct to provide mapping from new doc_id to old doc_id and segment.
///
/// Callers outside tantivy (e.g. pomsky's merge executor) can construct a
/// `Shuffled` mapping directly from a precomputed permutation and pass it
/// into [`IndexMerger::write_with_doc_id_mapping`].
#[derive(Clone)]
pub struct SegmentDocIdMapping {
pub(crate) struct SegmentDocIdMapping {
pub(crate) new_doc_id_to_old_doc_addr: Vec<DocAddress>,
pub(crate) alive_bitsets: Vec<Option<ReadOnlyBitSet>>,
mapping_type: MappingType,
@@ -43,25 +35,6 @@ impl SegmentDocIdMapping {
}
}
/// Build a `Shuffled` mapping from an explicit permutation of [`DocAddress`]es.
///
/// `new_doc_id_to_old_doc_addr[new_id]` gives the source segment and doc id for
/// the document that should appear at position `new_id` in the merged segment.
/// `alive_bitsets` must contain one entry per source segment (in the same order
/// as passed to [`IndexMerger::open_with_custom_alive_set`]), each `None` if that
/// segment has no deletes.
pub fn new_shuffled(
new_doc_id_to_old_doc_addr: Vec<DocAddress>,
alive_bitsets: Vec<Option<ReadOnlyBitSet>>,
) -> Self {
Self {
new_doc_id_to_old_doc_addr,
mapping_type: MappingType::Shuffled,
alive_bitsets,
}
}
/// Returns the [`MappingType`] that describes how this mapping was constructed.
pub fn mapping_type(&self) -> MappingType {
self.mapping_type
}
@@ -91,14 +64,13 @@ impl SegmentDocIdMapping {
}
}
/// Bidirectional mapping between old and new doc IDs within a single segment.
/// Struct to provide mapping from old doc_id to new doc_id and vice versa within a segment.
pub struct DocIdMapping {
new_doc_id_to_old: Vec<DocId>,
old_doc_id_to_new: Vec<DocId>,
}
impl DocIdMapping {
/// Constructs a [`DocIdMapping`] from a vector mapping each new doc ID to its old doc ID.
pub fn from_new_id_to_old_id(new_doc_id_to_old: Vec<DocId>) -> Self {
let max_doc = new_doc_id_to_old.len();
let old_max_doc = new_doc_id_to_old
@@ -130,7 +102,6 @@ impl DocIdMapping {
self.new_doc_id_to_old.iter().cloned()
}
/// Returns a slice mapping each old doc ID to its corresponding new doc ID.
pub fn old_to_new_ids(&self) -> &[DocId] {
&self.old_doc_id_to_new[..]
}
@@ -142,11 +113,9 @@ impl DocIdMapping {
.map(|old_doc| els[*old_doc as usize])
.collect()
}
/// Returns the number of new (post-sort) doc IDs in this mapping.
pub fn num_new_doc_ids(&self) -> usize {
self.new_doc_id_to_old.len()
}
/// Returns the number of old (pre-sort) doc IDs covered by this mapping.
pub fn num_old_doc_ids(&self) -> usize {
self.old_doc_id_to_new.len()
}

View File

@@ -113,7 +113,6 @@ fn estimate_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::
Ok(total_num_tokens)
}
/// Merges multiple index segments into a single new segment.
pub struct IndexMerger {
index_settings: IndexSettings,
schema: Schema,
@@ -219,7 +218,6 @@ impl IndexMerger {
.any(|doc_id| col.first(doc_id).is_none())
}
/// Opens an [`IndexMerger`] over the given segments using their existing delete sets.
pub fn open(
schema: Schema,
index_settings: IndexSettings,
@@ -229,14 +227,18 @@ impl IndexMerger {
Self::open_with_custom_alive_set(schema, index_settings, segments, alive_bitset)
}
/// Opens an [`IndexMerger`] with a custom alive (delete) set per segment.
///
/// For every segment, an optional [`AliveBitSet`] can be provided which is intersected
/// with the segment's existing alive set. Pass `None` for a segment to use its existing
/// delete set unchanged.
///
/// This allows merging while also applying an additional filter, for example to demux
/// documents by a field value into separate output segments.
// Create merge with a custom delete set.
// For every Segment, a delete bitset can be provided, which
// will be merged with the existing bit set. Make sure the index
// corresponds to the segment index.
//
// If `None` is provided for custom alive set, the regular alive set will be used.
// If a alive_bitset is provided, the union between the provided and regular
// alive set will be used.
//
// This can be used to merge but also apply an additional filter.
// One use case is demux, which is basically taking a list of
// segments and partitions them e.g. by a value in a field.
pub fn open_with_custom_alive_set(
schema: Schema,
index_settings: IndexSettings,
@@ -945,7 +947,7 @@ impl IndexMerger {
///
/// # Returns
/// The number of documents in the resulting segment.
pub fn write(&self, serializer: SegmentSerializer) -> crate::Result<u32> {
pub fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
let doc_id_mapping = if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref()
{
if self.is_disjunct_and_sorted_on_sort_property(sort_by_field)? {
@@ -956,31 +958,6 @@ impl IndexMerger {
} else {
self.get_doc_id_from_concatenated_data()?
};
self.write_with_mapping(serializer, doc_id_mapping)
}
/// Like [`write`], but uses the caller-supplied `doc_id_mapping` instead of
/// deriving one from an index sort field.
///
/// The mapping must cover *all* live documents across every segment passed to
/// [`IndexMerger::open_with_custom_alive_set`]. The simplest way to build one
/// is [`SegmentDocIdMapping::new_shuffled`].
///
/// # Returns
/// The number of documents in the resulting segment.
pub fn write_with_doc_id_mapping(
&self,
serializer: SegmentSerializer,
doc_id_mapping: SegmentDocIdMapping,
) -> crate::Result<u32> {
self.write_with_mapping(serializer, doc_id_mapping)
}
fn write_with_mapping(
&self,
mut serializer: SegmentSerializer,
doc_id_mapping: SegmentDocIdMapping,
) -> crate::Result<u32> {
debug!("write-fieldnorms");
if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
self.write_fieldnorms(fieldnorms_serializer, &doc_id_mapping)?;

View File

@@ -8,7 +8,7 @@
pub(crate) mod delete_queue;
pub(crate) mod path_to_unordered_id;
pub mod doc_id_mapping;
pub(crate) mod doc_id_mapping;
mod doc_opstamp_mapping;
mod flat_map_with_buffer;
pub(crate) mod index_writer;
@@ -17,8 +17,7 @@ pub(crate) mod indexing_term;
mod log_merge_policy;
mod merge_operation;
pub(crate) mod merge_policy;
/// Segment merger: combines multiple segments into one.
pub mod merger;
pub(crate) mod merger;
mod merger_sorted_index_test;
pub(crate) mod operation;
pub(crate) mod prepared_commit;
@@ -34,19 +33,15 @@ mod stamper;
use crossbeam_channel as channel;
use smallvec::SmallVec;
pub use self::doc_id_mapping::SegmentDocIdMapping;
pub use self::index_writer::{advance_deletes, IndexWriter, IndexWriterOptions};
pub use self::log_merge_policy::LogMergePolicy;
pub use self::merge_operation::MergeOperation;
pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
pub use self::merger::IndexMerger;
pub use self::operation::{AddOperation, DeleteOperation, UserOperation};
pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::SegmentEntry;
pub(crate) use self::segment_serializer::SegmentSerializer;
pub use self::segment_updater::{
merge_filtered_segments, merge_indices, merge_segments_with_doc_id_mapping,
};
pub use self::segment_updater::{merge_filtered_segments, merge_indices};
pub use self::segment_writer::SegmentWriter;
pub use self::single_segment_index_writer::SingleSegmentIndexWriter;

View File

@@ -15,7 +15,6 @@ use crate::directory::{Directory, DirectoryClone, GarbageCollectionResult};
use crate::fastfield::AliveBitSet;
use crate::index::{Index, IndexMeta, IndexSettings, Segment, SegmentId, SegmentMeta};
use crate::indexer::delete_queue::DeleteCursor;
use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
use crate::indexer::index_writer::advance_deletes;
use crate::indexer::merge_operation::MergeOperationInventory;
use crate::indexer::merger::IndexMerger;
@@ -256,82 +255,6 @@ pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
Ok(merged_index)
}
/// Like [`merge_filtered_segments`], but uses a caller-supplied [`SegmentDocIdMapping`]
/// to control the final document order. The mapping should be built from the same
/// segments (in the same order) passed here.
///
/// Use this to apply an external reordering during a merge without relying on a persistent fast field.
///
/// # Warning
/// Same caveats as [`merge_filtered_segments`]: no live `IndexWriter` allowed.
#[doc(hidden)]
pub fn merge_segments_with_doc_id_mapping<T: Into<Box<dyn Directory>>>(
segments: &[Segment],
target_settings: IndexSettings,
filter_doc_ids: Vec<Option<AliveBitSet>>,
doc_id_mapping: SegmentDocIdMapping,
output_directory: T,
) -> crate::Result<Index> {
if segments.is_empty() {
return Err(crate::TantivyError::InvalidArgument(
"No segments given to merge".to_string(),
));
}
let target_schema = segments[0].schema();
if segments
.iter()
.skip(1)
.any(|seg| seg.schema() != target_schema)
{
return Err(crate::TantivyError::InvalidArgument(
"Attempt to merge different schema indices".to_string(),
));
}
let mut merged_index = Index::create(
output_directory,
target_schema.clone(),
target_settings.clone(),
)?;
let merged_segment = merged_index.new_segment();
let merged_segment_id = merged_segment.id();
let merger: IndexMerger = IndexMerger::open_with_custom_alive_set(
merged_index.schema(),
merged_index.settings().clone(),
segments,
filter_doc_ids,
)?;
let segment_serializer = SegmentSerializer::for_segment(merged_segment, true)?;
let num_docs = merger.write_with_doc_id_mapping(segment_serializer, doc_id_mapping)?;
let segment_meta = merged_index.new_segment_meta(merged_segment_id, num_docs);
let stats = format!(
"Segments Merge (external reordering): [{}]",
segments
.iter()
.fold(String::new(), |sum, current| format!(
"{sum}{} ",
current.meta().id().uuid_string()
))
.trim_end()
);
let index_meta = IndexMeta {
index_settings: target_settings,
segments: vec![segment_meta],
schema: target_schema,
opstamp: 0u64,
payload: Some(stats),
};
save_metas(&index_meta, merged_index.directory_mut())?;
Ok(merged_index)
}
pub(crate) struct InnerSegmentUpdater {
// we keep a copy of the current active IndexMeta to
// avoid loading the file every time we need it in the

View File

@@ -229,10 +229,7 @@ pub use crate::index::{
Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader, Order,
Segment, SegmentMeta, SegmentReader,
};
pub use crate::indexer::{
IndexMerger, IndexWriter, SegmentDocIdMapping, SingleSegmentIndexWriter,
merge_segments_with_doc_id_mapping,
};
pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
pub use crate::schema::{Document, TantivyDocument, Term};
/// Index format version.

View File

@@ -9,8 +9,11 @@ const POSITION_END: u32 = 0;
#[derive(Default)]
pub(crate) struct BufferLender {
buffer_u8: Vec<u8>,
buffer_u32: Vec<u32>,
pub buffer_u8: Vec<u8>,
pub buffer_u32: Vec<u32>,
pub doc_id_and_tf: Vec<(u32, u32)>,
pub buffer_positions_flat: Vec<u32>,
pub doc_id_and_offsets: Vec<(u32, u32, u32)>,
}
impl BufferLender {
@@ -198,11 +201,14 @@ impl Recorder for TermFrequencyRecorder {
serializer: &mut FieldSerializer<'_>,
buffer_lender: &mut BufferLender,
) {
let buffer = buffer_lender.lend_u8();
self.stack.read_to_end(arena, buffer);
let mut u32_it = VInt32Reader::new(&buffer[..]);
if let Some(doc_id_map) = doc_id_map {
let mut doc_id_and_tf = vec![];
buffer_lender.buffer_u8.clear();
buffer_lender.doc_id_and_tf.clear();
let buffer = &mut buffer_lender.buffer_u8;
self.stack.read_to_end(arena, buffer);
let mut u32_it = VInt32Reader::new(&buffer[..]);
let doc_id_and_tf = &mut buffer_lender.doc_id_and_tf;
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
@@ -212,10 +218,13 @@ impl Recorder for TermFrequencyRecorder {
}
doc_id_and_tf.sort_unstable_by_key(|&(doc_id, _)| doc_id);
for (doc_id, tf) in doc_id_and_tf {
for &(doc_id, tf) in doc_id_and_tf.iter() {
serializer.write_doc(doc_id, tf, &[][..]);
}
} else {
let buffer = buffer_lender.lend_u8();
self.stack.read_to_end(arena, buffer);
let mut u32_it = VInt32Reader::new(&buffer[..]);
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
@@ -272,42 +281,78 @@ impl Recorder for TfAndPositionRecorder {
serializer: &mut FieldSerializer<'_>,
buffer_lender: &mut BufferLender,
) {
let (buffer_u8, buffer_positions) = buffer_lender.lend_all();
self.stack.read_to_end(arena, buffer_u8);
let mut u32_it = VInt32Reader::new(&buffer_u8[..]);
let mut doc_id_and_positions = vec![];
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
prev_doc = doc_id;
let mut prev_position_plus_one = 1u32;
buffer_positions.clear();
loop {
match u32_it.next() {
Some(POSITION_END) | None => {
break;
}
Some(position_plus_one) => {
let delta_position = position_plus_one - prev_position_plus_one;
buffer_positions.push(delta_position);
prev_position_plus_one = position_plus_one;
if let Some(doc_id_map) = doc_id_map {
buffer_lender.buffer_u8.clear();
buffer_lender.buffer_positions_flat.clear();
buffer_lender.doc_id_and_offsets.clear();
let buffer_u8 = &mut buffer_lender.buffer_u8;
self.stack.read_to_end(arena, buffer_u8);
let mut u32_it = VInt32Reader::new(&buffer_u8[..]);
let buffer_positions_flat = &mut buffer_lender.buffer_positions_flat;
let doc_id_and_offsets = &mut buffer_lender.doc_id_and_offsets;
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
prev_doc = doc_id;
let start_offset = buffer_positions_flat.len() as u32;
let mut prev_position_plus_one = 1u32;
loop {
match u32_it.next() {
Some(POSITION_END) | None => {
break;
}
Some(position_plus_one) => {
let delta_position = position_plus_one - prev_position_plus_one;
buffer_positions_flat.push(delta_position);
prev_position_plus_one = position_plus_one;
}
}
}
let end_offset = buffer_positions_flat.len() as u32;
doc_id_and_offsets.push((
doc_id_map.get_new_doc_id(doc_id),
start_offset,
end_offset,
));
}
if let Some(doc_id_map) = doc_id_map {
// this simple variant to remap may consume to much memory
doc_id_and_positions
.push((doc_id_map.get_new_doc_id(doc_id), buffer_positions.to_vec()));
} else {
doc_id_and_offsets.sort_unstable_by_key(|&(doc_id, _, _)| doc_id);
for &(doc_id, start_offset, end_offset) in doc_id_and_offsets.iter() {
let positions =
&buffer_positions_flat[(start_offset as usize)..(end_offset as usize)];
serializer.write_doc(doc_id, positions.len() as u32, positions);
}
} else {
let (buffer_u8, buffer_positions) = buffer_lender.lend_all();
self.stack.read_to_end(arena, buffer_u8);
let mut u32_it = VInt32Reader::new(&buffer_u8[..]);
let mut prev_doc = 0;
while let Some(delta_doc_id) = u32_it.next() {
let doc_id = prev_doc + delta_doc_id;
prev_doc = doc_id;
let mut prev_position_plus_one = 1u32;
buffer_positions.clear();
loop {
match u32_it.next() {
Some(POSITION_END) | None => {
break;
}
Some(position_plus_one) => {
let delta_position = position_plus_one - prev_position_plus_one;
buffer_positions.push(delta_position);
prev_position_plus_one = position_plus_one;
}
}
}
serializer.write_doc(doc_id, buffer_positions.len() as u32, buffer_positions);
}
}
if doc_id_map.is_some() {
doc_id_and_positions.sort_unstable_by_key(|&(doc_id, _)| doc_id);
for (doc_id, positions) in doc_id_and_positions {
serializer.write_doc(doc_id, positions.len() as u32, &positions);
}
}
}
fn term_doc_freq(&self) -> Option<u32> {

View File

@@ -273,8 +273,14 @@ mod tests {
}
if all_match {
let score: Score =
leader.score() + secondaries.iter_mut().map(|s| s.score()).sum::<Score>();
// Accumulate in the same left-to-right order as the WAND implementation
// (leader first, then each secondary in turn). Float addition is not
// associative, so `leader + secondaries.sum()` gives a different bit
// pattern and can cause spurious nearly_equals failures.
let mut score: Score = leader.score();
for secondary in secondaries.iter_mut() {
score += secondary.score();
}
if score > limit {
heap.push(Float(score));
@@ -417,6 +423,198 @@ mod tests {
}
}
#[test]
fn test_block_wand_intersection_three_scorers_regression() {
// Minimal failing case found by proptest (CI run 27557430583, job 81460063906).
// Posting list 0 spans docs 063 (all present, doc 8 has tf=80, doc 26 tf=4, rest tf=1).
// Posting lists 1 and 2 are sparse with varying term freqs, and doc 16/64 appear only
// in lists 1/2 but not list 0. The high tf=80 on doc 8 of list 0 makes the WAND
// upper-bound estimation skip documents that the naive intersection would score.
let posting_lists: &[&[(DocId, u32)]] = &[
&[
(0, 1),
(1, 1),
(2, 1),
(3, 1),
(4, 1),
(5, 1),
(6, 1),
(7, 1),
(8, 80),
(9, 1),
(10, 1),
(11, 1),
(12, 1),
(13, 1),
(14, 1),
(15, 1),
(17, 1),
(18, 1),
(19, 1),
(20, 1),
(21, 1),
(22, 1),
(23, 1),
(24, 1),
(25, 1),
(26, 4),
(27, 1),
(28, 1),
(29, 1),
(30, 1),
(31, 1),
(32, 1),
(33, 1),
(34, 1),
(35, 1),
(36, 1),
(37, 1),
(38, 1),
(39, 1),
(40, 1),
(41, 1),
(42, 1),
(43, 1),
(44, 1),
(45, 1),
(46, 1),
(47, 1),
(48, 1),
(49, 1),
(50, 1),
(51, 1),
(52, 1),
(53, 1),
(54, 1),
(55, 1),
(56, 1),
(57, 1),
(58, 1),
(59, 1),
(60, 1),
(61, 1),
(62, 1),
(63, 1),
],
&[
(0, 2),
(3, 98),
(7, 93),
(8, 87),
(9, 39),
(10, 2),
(12, 71),
(14, 47),
(15, 76),
(16, 6),
(17, 38),
(19, 61),
(20, 87),
(21, 1),
(22, 5),
(23, 43),
(25, 48),
(26, 87),
(28, 81),
(29, 69),
(30, 7),
(31, 47),
(32, 32),
(33, 38),
(35, 39),
(38, 65),
(39, 98),
(42, 43),
(43, 52),
(44, 99),
(45, 88),
(48, 24),
(51, 61),
(52, 22),
(53, 58),
(55, 26),
(56, 32),
(58, 57),
(60, 29),
(61, 78),
(62, 9),
(63, 44),
(64, 29),
],
&[
(0, 94),
(2, 49),
(3, 63),
(4, 7),
(6, 93),
(7, 17),
(8, 91),
(9, 18),
(10, 85),
(11, 11),
(12, 45),
(13, 42),
(15, 91),
(16, 44),
(17, 36),
(18, 68),
(19, 24),
(20, 17),
(21, 59),
(22, 97),
(24, 20),
(25, 7),
(26, 85),
(27, 69),
(28, 78),
(29, 84),
(30, 35),
(31, 49),
(33, 83),
(34, 97),
(35, 29),
(36, 43),
(37, 59),
(38, 79),
(39, 74),
(40, 21),
(41, 5),
(42, 47),
(43, 27),
(44, 59),
(45, 97),
(46, 91),
(47, 81),
(48, 57),
(49, 47),
(50, 64),
(51, 86),
(52, 60),
(53, 52),
(54, 14),
(55, 23),
(56, 64),
(57, 40),
(58, 5),
(59, 30),
(60, 81),
(61, 62),
(62, 39),
(63, 93),
(64, 82),
],
];
let fieldnorms: &[u32] = &[
624, 668, 725, 670, 851, 169, 537, 627, 200, 757, 51, 272, 835, 89, 750, 63, 272, 406,
394, 390, 822, 449, 257, 571, 527, 855, 4, 98, 548, 413, 539, 351, 596, 151, 728, 152,
766, 829, 20, 828, 477, 251, 743, 646, 136, 477, 909, 907, 266, 341, 676, 161, 40, 384,
347, 707, 42, 397, 482, 814, 801, 528, 465, 410, 171,
];
let posting_lists_owned: Vec<Vec<(DocId, u32)>> =
posting_lists.iter().map(|pl| pl.to_vec()).collect();
test_block_wand_intersection_aux(&posting_lists_owned, fieldnorms);
}
#[test]
fn test_block_wand_intersection_disjoint() {
// Two posting lists with no overlap — intersection is empty.