Buffer up to 2048 doc ids in for_each_docset_buffered

The no-score collection path (Weight::for_each_no_score) handed the collector's collect_block one COLLECT_BLOCK_BUFFER_LEN (64) block at a time. For aggregations this is the dominant path, and 64 docs per collect_block under-amortizes the per-call overhead. for_each_docset_buffered now owns a 2048-element heap buffer and fills it through successive fill_buffer calls over 64-element windows, flushing a single larger block to collect_block. fill_buffer keeps its 64-element window contract, so no DocSet implementation changes. The buffer is allocated with Box::new_zeroed_slice (stable since 1.92, hence the MSRV bump) to zero directly on the heap.
Cargo clippy fix (#2943 )
2026-06-03 17:10:48 +00:00 · 2026-06-01 15:25:39 +02:00 · 2026-06-01 14:39:44 +02:00 · 2026-05-19 13:29:35 +02:00 · 2026-05-19 11:43:17 +02:00 · 2026-05-19 11:38:48 +02:00
16 changed files with 438 additions and 353 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
 readme = "README.md"
 keywords = ["search", "information", "retrieval"]
 edition = "2021"
-rust-version = "1.86"
+rust-version = "1.92"
 exclude = ["benches/*.json", "benches/*.txt"]

 [dependencies]
--- a/common/src/file_slice.rs
+++ b/common/src/file_slice.rs
@@ -121,7 +121,7 @@ pub struct FileSlice {

 impl fmt::Debug for FileSlice {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "FileSlice({:?}, {:?})", &self.data, self.range)
+        write!(f, "FileSlice({:?}, {:?})", self.data, self.range)
    }
 }

--- a/src/aggregation/metric/cardinality.rs
+++ b/src/aggregation/metric/cardinality.rs
@@ -171,7 +171,7 @@ impl CouponCache {
            let uninitialized_coupon = Coupon::from_hash(0);
            let mut coupon_map: Vec<Coupon> =
                vec![uninitialized_coupon; highest_term_ord as usize + 1];
-            for (term_ord, coupon) in term_ords.into_iter().zip(coupons.into_iter()) {
+            for (term_ord, coupon) in term_ords.into_iter().zip(coupons) {
                coupon_map[term_ord as usize] = coupon;
            }
            CouponCache::Dense {
@@ -825,7 +825,7 @@ impl<'de> Deserialize<'de> for CardinalityCollector {
 impl CardinalityCollector {
    fn new(salt: u8) -> Self {
        Self {
-            sketch: HllSketch::new(LG_K, HllType::Hll4),
+            sketch: HllSketch::new(LG_K, HllType::Hll8),
            salt,
        }
    }
@@ -856,7 +856,7 @@ impl CardinalityCollector {
        let mut union = HllUnion::new(LG_K);
        union.update(&self.sketch);
        union.update(&right.sketch);
-        self.sketch = union.to_sketch(HllType::Hll4);
+        self.sketch = union.to_sketch(HllType::Hll8);
        Ok(())
    }
 }
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -301,11 +301,14 @@ pub trait SegmentCollector: 'static {
    /// The query pushes the scored document to the collector via this method.
    fn collect(&mut self, doc: DocId, score: Score);

-    /// The query pushes the scored document to the collector via this method.
+    /// The query pushes the matched documents to the collector via this method.
    /// This method is used when the collector does not require scoring.
    ///
-    /// See [`COLLECT_BLOCK_BUFFER_LEN`](crate::COLLECT_BLOCK_BUFFER_LEN) for the
-    /// buffer size passed to the collector.
+    /// `docs` is a block of matched doc ids. Doc ids are produced in increasing
+    /// order, in windows of [`COLLECT_BLOCK_BUFFER_LEN`](crate::COLLECT_BLOCK_BUFFER_LEN),
+    /// but several windows are accumulated before being flushed here, so the
+    /// block may be larger than `COLLECT_BLOCK_BUFFER_LEN`. Implementations must
+    /// not assume any particular maximum length.
    fn collect_block(&mut self, docs: &[DocId]) {
        for doc in docs {
            self.collect(*doc, 0.0);
--- a/src/collector/sort_key/sort_by_static_fast_value.rs
+++ b/src/collector/sort_key/sort_by_static_fast_value.rs
@@ -52,7 +52,7 @@ impl<T: FastValue> SortKeyComputer for SortByStaticFastValue<T> {
        if schema_type != T::to_type() {
            return Err(crate::TantivyError::SchemaError(format!(
                "Field `{}` is of type {schema_type:?}, not of the type {:?}.",
-                &self.field,
+                self.field,
                T::to_type()
            )));
        }
--- a/src/docset.rs
+++ b/src/docset.rs
@@ -11,9 +11,14 @@ use crate::DocId;
 /// to compare `[u32; 4]`.
 pub const TERMINATED: DocId = i32::MAX as u32;

-/// The collect_block method on `SegmentCollector` uses a buffer of this size.
-/// Passed results to `collect_block` will not exceed this size and will be
-/// exactly this size as long as we can fill the buffer.
+/// Window size used by [`DocSet::fill_buffer`]: a single `fill_buffer` call
+/// writes at most this many doc ids, and exactly this many as long as the
+/// `DocSet` is not exhausted.
+///
+/// Note that this is *not* the maximum length of the slice passed to
+/// `SegmentCollector::collect_block`: the collection loop accumulates several
+/// such windows into a larger buffer before flushing it, so `collect_block`
+/// may receive a block larger than `COLLECT_BLOCK_BUFFER_LEN`.
 pub const COLLECT_BLOCK_BUFFER_LEN: usize = 64;

 /// Number of `TinySet` (64-bit) buckets in a block used by [`DocSet::fill_bitset_block`].
--- a/src/index/segment_reader.rs
+++ b/src/index/segment_reader.rs
@@ -322,7 +322,7 @@ impl SegmentReader {
                            // Without expand dots enabled dots need to be escaped.
                            let escaped_json_path = json_path.replace('.', "\\.");
                            let full_path = format!("{field_name}.{escaped_json_path}");
-                            let full_path_unescaped = format!("{}.{}", field_name, &json_path);
+                            let full_path_unescaped = format!("{}.{}", field_name, json_path);
                            map_to_canonical.insert(full_path_unescaped, full_path.to_string());
                            full_path
                        } else {
--- a/src/query/boolean_query/boolean_weight.rs
+++ b/src/query/boolean_query/boolean_weight.rs
@@ -1,6 +1,5 @@
 use std::collections::HashMap;

-use crate::docset::COLLECT_BLOCK_BUFFER_LEN;
 use crate::index::SegmentReader;
 use crate::postings::FreqReadingOption;
 use crate::query::disjunction::Disjunction;
@@ -531,13 +530,12 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
    ) -> crate::Result<()> {
        let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?;
        let num_docs = reader.num_docs();
-        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];

        match scorer {
            SpecializedScorer::TermUnion(term_scorers) => {
                let mut union_scorer =
                    BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
-                for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
+                for_each_docset_buffered(&mut union_scorer, callback);
            }
            SpecializedScorer::TermIntersection(term_scorers) => {
                let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
@@ -545,10 +543,10 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
                    .map(|term_scorer| Box::new(term_scorer) as Box<dyn Scorer>)
                    .collect();
                let mut intersection = intersect_scorers(boxed_scorers, num_docs);
-                for_each_docset_buffered(intersection.as_mut(), &mut buffer, callback);
+                for_each_docset_buffered(intersection.as_mut(), callback);
            }
            SpecializedScorer::Other(mut scorer) => {
-                for_each_docset_buffered(scorer.as_mut(), &mut buffer, callback);
+                for_each_docset_buffered(scorer.as_mut(), callback);
            }
        }
        Ok(())
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -1,5 +1,5 @@
 use super::term_scorer::TermScorer;
-use crate::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN};
+use crate::docset::DocSet;
 use crate::fieldnorm::FieldNormReader;
 use crate::index::SegmentReader;
 use crate::postings::SegmentPostings;
@@ -92,13 +92,11 @@ impl Weight for TermWeight {
    ) -> crate::Result<()> {
        match self.specialized_scorer(reader, 1.0)? {
            TermOrEmptyOrAllScorer::TermScorer(mut term_scorer) => {
-                let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
-                for_each_docset_buffered(&mut term_scorer, &mut buffer, callback);
+                for_each_docset_buffered(&mut term_scorer, callback);
            }
            TermOrEmptyOrAllScorer::Empty => {}
            TermOrEmptyOrAllScorer::AllMatch(mut all_scorer) => {
-                let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
-                for_each_docset_buffered(&mut all_scorer, &mut buffer, callback);
+                for_each_docset_buffered(&mut all_scorer, callback);
            }
        };

--- a/src/query/weight.rs
+++ b/src/query/weight.rs
@@ -17,18 +17,56 @@ pub(crate) fn for_each_scorer<TScorer: Scorer + ?Sized>(
    }
 }

-/// Iterates through all of the documents matched by the DocSet
-/// `DocSet`.
+/// Number of `COLLECT_BLOCK_BUFFER_LEN`-sized windows accumulated into the large
+/// buffer before it is flushed to the collector via `collect_block`.
+const NUM_WINDOWS_PER_BLOCK: usize = 32;
+/// Size of the buffer accumulated before invoking the callback (2_048 = 32 * 64).
+/// `fill_buffer` keeps writing `COLLECT_BLOCK_BUFFER_LEN`-sized windows; this only
+/// changes how much we accumulate before flushing.
+const LARGE_COLLECT_BUFFER_LEN: usize = COLLECT_BLOCK_BUFFER_LEN * NUM_WINDOWS_PER_BLOCK;
+
+/// Iterates through all of the documents matched by the `DocSet`, flushing
+/// blocks of up to `LARGE_COLLECT_BUFFER_LEN` doc ids to `callback`.
+///
+/// `fill_buffer` only ever writes `COLLECT_BLOCK_BUFFER_LEN` doc ids at a time,
+/// so we accumulate several such windows into a single larger buffer before
+/// handing it to the collector. This amortizes the per-`collect_block` overhead
+/// (virtual dispatch, aggregation setup) over more documents.
 #[inline]
 pub(crate) fn for_each_docset_buffered<T: DocSet + ?Sized>(
    docset: &mut T,
-    buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN],
    mut callback: impl FnMut(&[DocId]),
 ) {
+    // Heap-allocated once per call (i.e. once per segment in the no-score path).
+    // `new_zeroed_slice` zeroes directly on the heap, avoiding a 2_048-element
+    // stack temporary.
+    // SAFETY: an all-zero bit pattern is a valid value for every `DocId` (u32),
+    // so the zeroed slice is fully initialized.
+    let mut buffer: Box<[DocId]> =
+        unsafe { Box::new_zeroed_slice(LARGE_COLLECT_BUFFER_LEN).assume_init() };
    loop {
-        let num_items = docset.fill_buffer(buffer);
-        callback(&buffer[..num_items]);
-        if num_items != buffer.len() {
+        let mut filled = 0;
+        let mut reached_end = false;
+        // Fill the large buffer one `COLLECT_BLOCK_BUFFER_LEN` window at a time.
+        // `chunks_exact_mut` yields windows of exactly `COLLECT_BLOCK_BUFFER_LEN`
+        // because `LARGE_COLLECT_BUFFER_LEN` is a multiple of it (empty remainder).
+        // The windows are contiguous and filled in order, so the doc ids always
+        // occupy the contiguous prefix `buffer[..filled]`.
+        for window in buffer.chunks_exact_mut(COLLECT_BLOCK_BUFFER_LEN) {
+            // SAFETY: each `window` is a slice of exactly `COLLECT_BLOCK_BUFFER_LEN`
+            // elements, so reinterpreting its start pointer as a fixed-size array
+            // reference of that length is valid.
+            let window: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN] =
+                unsafe { &mut *window.as_mut_ptr().cast::<[DocId; COLLECT_BLOCK_BUFFER_LEN]>() };
+            let num_items = docset.fill_buffer(window);
+            filled += num_items;
+            if num_items != COLLECT_BLOCK_BUFFER_LEN {
+                reached_end = true;
+                break;
+            }
+        }
+        callback(&buffer[..filled]);
+        if reached_end {
            break;
        }
    }
@@ -104,9 +142,7 @@ pub trait Weight: Send + Sync + 'static {
        callback: &mut dyn FnMut(&[DocId]),
    ) -> crate::Result<()> {
        let mut docset = self.scorer(reader, 1.0)?;
-
-        let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
-        for_each_docset_buffered(&mut docset, &mut buffer, callback);
+        for_each_docset_buffered(&mut docset, callback);
        Ok(())
    }

--- a/src/store/index/skip_index.rs
+++ b/src/store/index/skip_index.rs
@@ -94,13 +94,7 @@ impl SkipIndex {
            byte_range: 0..first_layer_len,
        };
        for layer in &self.layers {
-            if let Some(checkpoint) =
-                layer.seek_start_at_offset(target, cur_checkpoint.byte_range.start)
-            {
-                cur_checkpoint = checkpoint;
-            } else {
-                return None;
-            }
+            cur_checkpoint = layer.seek_start_at_offset(target, cur_checkpoint.byte_range.start)?;
        }
        Some(cur_checkpoint)
    }
--- a/sstable/src/dictionary.rs
+++ b/sstable/src/dictionary.rs
@@ -14,11 +14,8 @@ use itertools::Itertools;
 use tantivy_fst::Automaton;
 use tantivy_fst::automaton::AlwaysMatch;

-use crate::sstable_index_v3::SSTableIndexV3Empty;
 use crate::streamer::{Streamer, StreamerBuilder};
-use crate::{
-    BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, SSTableIndexV3, TermOrdinal, VoidSSTable,
-};
+use crate::{BlockAddr, DeltaReader, Reader, SSTable, SSTableIndex, TermOrdinal, VoidSSTable};

 /// An SSTable is a sorted map that associates sorted `&[u8]` keys
 /// to any kind of typed values.
@@ -288,33 +285,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
        let (sstable_slice, index_slice) = main_slice.split(index_offset as usize);
        let sstable_index_bytes = index_slice.read_bytes()?;

-        let sstable_index = match version {
-            2 => SSTableIndex::V2(
-                crate::sstable_index_v2::SSTableIndex::load(sstable_index_bytes).map_err(|_| {
-                    io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
-                })?,
-            ),
-            3 => {
-                let (sstable_index_bytes, mut footerv3_len_bytes) = sstable_index_bytes.rsplit(8);
-                let store_offset = u64::deserialize(&mut footerv3_len_bytes)?;
-                if store_offset != 0 {
-                    SSTableIndex::V3(
-                        SSTableIndexV3::load(sstable_index_bytes, store_offset).map_err(|_| {
-                            io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
-                        })?,
-                    )
-                } else {
-                    // if store_offset is zero, there is no index, so we build a pseudo-index
-                    // assuming a single block of sstable covering everything.
-                    SSTableIndex::V3Empty(SSTableIndexV3Empty::load(index_offset as usize))
-                }
-            }
-            _ => {
-                return Err(io::Error::other(format!(
-                    "Unsupported sstable version, expected one of [2, 3], found {version}"
-                )));
-            }
-        };
+        let sstable_index = SSTableIndex::open(version, index_offset, sstable_index_bytes)?;

        Ok(Dictionary {
            sstable_slice,
@@ -525,10 +496,15 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {

        // Open the block for the first ordinal.
        let mut bytes = Vec::new();
-        let mut current_block_addr = self.sstable_index.get_block_with_ord(ord);
+        let (mut current_block_addr, block_id) = self.sstable_index.get_and_locate_with_ord(ord);
        let mut current_sstable_delta_reader =
            self.sstable_delta_reader_block(current_block_addr.clone())?;
        let mut current_block_ordinal = current_block_addr.first_ordinal;
+        let mut current_block_end_bound = self
+            .sstable_index
+            .get_block(block_id + 1)
+            .map(|block_addr| block_addr.first_ordinal)
+            .unwrap_or(u64::MAX);

        loop {
            // move to the ord inside the current block
@@ -557,17 +533,19 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
                }
            };

-            // TODO optimization: it is silly to do a binary search to get the block every single
-            // time.
-            //
-            // Check if block changed for new term_ord
-            let new_block_addr = self.sstable_index.get_block_with_ord(next_ord);
-            if new_block_addr != current_block_addr {
+            if next_ord >= current_block_end_bound {
+                let (new_block_addr, block_id) =
+                    self.sstable_index.get_and_locate_with_ord(next_ord);
                current_block_addr = new_block_addr;
                current_block_ordinal = current_block_addr.first_ordinal;
                current_sstable_delta_reader =
                    self.sstable_delta_reader_block(current_block_addr.clone())?;
                bytes.clear();
+                current_block_end_bound = self
+                    .sstable_index
+                    .get_block(block_id + 1)
+                    .map(|block_addr| block_addr.first_ordinal)
+                    .unwrap_or(u64::MAX)
            }
            ord = next_ord;
        }
--- a/sstable/src/index/mod.rs
+++ b/sstable/src/index/mod.rs
@@ -0,0 +1,319 @@
+pub(crate) mod v2;
+pub(crate) mod v3;
+
+use std::io::{self, Read, Write};
+use std::ops::Range;
+
+use common::{BinarySerializable, FixedSize, OwnedBytes};
+use tantivy_fst::{Automaton, MapBuilder};
+
+use crate::{TermOrdinal, common_prefix_len};
+
+#[derive(Debug, Clone)]
+pub enum SSTableIndex {
+    V2(v2::SSTableIndex),
+    V3(v3::SSTableIndexV3),
+    V3Empty(v3::SSTableIndexV3Empty),
+}
+
+impl SSTableIndex {
+    pub(crate) fn open(
+        version: u32,
+        index_offset: u64,
+        index_bytes: OwnedBytes,
+    ) -> io::Result<Self> {
+        let index = match version {
+            2 => {
+                SSTableIndex::V2(v2::SSTableIndex::load(index_bytes).map_err(|_| {
+                    io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption")
+                })?)
+            }
+            3 => {
+                let (index_bytes, mut footerv3_len_bytes) = index_bytes.rsplit(8);
+                let store_offset = u64::deserialize(&mut footerv3_len_bytes)?;
+                if store_offset != 0 {
+                    SSTableIndex::V3(v3::SSTableIndexV3::load(index_bytes, store_offset).map_err(
+                        |_| io::Error::new(io::ErrorKind::InvalidData, "SSTable corruption"),
+                    )?)
+                } else {
+                    // if store_offset is zero, there is no index, so we build a pseudo-index
+                    // assuming a single block of sstable covering everything.
+                    SSTableIndex::V3Empty(v3::SSTableIndexV3Empty::load(index_offset as usize))
+                }
+            }
+            _ => {
+                return Err(io::Error::other(format!(
+                    "Unsupported sstable version, expected one of [2, 3], found {version}"
+                )));
+            }
+        };
+        Ok(index)
+    }
+
+    /// Get the [`BlockAddr`] of the requested block.
+    pub(crate) fn get_block(&self, block_id: u64) -> Option<BlockAddr> {
+        match self {
+            SSTableIndex::V2(v2_index) => v2_index.get_block(block_id as usize),
+            SSTableIndex::V3(v3_index) => v3_index.get_block(block_id),
+            SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block(block_id),
+        }
+    }
+
+    /// Get the block id of the block that would contain `key`.
+    ///
+    /// Returns None if `key` is lexicographically after the last key recorded.
+    pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<u64> {
+        match self {
+            SSTableIndex::V2(v2_index) => v2_index.locate_with_key(key).map(|i| i as u64),
+            SSTableIndex::V3(v3_index) => v3_index.locate_with_key(key),
+            SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_key(key),
+        }
+    }
+
+    /// Get the [`BlockAddr`] of the block that would contain `key`.
+    ///
+    /// Returns None if `key` is lexicographically after the last key recorded.
+    pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
+        match self {
+            SSTableIndex::V2(v2_index) => v2_index.get_block_with_key(key),
+            SSTableIndex::V3(v3_index) => v3_index.get_block_with_key(key),
+            SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_key(key),
+        }
+    }
+
+    pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> u64 {
+        match self {
+            SSTableIndex::V2(v2_index) => v2_index.locate_with_ord(ord) as u64,
+            SSTableIndex::V3(v3_index) => v3_index.locate_with_ord(ord),
+            SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_ord(ord),
+        }
+    }
+
+    /// Get the [`BlockAddr`] of the block containing the `ord`-th term.
+    pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
+        match self {
+            SSTableIndex::V2(v2_index) => v2_index.get_block_with_ord(ord),
+            SSTableIndex::V3(v3_index) => v3_index.get_block_with_ord(ord),
+            SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_ord(ord),
+        }
+    }
+
+    pub(crate) fn get_and_locate_with_ord(&self, ord: TermOrdinal) -> (BlockAddr, u64) {
+        match self {
+            SSTableIndex::V2(v2_index) => v2_index.get_and_locate_with_ord(ord),
+            SSTableIndex::V3(v3_index) => v3_index.get_and_locate_with_ord(ord),
+            SSTableIndex::V3Empty(v3_empty) => v3_empty.get_and_locate_with_ord(ord),
+        }
+    }
+
+    pub fn get_block_for_automaton<'a>(
+        &'a self,
+        automaton: &'a impl Automaton,
+    ) -> impl Iterator<Item = (u64, BlockAddr)> + 'a {
+        match self {
+            SSTableIndex::V2(v2_index) => {
+                BlockIter::V2(v2_index.get_block_for_automaton(automaton))
+            }
+            SSTableIndex::V3(v3_index) => {
+                BlockIter::V3(v3_index.get_block_for_automaton(automaton))
+            }
+            SSTableIndex::V3Empty(v3_empty) => {
+                BlockIter::V3Empty(std::iter::once((0, v3_empty.block_addr.clone())))
+            }
+        }
+    }
+}
+
+enum BlockIter<V2, V3, T> {
+    V2(V2),
+    V3(V3),
+    V3Empty(std::iter::Once<T>),
+}
+
+impl<V2: Iterator<Item = T>, V3: Iterator<Item = T>, T> Iterator for BlockIter<V2, V3, T> {
+    type Item = T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            BlockIter::V2(v2) => v2.next(),
+            BlockIter::V3(v3) => v3.next(),
+            BlockIter::V3Empty(once) => once.next(),
+        }
+    }
+}
+
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub struct BlockAddr {
+    pub first_ordinal: u64,
+    pub byte_range: Range<usize>,
+}
+
+impl BlockAddr {
+    fn to_block_start(&self) -> BlockStartAddr {
+        BlockStartAddr {
+            first_ordinal: self.first_ordinal,
+            byte_range_start: self.byte_range.start,
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct BlockStartAddr {
+    first_ordinal: u64,
+    byte_range_start: usize,
+}
+
+impl BlockStartAddr {
+    fn to_block_addr(&self, byte_range_end: usize) -> BlockAddr {
+        BlockAddr {
+            first_ordinal: self.first_ordinal,
+            byte_range: self.byte_range_start..byte_range_end,
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub(crate) struct BlockMeta {
+    /// Any byte string that is lexicographically greater or equal to
+    /// the last key in the block,
+    /// and yet strictly smaller than the first key in the next block.
+    pub last_key_or_greater: Vec<u8>,
+    pub block_addr: BlockAddr,
+}
+
+impl BinarySerializable for BlockStartAddr {
+    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
+        let start = self.byte_range_start as u64;
+        start.serialize(writer)?;
+        self.first_ordinal.serialize(writer)
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        let byte_range_start = u64::deserialize(reader)? as usize;
+        let first_ordinal = u64::deserialize(reader)?;
+        Ok(BlockStartAddr {
+            first_ordinal,
+            byte_range_start,
+        })
+    }
+
+    // Provided method
+    fn num_bytes(&self) -> u64 {
+        BlockStartAddr::SIZE_IN_BYTES as u64
+    }
+}
+
+impl FixedSize for BlockStartAddr {
+    const SIZE_IN_BYTES: usize = 2 * u64::SIZE_IN_BYTES;
+}
+
+/// Given that left < right,
+/// mutates `left into a shorter byte string left'` that
+/// matches `left <= left' < right`.
+fn find_shorter_str_in_between(left: &mut Vec<u8>, right: &[u8]) {
+    assert!(&left[..] < right);
+    let common_len = common_prefix_len(left, right);
+    if left.len() == common_len {
+        return;
+    }
+    // It is possible to do one character shorter in some case,
+    // but it is not worth the extra complexity
+    for pos in (common_len + 1)..left.len() {
+        if left[pos] != u8::MAX {
+            left[pos] += 1;
+            left.truncate(pos + 1);
+            return;
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct SSTableIndexBuilder {
+    blocks: Vec<BlockMeta>,
+}
+
+impl SSTableIndexBuilder {
+    /// In order to make the index as light as possible, we
+    /// try to find a shorter alternative to the last key of the last block
+    /// that is still smaller than the next key.
+    pub(crate) fn shorten_last_block_key_given_next_key(&mut self, next_key: &[u8]) {
+        if let Some(last_block) = self.blocks.last_mut() {
+            find_shorter_str_in_between(&mut last_block.last_key_or_greater, next_key);
+        }
+    }
+
+    pub fn add_block(&mut self, last_key: &[u8], byte_range: Range<usize>, first_ordinal: u64) {
+        self.blocks.push(BlockMeta {
+            last_key_or_greater: last_key.to_vec(),
+            block_addr: BlockAddr {
+                byte_range,
+                first_ordinal,
+            },
+        })
+    }
+
+    pub fn serialize<W: std::io::Write>(&self, wrt: W) -> io::Result<u64> {
+        if self.blocks.len() <= 1 {
+            return Ok(0);
+        }
+        let counting_writer = common::CountingWriter::wrap(wrt);
+        let mut map_builder = MapBuilder::new(counting_writer).map_err(fst_error_to_io_error)?;
+        for (i, block) in self.blocks.iter().enumerate() {
+            map_builder
+                .insert(&block.last_key_or_greater, i as u64)
+                .map_err(fst_error_to_io_error)?;
+        }
+        let counting_writer = map_builder.into_inner().map_err(fst_error_to_io_error)?;
+        let written_bytes = counting_writer.written_bytes();
+        let mut wrt = counting_writer.finish();
+
+        let mut block_store_writer = v3::BlockAddrStoreWriter::new();
+        for block in &self.blocks {
+            block_store_writer.write_block_meta(block.block_addr.clone())?;
+        }
+        block_store_writer.serialize(&mut wrt)?;
+
+        Ok(written_bytes)
+    }
+}
+
+fn fst_error_to_io_error(error: tantivy_fst::Error) -> io::Error {
+    match error {
+        tantivy_fst::Error::Fst(fst_error) => io::Error::other(fst_error),
+        tantivy_fst::Error::Io(ioerror) => ioerror,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #[track_caller]
+    fn test_find_shorter_str_in_between_aux(left: &[u8], right: &[u8]) {
+        let mut left_buf = left.to_vec();
+        super::find_shorter_str_in_between(&mut left_buf, right);
+        assert!(left_buf.len() <= left.len());
+        assert!(left <= &left_buf);
+        assert!(&left_buf[..] < right);
+    }
+
+    #[test]
+    fn test_find_shorter_str_in_between() {
+        test_find_shorter_str_in_between_aux(b"", b"hello");
+        test_find_shorter_str_in_between_aux(b"abc", b"abcd");
+        test_find_shorter_str_in_between_aux(b"abcd", b"abd");
+        test_find_shorter_str_in_between_aux(&[0, 0, 0], &[1]);
+        test_find_shorter_str_in_between_aux(&[0, 0, 0], &[0, 0, 1]);
+        test_find_shorter_str_in_between_aux(&[0, 0, 255, 255, 255, 0u8], &[0, 1]);
+    }
+
+    use proptest::prelude::*;
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(100))]
+        #[test]
+        fn test_proptest_find_shorter_str(left in any::<Vec<u8>>(), right in any::<Vec<u8>>()) {
+            if left < right {
+                test_find_shorter_str_in_between_aux(&left, &right);
+            }
+        }
+    }
+}
--- a/sstable/src/sstable_index_v2.rs
+++ b/sstable/src/sstable_index_v2.rs
@@ -77,6 +77,13 @@ impl SSTableIndex {
        self.get_block(self.locate_with_ord(ord)).unwrap()
    }

+    pub(crate) fn get_and_locate_with_ord(&self, ord: TermOrdinal) -> (BlockAddr, u64) {
+        let location = self.locate_with_ord(ord);
+        // locate_with_ord always returns an index within range
+        let block_addr = self.get_block(location).unwrap();
+        (block_addr, location as u64)
+    }
+
    pub(crate) fn get_block_for_automaton<'a>(
        &'a self,
        automaton: &'a impl Automaton,
--- a/sstable/src/sstable_index_v3.rs
+++ b/sstable/src/sstable_index_v3.rs
@@ -1,106 +1,14 @@
 use std::io::{self, Read, Write};
-use std::ops::Range;
 use std::sync::Arc;

 use common::{BinarySerializable, FixedSize, OwnedBytes};
 use tantivy_bitpacker::{BitPacker, compute_num_bits};
 use tantivy_fst::raw::Fst;
-use tantivy_fst::{Automaton, IntoStreamer, Map, MapBuilder, Streamer};
+use tantivy_fst::{Automaton, IntoStreamer, Map, Streamer};

+use super::{BlockAddr, BlockStartAddr};
 use crate::block_match_automaton::can_block_match_automaton;
-use crate::{SSTableDataCorruption, TermOrdinal, common_prefix_len};
-
-#[derive(Debug, Clone)]
-pub enum SSTableIndex {
-    V2(crate::sstable_index_v2::SSTableIndex),
-    V3(SSTableIndexV3),
-    V3Empty(SSTableIndexV3Empty),
-}
-
-impl SSTableIndex {
-    /// Get the [`BlockAddr`] of the requested block.
-    pub(crate) fn get_block(&self, block_id: u64) -> Option<BlockAddr> {
-        match self {
-            SSTableIndex::V2(v2_index) => v2_index.get_block(block_id as usize),
-            SSTableIndex::V3(v3_index) => v3_index.get_block(block_id),
-            SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block(block_id),
-        }
-    }
-
-    /// Get the block id of the block that would contain `key`.
-    ///
-    /// Returns None if `key` is lexicographically after the last key recorded.
-    pub(crate) fn locate_with_key(&self, key: &[u8]) -> Option<u64> {
-        match self {
-            SSTableIndex::V2(v2_index) => v2_index.locate_with_key(key).map(|i| i as u64),
-            SSTableIndex::V3(v3_index) => v3_index.locate_with_key(key),
-            SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_key(key),
-        }
-    }
-
-    /// Get the [`BlockAddr`] of the block that would contain `key`.
-    ///
-    /// Returns None if `key` is lexicographically after the last key recorded.
-    pub fn get_block_with_key(&self, key: &[u8]) -> Option<BlockAddr> {
-        match self {
-            SSTableIndex::V2(v2_index) => v2_index.get_block_with_key(key),
-            SSTableIndex::V3(v3_index) => v3_index.get_block_with_key(key),
-            SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_key(key),
-        }
-    }
-
-    pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> u64 {
-        match self {
-            SSTableIndex::V2(v2_index) => v2_index.locate_with_ord(ord) as u64,
-            SSTableIndex::V3(v3_index) => v3_index.locate_with_ord(ord),
-            SSTableIndex::V3Empty(v3_empty) => v3_empty.locate_with_ord(ord),
-        }
-    }
-
-    /// Get the [`BlockAddr`] of the block containing the `ord`-th term.
-    pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr {
-        match self {
-            SSTableIndex::V2(v2_index) => v2_index.get_block_with_ord(ord),
-            SSTableIndex::V3(v3_index) => v3_index.get_block_with_ord(ord),
-            SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_ord(ord),
-        }
-    }
-
-    pub fn get_block_for_automaton<'a>(
-        &'a self,
-        automaton: &'a impl Automaton,
-    ) -> impl Iterator<Item = (u64, BlockAddr)> + 'a {
-        match self {
-            SSTableIndex::V2(v2_index) => {
-                BlockIter::V2(v2_index.get_block_for_automaton(automaton))
-            }
-            SSTableIndex::V3(v3_index) => {
-                BlockIter::V3(v3_index.get_block_for_automaton(automaton))
-            }
-            SSTableIndex::V3Empty(v3_empty) => {
-                BlockIter::V3Empty(std::iter::once((0, v3_empty.block_addr.clone())))
-            }
-        }
-    }
-}
-
-enum BlockIter<V2, V3, T> {
-    V2(V2),
-    V3(V3),
-    V3Empty(std::iter::Once<T>),
-}
-
-impl<V2: Iterator<Item = T>, V3: Iterator<Item = T>, T> Iterator for BlockIter<V2, V3, T> {
-    type Item = T;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self {
-            BlockIter::V2(v2) => v2.next(),
-            BlockIter::V3(v3) => v3.next(),
-            BlockIter::V3Empty(once) => once.next(),
-        }
-    }
-}
+use crate::{SSTableDataCorruption, TermOrdinal};

 #[derive(Debug, Clone)]
 pub struct SSTableIndexV3 {
@@ -160,6 +68,11 @@ impl SSTableIndexV3 {
        self.block_addr_store.binary_search_ord(ord).1
    }

+    pub(crate) fn get_and_locate_with_ord(&self, ord: TermOrdinal) -> (BlockAddr, u64) {
+        let (location, block_addr) = self.block_addr_store.binary_search_ord(ord);
+        (block_addr, location)
+    }
+
    pub(crate) fn get_block_for_automaton<'a>(
        &'a self,
        automaton: &'a impl Automaton,
@@ -216,7 +129,7 @@ impl<A: Automaton> Iterator for GetBlockForAutomaton<'_, A> {

 #[derive(Debug, Clone)]
 pub struct SSTableIndexV3Empty {
-    block_addr: BlockAddr,
+    pub block_addr: BlockAddr,
 }

 impl SSTableIndexV3Empty {
@@ -230,8 +143,8 @@ impl SSTableIndexV3Empty {
    }

    /// Get the [`BlockAddr`] of the requested block.
-    pub(crate) fn get_block(&self, _block_id: u64) -> Option<BlockAddr> {
-        Some(self.block_addr.clone())
+    pub(crate) fn get_block(&self, block_id: u64) -> Option<BlockAddr> {
+        (block_id == 0).then(|| self.block_addr.clone())
    }

    /// Get the block id of the block that would contain `key`.
@@ -256,146 +169,9 @@ impl SSTableIndexV3Empty {
    pub(crate) fn get_block_with_ord(&self, _ord: TermOrdinal) -> BlockAddr {
        self.block_addr.clone()
    }
-}
-#[derive(Clone, Eq, PartialEq, Debug)]
-pub struct BlockAddr {
-    pub first_ordinal: u64,
-    pub byte_range: Range<usize>,
-}

-impl BlockAddr {
-    fn to_block_start(&self) -> BlockStartAddr {
-        BlockStartAddr {
-            first_ordinal: self.first_ordinal,
-            byte_range_start: self.byte_range.start,
-        }
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-struct BlockStartAddr {
-    first_ordinal: u64,
-    byte_range_start: usize,
-}
-
-impl BlockStartAddr {
-    fn to_block_addr(&self, byte_range_end: usize) -> BlockAddr {
-        BlockAddr {
-            first_ordinal: self.first_ordinal,
-            byte_range: self.byte_range_start..byte_range_end,
-        }
-    }
-}
-
-#[derive(Debug, Clone)]
-pub(crate) struct BlockMeta {
-    /// Any byte string that is lexicographically greater or equal to
-    /// the last key in the block,
-    /// and yet strictly smaller than the first key in the next block.
-    pub last_key_or_greater: Vec<u8>,
-    pub block_addr: BlockAddr,
-}
-
-impl BinarySerializable for BlockStartAddr {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        let start = self.byte_range_start as u64;
-        start.serialize(writer)?;
-        self.first_ordinal.serialize(writer)
-    }
-
-    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
-        let byte_range_start = u64::deserialize(reader)? as usize;
-        let first_ordinal = u64::deserialize(reader)?;
-        Ok(BlockStartAddr {
-            first_ordinal,
-            byte_range_start,
-        })
-    }
-
-    // Provided method
-    fn num_bytes(&self) -> u64 {
-        BlockStartAddr::SIZE_IN_BYTES as u64
-    }
-}
-
-impl FixedSize for BlockStartAddr {
-    const SIZE_IN_BYTES: usize = 2 * u64::SIZE_IN_BYTES;
-}
-
-/// Given that left < right,
-/// mutates `left into a shorter byte string left'` that
-/// matches `left <= left' < right`.
-fn find_shorter_str_in_between(left: &mut Vec<u8>, right: &[u8]) {
-    assert!(&left[..] < right);
-    let common_len = common_prefix_len(left, right);
-    if left.len() == common_len {
-        return;
-    }
-    // It is possible to do one character shorter in some case,
-    // but it is not worth the extra complexity
-    for pos in (common_len + 1)..left.len() {
-        if left[pos] != u8::MAX {
-            left[pos] += 1;
-            left.truncate(pos + 1);
-            return;
-        }
-    }
-}
-
-#[derive(Default)]
-pub struct SSTableIndexBuilder {
-    blocks: Vec<BlockMeta>,
-}
-
-impl SSTableIndexBuilder {
-    /// In order to make the index as light as possible, we
-    /// try to find a shorter alternative to the last key of the last block
-    /// that is still smaller than the next key.
-    pub(crate) fn shorten_last_block_key_given_next_key(&mut self, next_key: &[u8]) {
-        if let Some(last_block) = self.blocks.last_mut() {
-            find_shorter_str_in_between(&mut last_block.last_key_or_greater, next_key);
-        }
-    }
-
-    pub fn add_block(&mut self, last_key: &[u8], byte_range: Range<usize>, first_ordinal: u64) {
-        self.blocks.push(BlockMeta {
-            last_key_or_greater: last_key.to_vec(),
-            block_addr: BlockAddr {
-                byte_range,
-                first_ordinal,
-            },
-        })
-    }
-
-    pub fn serialize<W: std::io::Write>(&self, wrt: W) -> io::Result<u64> {
-        if self.blocks.len() <= 1 {
-            return Ok(0);
-        }
-        let counting_writer = common::CountingWriter::wrap(wrt);
-        let mut map_builder = MapBuilder::new(counting_writer).map_err(fst_error_to_io_error)?;
-        for (i, block) in self.blocks.iter().enumerate() {
-            map_builder
-                .insert(&block.last_key_or_greater, i as u64)
-                .map_err(fst_error_to_io_error)?;
-        }
-        let counting_writer = map_builder.into_inner().map_err(fst_error_to_io_error)?;
-        let written_bytes = counting_writer.written_bytes();
-        let mut wrt = counting_writer.finish();
-
-        let mut block_store_writer = BlockAddrStoreWriter::new();
-        for block in &self.blocks {
-            block_store_writer.write_block_meta(block.block_addr.clone())?;
-        }
-        block_store_writer.serialize(&mut wrt)?;
-
-        Ok(written_bytes)
-    }
-}
-
-fn fst_error_to_io_error(error: tantivy_fst::Error) -> io::Error {
-    match error {
-        tantivy_fst::Error::Fst(fst_error) => io::Error::other(fst_error),
-        tantivy_fst::Error::Io(ioerror) => ioerror,
+    pub(crate) fn get_and_locate_with_ord(&self, _ord: TermOrdinal) -> (BlockAddr, u64) {
+        (self.block_addr.clone(), 0)
    }
 }

@@ -647,14 +423,14 @@ fn binary_search(max: u64, cmp_fn: impl Fn(u64) -> std::cmp::Ordering) -> Result
    Err(left)
 }

-struct BlockAddrStoreWriter {
+pub(crate) struct BlockAddrStoreWriter {
    buffer_block_metas: Vec<u8>,
    buffer_addrs: Vec<u8>,
    block_addrs: Vec<BlockAddr>,
 }

 impl BlockAddrStoreWriter {
-    fn new() -> Self {
+    pub(crate) fn new() -> Self {
        BlockAddrStoreWriter {
            buffer_block_metas: Vec::new(),
            buffer_addrs: Vec::new(),
@@ -662,7 +438,7 @@ impl BlockAddrStoreWriter {
        }
    }

-    fn flush_block(&mut self) -> io::Result<()> {
+    pub(crate) fn flush_block(&mut self) -> io::Result<()> {
        if self.block_addrs.is_empty() {
            return Ok(());
        }
@@ -741,7 +517,7 @@ impl BlockAddrStoreWriter {
        Ok(())
    }

-    fn write_block_meta(&mut self, block_addr: BlockAddr) -> io::Result<()> {
+    pub(crate) fn write_block_meta(&mut self, block_addr: BlockAddr) -> io::Result<()> {
        self.block_addrs.push(block_addr);
        if self.block_addrs.len() >= STORE_BLOCK_LEN {
            self.flush_block()?;
@@ -749,7 +525,7 @@ impl BlockAddrStoreWriter {
        Ok(())
    }

-    fn serialize<W: std::io::Write>(&mut self, wrt: &mut W) -> io::Result<()> {
+    pub(crate) fn serialize<W: std::io::Write>(&mut self, wrt: &mut W) -> io::Result<()> {
        self.flush_block()?;
        let len = self.buffer_block_metas.len() as u64;
        len.serialize(wrt)?;
@@ -824,8 +600,9 @@ mod tests {
    use common::OwnedBytes;

    use super::*;
-    use crate::SSTableDataCorruption;
    use crate::block_match_automaton::tests::EqBuffer;
+    use crate::index::BlockMeta;
+    use crate::{SSTableDataCorruption, SSTableIndexBuilder};

    #[test]
    fn test_sstable_index() {
@@ -874,36 +651,7 @@ mod tests {
        assert!(matches!(data_corruption_err, SSTableDataCorruption));
    }

-    #[track_caller]
-    fn test_find_shorter_str_in_between_aux(left: &[u8], right: &[u8]) {
-        let mut left_buf = left.to_vec();
-        super::find_shorter_str_in_between(&mut left_buf, right);
-        assert!(left_buf.len() <= left.len());
-        assert!(left <= &left_buf);
-        assert!(&left_buf[..] < right);
-    }
-
-    #[test]
-    fn test_find_shorter_str_in_between() {
-        test_find_shorter_str_in_between_aux(b"", b"hello");
-        test_find_shorter_str_in_between_aux(b"abc", b"abcd");
-        test_find_shorter_str_in_between_aux(b"abcd", b"abd");
-        test_find_shorter_str_in_between_aux(&[0, 0, 0], &[1]);
-        test_find_shorter_str_in_between_aux(&[0, 0, 0], &[0, 0, 1]);
-        test_find_shorter_str_in_between_aux(&[0, 0, 255, 255, 255, 0u8], &[0, 1]);
-    }
-
-    use proptest::prelude::*;
-
-    proptest! {
-        #![proptest_config(ProptestConfig::with_cases(100))]
-        #[test]
-        fn test_proptest_find_shorter_str(left in any::<Vec<u8>>(), right in any::<Vec<u8>>()) {
-            if left < right {
-                test_find_shorter_str_in_between_aux(&left, &right);
-            }
-        }
-    }
+    //    use proptest::prelude::*;

    #[test]
    fn test_find_best_slop() {
--- a/sstable/src/lib.rs
+++ b/sstable/src/lib.rs
@@ -47,9 +47,8 @@ pub mod merge;
 mod streamer;
 pub mod value;

-mod sstable_index_v3;
-pub use sstable_index_v3::{BlockAddr, SSTableIndex, SSTableIndexBuilder, SSTableIndexV3};
-mod sstable_index_v2;
+mod index;
+pub use index::{BlockAddr, SSTableIndex, SSTableIndexBuilder};
 pub(crate) mod vint;
 pub use dictionary::{Dictionary, TermOrdHit};
 pub use streamer::{Streamer, StreamerBuilder};
Author	SHA1	Message	Date
Paul Masurel	468850e9f4	Buffer up to 2048 doc ids in for_each_docset_buffered The no-score collection path (Weight::for_each_no_score) handed the collector's collect_block one COLLECT_BLOCK_BUFFER_LEN (64) block at a time. For aggregations this is the dominant path, and 64 docs per collect_block under-amortizes the per-call overhead. for_each_docset_buffered now owns a 2048-element heap buffer and fills it through successive fill_buffer calls over 64-element windows, flushing a single larger block to collect_block. fill_buffer keeps its 64-element window contract, so no DocSet implementation changes. The buffer is allocated with Box::new_zeroed_slice (stable since 1.92, hence the MSRV bump) to zero directly on the heap.	2026-06-01 15:25:39 +02:00
Paul Masurel	a27c64998f	Cargo clippy fix (#2943 ) Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>	2026-06-01 14:39:44 +02:00
Paul Masurel	46b3fb9ed3	Relying on upstream version of datasketch and stop using HLL 4. (#2936 ) We were relying on a fork for: a bugfix in LIST serialization a better API exposing a new Coupon type, required for caching coupons. We also stop using HLL8 in hope to fix https://datadoghq.atlassian.net/browse/CLOUDPREM-625 Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>	2026-05-19 13:29:35 +02:00
trinity-1686a	fbe620b9b4	Merge pull request #2933 from quickwit-oss/1686a/sstable-opt optimise sstable index access pattern	2026-05-19 11:43:17 +02:00
trinity-1686a	95d8a3989a	cr	2026-05-19 11:38:48 +02:00
trinity-1686a	ea61a68db4	skip sstable index binary search when ordinal is in same block	2026-05-16 11:35:38 +02:00
trinity-1686a	c367df37c1	refactor sstable index	2026-05-16 11:30:02 +02:00