encode some part of posting list as -1 instead of direct values (#2185)

* add support for delta-1 encoding of posting lists

* encode term frequency minus one

* don't emit tf for json integer terms

* make skipreader not pub(crate) mutable
Author: trinity-1686a
Date: 2023-10-20 16:58:26 +02:00
Committed by: GitHub
Parent: c2b0469180
Commit: 0d4589219b
9 changed files with 240 additions and 65 deletions


@@ -37,7 +37,7 @@ uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
rust-stemmers = "1.2.0"
downcast-rs = "1.2.0"
-bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { git = "https://github.com/quickwit-oss/bitpacking", rev = "f730b75", default-features = false, features = ["bitpacker4x"] }
census = "0.4.0"
rustc-hash = "1.1.0"
thiserror = "1.0.30"


@@ -109,6 +109,35 @@ mod tests_mmap {
}
}
+#[test]
+fn test_json_field_number() {
+// this test was added specifically to reach some cases related to using json fields, with
+// frequencies enabled, to store integers, with enough documents containing a single integer
+// that the posting list can be bitpacked.
+let mut schema_builder = Schema::builder();
+let json_field = schema_builder.add_json_field("json", TEXT);
+let index = Index::create_in_ram(schema_builder.build());
+let mut index_writer = index.writer_for_tests().unwrap();
+for _ in 0..256 {
+let json = serde_json::json!({"somekey": 1u64, "otherkey": -2i64});
+index_writer.add_document(doc!(json_field=>json)).unwrap();
+let json = serde_json::json!({"somekey": "1str", "otherkey": "2str"});
+index_writer.add_document(doc!(json_field=>json)).unwrap();
+}
+index_writer.commit().unwrap();
+let reader = index.reader().unwrap();
+let searcher = reader.searcher();
+assert_eq!(searcher.num_docs(), 512);
+let parse_query = QueryParser::for_index(&index, Vec::new());
+{
+let query = parse_query.parse_query(r"json.somekey:1").unwrap();
+let num_docs = searcher.search(&query, &Count).unwrap();
+assert_eq!(num_docs, 256);
+}
+}
#[test]
fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
let mut schema_builder = Schema::builder();


@@ -92,7 +92,7 @@ impl PositionReader {
// that block is bitpacked.
let bit_width = bit_widths[block_rel_id];
self.block_decoder
-.uncompress_block_unsorted(compressed_data, bit_width);
+.uncompress_block_unsorted(compressed_data, bit_width, false);
} else {
// that block is vint encoded.
self.block_decoder


@@ -62,8 +62,9 @@ impl<W: io::Write> PositionSerializer<W> {
return;
}
if self.block.len() == COMPRESSION_BLOCK_SIZE {
-let (bit_width, block_encoded): (u8, &[u8]) =
-self.block_encoder.compress_block_unsorted(&self.block[..]);
+let (bit_width, block_encoded): (u8, &[u8]) = self
+.block_encoder
+.compress_block_unsorted(&self.block[..], false);
self.bit_widths.push(bit_width);
self.positions_buffer.extend(block_encoded);
} else {


@@ -24,13 +24,13 @@ fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
#[derive(Clone)]
pub struct BlockSegmentPostings {
pub(crate) doc_decoder: BlockDecoder,
-loaded_offset: usize,
+block_loaded: bool,
freq_decoder: BlockDecoder,
freq_reading_option: FreqReadingOption,
block_max_score_cache: Option<Score>,
doc_freq: u32,
data: OwnedBytes,
-pub(crate) skip_reader: SkipReader,
+skip_reader: SkipReader,
}
fn decode_bitpacked_block(
@@ -40,10 +40,16 @@ fn decode_bitpacked_block(
doc_offset: DocId,
doc_num_bits: u8,
tf_num_bits: u8,
+strict_delta: bool,
) {
-let num_consumed_bytes = doc_decoder.uncompress_block_sorted(data, doc_offset, doc_num_bits);
+let num_consumed_bytes =
+doc_decoder.uncompress_block_sorted(data, doc_offset, doc_num_bits, strict_delta);
if let Some(freq_decoder) = freq_decoder_opt {
-freq_decoder.uncompress_block_unsorted(&data[num_consumed_bytes..], tf_num_bits);
+freq_decoder.uncompress_block_unsorted(
+&data[num_consumed_bytes..],
+tf_num_bits,
+strict_delta,
+);
}
}
@@ -57,11 +63,15 @@ fn decode_vint_block(
let num_consumed_bytes =
doc_decoder.uncompress_vint_sorted(data, doc_offset, num_vint_docs, TERMINATED);
if let Some(freq_decoder) = freq_decoder_opt {
-freq_decoder.uncompress_vint_unsorted(
-&data[num_consumed_bytes..],
-num_vint_docs,
-TERMINATED,
-);
+// if it's a json term with freqs, containing fewer than 256 docs, we can reach this point
+// thinking we have freqs to decode, despite not really having any.
+if data.len() > num_consumed_bytes {
+freq_decoder.uncompress_vint_unsorted(
+&data[num_consumed_bytes..],
+num_vint_docs,
+TERMINATED,
+);
+}
}
}
@@ -78,28 +88,46 @@ fn split_into_skips_and_postings(
}
impl BlockSegmentPostings {
/// Opens a `BlockSegmentPostings`.
/// `doc_freq` is the number of documents in the posting list.
/// `record_option` represents the amount of data available according to the schema.
/// `requested_option` is the amount of data requested by the user.
/// If, for instance, we do not request term frequencies, this function will not decompress
/// term frequency blocks.
pub(crate) fn open(
doc_freq: u32,
data: FileSlice,
-record_option: IndexRecordOption,
+mut record_option: IndexRecordOption,
requested_option: IndexRecordOption,
) -> io::Result<BlockSegmentPostings> {
+let bytes = data.read_bytes()?;
+let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
+let skip_reader = match skip_data_opt {
+Some(skip_data) => {
+let block_count = doc_freq as usize / COMPRESSION_BLOCK_SIZE;
+// 8 bytes is the minimum size of a skip entry for a block with frequencies (it can
+// be more if positions are stored too)
+if skip_data.len() < 8 * block_count {
+// the field might be encoded with frequencies, but this term in particular isn't.
+// This can happen for JSON fields with term frequencies:
+// - text terms are encoded with term freqs.
+// - numerical terms are encoded without term freqs.
+record_option = IndexRecordOption::Basic;
+}
+SkipReader::new(skip_data, doc_freq, record_option)
+}
+None => SkipReader::new(OwnedBytes::empty(), doc_freq, record_option),
+};
let freq_reading_option = match (record_option, requested_option) {
(IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq,
(_, IndexRecordOption::Basic) => FreqReadingOption::SkipFreq,
(_, _) => FreqReadingOption::ReadFreq,
};
-let bytes = data.read_bytes()?;
-let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
-let skip_reader = match skip_data_opt {
-Some(skip_data) => SkipReader::new(skip_data, doc_freq, record_option),
-None => SkipReader::new(OwnedBytes::empty(), doc_freq, record_option),
-};
let mut block_segment_postings = BlockSegmentPostings {
doc_decoder: BlockDecoder::with_val(TERMINATED),
-loaded_offset: usize::MAX,
+block_loaded: false,
freq_decoder: BlockDecoder::with_val(1),
freq_reading_option,
block_max_score_cache: None,
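For context on the length check in the hunk above: each fully bitpacked block contributes a fixed-size entry to the skip data, 5 bytes without term freqs and at least 8 bytes with them (these are the advance_len values read back by SkipReader further down). A minimal sketch of the same test, where term_written_without_freqs is a hypothetical helper, not a function from this commit:

fn term_written_without_freqs(skip_data_len: usize, doc_freq: u32) -> bool {
    const COMPRESSION_BLOCK_SIZE: usize = 128;
    // one skip entry per fully bitpacked block of 128 docs
    let block_count = doc_freq as usize / COMPRESSION_BLOCK_SIZE;
    // with freqs, an entry takes at least 8 bytes: last_doc (4), doc bitwidth
    // byte (1), tf bitwidth (1), block-wand fieldnorm id (1), block-wand tf (1).
    // A shorter skip section means freqs were never written for this term.
    skip_data_len < 8 * block_count
}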
@@ -169,7 +197,7 @@ impl BlockSegmentPostings {
split_into_skips_and_postings(doc_freq, postings_data)?;
self.data = postings_data;
self.block_max_score_cache = None;
-self.loaded_offset = usize::MAX;
+self.block_loaded = false;
if let Some(skip_data) = skip_data_opt {
self.skip_reader.reset(skip_data, doc_freq);
} else {
@@ -265,22 +293,23 @@ impl BlockSegmentPostings {
pub(crate) fn shallow_seek(&mut self, target_doc: DocId) {
if self.skip_reader.seek(target_doc) {
self.block_max_score_cache = None;
+self.block_loaded = false;
}
}
pub(crate) fn block_is_loaded(&self) -> bool {
-self.loaded_offset == self.skip_reader.byte_offset()
+self.block_loaded
}
pub(crate) fn load_block(&mut self) {
-let offset = self.skip_reader.byte_offset();
-if self.loaded_offset == offset {
+if self.block_is_loaded() {
return;
}
-self.loaded_offset = offset;
match self.skip_reader.block_info() {
BlockInfo::BitPacked {
doc_num_bits,
+strict_delta_encoded,
tf_num_bits,
..
} => {
@@ -295,6 +324,7 @@ impl BlockSegmentPostings {
self.skip_reader.last_doc_in_previous_block,
doc_num_bits,
tf_num_bits,
+strict_delta_encoded,
);
}
BlockInfo::VInt { num_docs } => {
@@ -318,13 +348,13 @@ impl BlockSegmentPostings {
);
}
}
+self.block_loaded = true;
}
/// Advance to the next block.
///
/// Returns false if and only if there is no remaining block.
pub fn advance(&mut self) {
self.skip_reader.advance();
+self.block_loaded = false;
self.block_max_score_cache = None;
self.load_block();
}
@@ -333,7 +363,7 @@ impl BlockSegmentPostings {
pub fn empty() -> BlockSegmentPostings {
BlockSegmentPostings {
doc_decoder: BlockDecoder::with_val(TERMINATED),
-loaded_offset: 0,
+block_loaded: true,
freq_decoder: BlockDecoder::with_val(1),
freq_reading_option: FreqReadingOption::NoFreq,
block_max_score_cache: None,
@@ -342,6 +372,10 @@ impl BlockSegmentPostings {
skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
}
}
+pub(crate) fn skip_reader(&self) -> &SkipReader {
+&self.skip_reader
+}
}
#[cfg(test)]


@@ -33,14 +33,40 @@ impl BlockEncoder {
}
pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> (u8, &[u8]) {
-let num_bits = self.bitpacker.num_bits_sorted(offset, block);
+// if offset is zero, convert it to None. This is correct as long as we do the same when
+// decompressing. It's required in case the block starts with an actual zero.
+let offset = if offset == 0u32 { None } else { Some(offset) };
+let num_bits = self.bitpacker.num_bits_strictly_sorted(offset, block);
let written_size =
self.bitpacker
-.compress_sorted(offset, block, &mut self.output[..], num_bits);
+.compress_strictly_sorted(offset, block, &mut self.output[..], num_bits);
(num_bits, &self.output[..written_size])
}
-pub fn compress_block_unsorted(&mut self, block: &[u32]) -> (u8, &[u8]) {
+/// Compress a single block of unsorted numbers.
+///
+/// If `minus_one_encoded` is set, each value must be >= 1, and will be encoded in a slightly
+/// more compact format. This is useful for values where 0 cannot occur, such as term
+/// frequencies, but is incorrect for usages like position lists, where 0 can appear.
+pub fn compress_block_unsorted(
+&mut self,
+block: &[u32],
+minus_one_encoded: bool,
+) -> (u8, &[u8]) {
+debug_assert!(!minus_one_encoded || !block.contains(&0));
+let mut block_minus_one = [0; COMPRESSION_BLOCK_SIZE];
+let block = if minus_one_encoded {
+for (elem_min_one, elem) in block_minus_one.iter_mut().zip(block) {
+*elem_min_one = elem - 1;
+}
+&block_minus_one
+} else {
+block
+};
let num_bits = self.bitpacker.num_bits(block);
let written_size = self
.bitpacker
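For intuition on the minus-one encoding: term frequencies are always >= 1, and a very common case is a block where every frequency is exactly 1, which packs to 0 bits per value once shifted down. A minimal sketch, where bits_needed is a hypothetical stand-in for the bitpacker's num_bits, not part of this commit:

fn bits_needed(max: u32) -> u8 {
    (32 - max.leading_zeros()) as u8
}

fn main() {
    let freqs = [1u32; 128];
    // packed directly, each value needs 1 bit; minus-one encoded, 0 bits
    assert_eq!(bits_needed(*freqs.iter().max().unwrap()), 1);
    assert_eq!(bits_needed(freqs.iter().map(|v| v - 1).max().unwrap()), 0);
}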
@@ -71,21 +97,55 @@ impl BlockDecoder {
}
}
+/// Decompress a block of sorted integers.
+///
+/// `strict_delta` depends on what encoding was used. Older versions of tantivy never use
+/// strict deltas; newer versions always use them.
pub fn uncompress_block_sorted(
&mut self,
compressed_data: &[u8],
offset: u32,
num_bits: u8,
+strict_delta: bool,
) -> usize {
-self.output_len = COMPRESSION_BLOCK_SIZE;
-self.bitpacker
-.decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
+if strict_delta {
+let offset = std::num::NonZeroU32::new(offset).map(std::num::NonZeroU32::get);
+self.output_len = COMPRESSION_BLOCK_SIZE;
+self.bitpacker.decompress_strictly_sorted(
+offset,
+compressed_data,
+&mut self.output,
+num_bits,
+)
+} else {
+self.output_len = COMPRESSION_BLOCK_SIZE;
+self.bitpacker
+.decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
+}
}
-pub fn uncompress_block_unsorted(&mut self, compressed_data: &[u8], num_bits: u8) -> usize {
+/// Decompress a block of unsorted integers.
+///
+/// `minus_one_encoded` depends on what encoding was used. Older versions of tantivy never
+/// use that encoding; newer versions use it for some structures, but not all. See the
+/// corresponding call to `BlockEncoder::compress_block_unsorted`.
+pub fn uncompress_block_unsorted(
+&mut self,
+compressed_data: &[u8],
+num_bits: u8,
+minus_one_encoded: bool,
+) -> usize {
self.output_len = COMPRESSION_BLOCK_SIZE;
-self.bitpacker
-.decompress(compressed_data, &mut self.output, num_bits)
+let res = self
+.bitpacker
+.decompress(compressed_data, &mut self.output, num_bits);
+if minus_one_encoded {
+for val in &mut self.output {
+*val += 1;
+}
+}
+res
}
#[inline]
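The strictly sorted codec used above relies on doc ids within a posting list being strictly increasing: every delta between consecutive ids is >= 1, so it can be stored as delta - 1, and a run of consecutive doc ids then packs to 0 bits per value. A rough sketch of the decomposition, assuming a None offset means the first value is stored as-is (mirroring the zero-offset convention in compress_block_sorted above; this is not the bitpacking crate's exact layout):

fn strict_deltas(docs: &[u32], offset: Option<u32>) -> Vec<u32> {
    let mut prev = offset;
    docs.iter()
        .map(|&doc| {
            // the first value of the posting list is kept as-is; afterwards
            // store doc - prev - 1, which is always >= 0
            let delta = match prev {
                Some(p) => doc - p - 1,
                None => doc,
            };
            prev = Some(doc);
            delta
        })
        .collect()
}

fn main() {
    // consecutive doc ids compress to all zeros
    assert_eq!(strict_deltas(&[5, 6, 7, 8], Some(4)), vec![0, 0, 0, 0]);
    assert_eq!(strict_deltas(&[0, 3, 4], None), vec![0, 2, 0]);
}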
@@ -218,7 +278,8 @@ pub mod tests {
let (num_bits, compressed_data) = encoder.compress_block_sorted(&vals, 0);
let mut decoder = BlockDecoder::default();
{
-let consumed_num_bytes = decoder.uncompress_block_sorted(compressed_data, 0, num_bits);
+let consumed_num_bytes =
+decoder.uncompress_block_sorted(compressed_data, 0, num_bits, true);
assert_eq!(consumed_num_bytes, compressed_data.len());
}
for i in 0..128 {
@@ -233,7 +294,8 @@ pub mod tests {
let (num_bits, compressed_data) = encoder.compress_block_sorted(&vals, 10);
let mut decoder = BlockDecoder::default();
{
-let consumed_num_bytes = decoder.uncompress_block_sorted(compressed_data, 10, num_bits);
+let consumed_num_bytes =
+decoder.uncompress_block_sorted(compressed_data, 10, num_bits, true);
assert_eq!(consumed_num_bytes, compressed_data.len());
}
for i in 0..128 {
@@ -252,7 +314,8 @@ pub mod tests {
compressed.push(173u8);
let mut decoder = BlockDecoder::default();
{
-let consumed_num_bytes = decoder.uncompress_block_sorted(&compressed, 10, num_bits);
+let consumed_num_bytes =
+decoder.uncompress_block_sorted(&compressed, 10, num_bits, true);
assert_eq!(consumed_num_bytes, compressed.len() - 1);
assert_eq!(compressed[consumed_num_bytes], 173u8);
}
@@ -263,21 +326,25 @@ pub mod tests {
#[test]
fn test_encode_unsorted_block_with_junk() {
-let mut compressed: Vec<u8> = Vec::new();
-let n = 128;
-let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32) * 7u32 % 12).collect();
-let mut encoder = BlockEncoder::default();
-let (num_bits, compressed_data) = encoder.compress_block_unsorted(&vals);
-compressed.extend_from_slice(compressed_data);
-compressed.push(173u8);
-let mut decoder = BlockDecoder::default();
-{
-let consumed_num_bytes = decoder.uncompress_block_unsorted(&compressed, num_bits);
-assert_eq!(consumed_num_bytes + 1, compressed.len());
-assert_eq!(compressed[consumed_num_bytes], 173u8);
-}
-for i in 0..n {
-assert_eq!(vals[i], decoder.output(i));
+for minus_one_encode in [false, true] {
+let mut compressed: Vec<u8> = Vec::new();
+let n = 128;
+let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32) * 7u32 % 12).collect();
+let mut encoder = BlockEncoder::default();
+let (num_bits, compressed_data) =
+encoder.compress_block_unsorted(&vals, minus_one_encode);
+compressed.extend_from_slice(compressed_data);
+compressed.push(173u8);
+let mut decoder = BlockDecoder::default();
+{
+let consumed_num_bytes =
+decoder.uncompress_block_unsorted(&compressed, num_bits, minus_one_encode);
+assert_eq!(consumed_num_bytes + 1, compressed.len());
+assert_eq!(compressed[consumed_num_bytes], 173u8);
+}
+for i in 0..n {
+assert_eq!(vals[i], decoder.output(i));
+}
+}
}
@@ -344,7 +411,7 @@ mod bench {
let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
let mut decoder = BlockDecoder::default();
b.iter(|| {
-decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
+decoder.uncompress_block_sorted(compressed, 0u32, num_bits, true);
});
}


@@ -301,6 +301,7 @@ pub struct PostingsSerializer<W: Write> {
bm25_weight: Option<Bm25Weight>,
avg_fieldnorm: Score, /* Average number of terms in the field for that segment.
* This value is used to compute the block wand information. */
+term_has_freq: bool,
}
impl<W: Write> PostingsSerializer<W> {
@@ -325,13 +326,15 @@ impl<W: Write> PostingsSerializer<W> {
fieldnorm_reader,
bm25_weight: None,
avg_fieldnorm,
+term_has_freq: false,
}
}
pub fn new_term(&mut self, term_doc_freq: u32) {
self.bm25_weight = None;
-if !self.mode.has_freq() {
+self.term_has_freq = self.mode.has_freq() && term_doc_freq != 0;
+if !self.term_has_freq {
return;
}
@@ -365,10 +368,10 @@ impl<W: Write> PostingsSerializer<W> {
// last el block 0, offset block 1,
self.postings_write.extend(block_encoded);
}
-if self.mode.has_freq() {
+if self.term_has_freq {
let (num_bits, block_encoded): (u8, &[u8]) = self
.block_encoder
-.compress_block_unsorted(self.block.term_freqs());
+.compress_block_unsorted(self.block.term_freqs(), true);
self.postings_write.extend(block_encoded);
self.skip_write.write_term_freq(num_bits);
if self.mode.has_positions() {
@@ -432,7 +435,7 @@ impl<W: Write> PostingsSerializer<W> {
self.postings_write.write_all(block_encoded)?;
}
// ... Idem for term frequencies
-if self.mode.has_freq() {
+if self.term_has_freq {
let block_encoded = self
.block_encoder
.compress_vint_unsorted(self.block.term_freqs());


@@ -6,6 +6,22 @@ use crate::query::Bm25Weight;
use crate::schema::IndexRecordOption;
use crate::{DocId, Score, TERMINATED};
+// doc_num_bits uses the following encoding:
+// given 0b a b cdefgh
+//         |1|2|  3   |
+// - 1: unused
+// - 2: is delta-1 encoded; 0 if not, 1 if yes
+// - 3: a 6-bit number in 0..=32, the actual bitwidth
+fn encode_bitwidth(bitwidth: u8, delta_1: bool) -> u8 {
+bitwidth | ((delta_1 as u8) << 6)
+}
+fn decode_bitwidth(raw_bitwidth: u8) -> (u8, bool) {
+let delta_1 = (raw_bitwidth >> 6 & 1) != 0;
+let bitwidth = raw_bitwidth & 0x3f;
+(bitwidth, delta_1)
+}
#[inline]
fn encode_block_wand_max_tf(max_tf: u32) -> u8 {
max_tf.min(u8::MAX as u32) as u8
@@ -41,7 +57,7 @@ impl SkipSerializer {
pub fn write_doc(&mut self, last_doc: DocId, doc_num_bits: u8) {
write_u32(last_doc, &mut self.buffer);
-self.buffer.push(doc_num_bits);
+self.buffer.push(encode_bitwidth(doc_num_bits, true));
}
pub fn write_term_freq(&mut self, tf_num_bits: u8) {
@@ -85,6 +101,7 @@ pub(crate) struct SkipReader {
pub(crate) enum BlockInfo {
BitPacked {
doc_num_bits: u8,
+strict_delta_encoded: bool,
tf_num_bits: u8,
tf_sum: u32,
block_wand_fieldnorm_id: u8,
@@ -172,12 +189,13 @@ impl SkipReader {
let bytes = self.owned_read.as_slice();
let advance_len: usize;
self.last_doc_in_block = read_u32(bytes);
-let doc_num_bits = bytes[4];
+let (doc_num_bits, strict_delta_encoded) = decode_bitwidth(bytes[4]);
match self.skip_info {
IndexRecordOption::Basic => {
advance_len = 5;
self.block_info = BlockInfo::BitPacked {
doc_num_bits,
+strict_delta_encoded,
tf_num_bits: 0,
tf_sum: 0,
block_wand_fieldnorm_id: 0,
@@ -191,6 +209,7 @@ impl SkipReader {
advance_len = 8;
self.block_info = BlockInfo::BitPacked {
doc_num_bits,
+strict_delta_encoded,
tf_num_bits,
tf_sum: 0,
block_wand_fieldnorm_id,
@@ -205,6 +224,7 @@ impl SkipReader {
advance_len = 12;
self.block_info = BlockInfo::BitPacked {
doc_num_bits,
+strict_delta_encoded,
tf_num_bits,
tf_sum,
block_wand_fieldnorm_id,
@@ -268,7 +288,9 @@ impl SkipReader {
#[cfg(test)]
mod tests {
-use super::{BlockInfo, IndexRecordOption, SkipReader, SkipSerializer};
+use super::{
+decode_bitwidth, encode_bitwidth, BlockInfo, IndexRecordOption, SkipReader, SkipSerializer,
+};
use crate::directory::OwnedBytes;
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -310,6 +332,7 @@ mod tests {
skip_reader.block_info,
BlockInfo::BitPacked {
doc_num_bits: 2u8,
+strict_delta_encoded: true,
tf_num_bits: 3u8,
tf_sum: 0,
block_wand_fieldnorm_id: 13,
@@ -322,6 +345,7 @@ mod tests {
skip_reader.block_info(),
BlockInfo::BitPacked {
doc_num_bits: 5u8,
+strict_delta_encoded: true,
tf_num_bits: 2u8,
tf_sum: 0,
block_wand_fieldnorm_id: 8,
@@ -352,6 +376,7 @@ mod tests {
skip_reader.block_info(),
BlockInfo::BitPacked {
doc_num_bits: 2u8,
+strict_delta_encoded: true,
tf_num_bits: 0,
tf_sum: 0u32,
block_wand_fieldnorm_id: 0,
@@ -364,6 +389,7 @@ mod tests {
skip_reader.block_info(),
BlockInfo::BitPacked {
doc_num_bits: 5u8,
+strict_delta_encoded: true,
tf_num_bits: 0,
tf_sum: 0u32,
block_wand_fieldnorm_id: 0,
@@ -393,6 +419,7 @@ mod tests {
skip_reader.block_info(),
BlockInfo::BitPacked {
doc_num_bits: 2u8,
+strict_delta_encoded: true,
tf_num_bits: 0,
tf_sum: 0u32,
block_wand_fieldnorm_id: 0,
@@ -402,4 +429,18 @@ mod tests {
skip_reader.advance();
assert_eq!(skip_reader.block_info(), BlockInfo::VInt { num_docs: 0u32 });
}
+#[test]
+fn test_encode_decode_bitwidth() {
+for bitwidth in 0..=32 {
+for delta_1 in [false, true] {
+assert_eq!(
+(bitwidth, delta_1),
+decode_bitwidth(encode_bitwidth(bitwidth, delta_1))
+);
+}
+}
+assert_eq!(0b01000010, encode_bitwidth(0b10, true));
+assert_eq!(0b00000010, encode_bitwidth(0b10, false));
+}
}


@@ -93,7 +93,7 @@ impl TermScorer {
}
pub fn last_doc_in_block(&self) -> DocId {
-self.postings.block_cursor.skip_reader.last_doc_in_block()
+self.postings.block_cursor.skip_reader().last_doc_in_block()
}
}