From 0d4589219bcf8b4f10326e599c23ac102d28e42b Mon Sep 17 00:00:00 2001
From: trinity-1686a
Date: Fri, 20 Oct 2023 16:58:26 +0200
Subject: [PATCH] encode some part of posting list as -1 instead of direct
 values (#2185)

* add support for delta-1 encoding posting list
* encode term frequency minus one
* don't emit tf for json integer terms
* make skipreader not pub(crate) mutable
---
 Cargo.toml                             |   2 +-
 src/indexer/mod.rs                     |  29 ++++++
 src/positions/reader.rs                |   2 +-
 src/positions/serializer.rs            |   5 +-
 src/postings/block_segment_postings.rs |  84 ++++++++++++-----
 src/postings/compression/mod.rs        | 123 +++++++++++++++++++------
 src/postings/serializer.rs             |  11 ++-
 src/postings/skip.rs                   |  47 +++++++++-
 src/query/term_query/term_scorer.rs    |   2 +-
 9 files changed, 240 insertions(+), 65 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 637b2e9c4..0e4457b83 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,7 +37,7 @@ uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { git = "https://github.com/quickwit-oss/bitpacking", rev = "f730b75", default-features = false, features = ["bitpacker4x"] }
 census = "0.4.0"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs
index d5ae094b5..666909391 100644
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -109,6 +109,35 @@ mod tests_mmap {
         }
     }
 
+    #[test]
+    fn test_json_field_number() {
+        // This test was added specifically to exercise the case of a JSON field, with
+        // frequencies enabled, that stores integers, with enough documents containing a
+        // single integer for the posting list to be bitpacked.
+        let mut schema_builder = Schema::builder();
+
+        let json_field = schema_builder.add_json_field("json", TEXT);
+        let index = Index::create_in_ram(schema_builder.build());
+        let mut index_writer = index.writer_for_tests().unwrap();
+        for _ in 0..256 {
+            let json = serde_json::json!({"somekey": 1u64, "otherkey": -2i64});
+            index_writer.add_document(doc!(json_field=>json)).unwrap();
+
+            let json = serde_json::json!({"somekey": "1str", "otherkey": "2str"});
+            index_writer.add_document(doc!(json_field=>json)).unwrap();
+        }
+        index_writer.commit().unwrap();
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        assert_eq!(searcher.num_docs(), 512);
+        let parse_query = QueryParser::for_index(&index, Vec::new());
+        {
+            let query = parse_query.parse_query(r"json.somekey:1").unwrap();
+            let num_docs = searcher.search(&query, &Count).unwrap();
+            assert_eq!(num_docs, 256);
+        }
+    }
+
     #[test]
     fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
         let mut schema_builder = Schema::builder();
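The whole patch rests on one observation: both sequences being bitpacked here have a known lower bound. Doc ids within a posting list are strictly increasing, so consecutive deltas are always >= 1 and can be stored as `delta - 1`; likewise a term frequency is always >= 1 and can be stored as `tf - 1`. A minimal sketch of the transform, with illustrative helper names (the actual bit packing is delegated to the patched `bitpacking` crate, and the `offset == 0` case is special-cased to `None` later in this patch):

    // Strictly increasing doc ids relative to a real predecessor `offset`:
    // every delta is >= 1, so (delta - 1) never underflows.
    fn doc_deltas_minus_one(offset: u32, doc_ids: &[u32]) -> Vec<u32> {
        let mut prev = offset;
        doc_ids
            .iter()
            .map(|&doc| {
                let delta_minus_one = doc - prev - 1;
                prev = doc;
                delta_minus_one
            })
            .collect()
    }

    // Bit width needed to pack a block: bits of its largest value.
    fn num_bits(vals: &[u32]) -> u32 {
        vals.iter().map(|&v| 32 - v.leading_zeros()).max().unwrap_or(0)
    }

For a run of consecutive doc ids, every `delta - 1` is 0, so the width drops from 1 bit to 0 bits and the block payload disappears entirely.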
diff --git a/src/positions/reader.rs b/src/positions/reader.rs
index bb2d35c1b..0f5e97dc5 100644
--- a/src/positions/reader.rs
+++ b/src/positions/reader.rs
@@ -92,7 +92,7 @@ impl PositionReader {
             // that block is bitpacked.
             let bit_width = bit_widths[block_rel_id];
             self.block_decoder
-                .uncompress_block_unsorted(compressed_data, bit_width);
+                .uncompress_block_unsorted(compressed_data, bit_width, false);
         } else {
             // that block is vint encoded.
             self.block_decoder
diff --git a/src/positions/serializer.rs b/src/positions/serializer.rs
index eee31d5cd..f41923e8b 100644
--- a/src/positions/serializer.rs
+++ b/src/positions/serializer.rs
@@ -62,8 +62,9 @@ impl PositionSerializer {
             return;
         }
         if self.block.len() == COMPRESSION_BLOCK_SIZE {
-            let (bit_width, block_encoded): (u8, &[u8]) =
-                self.block_encoder.compress_block_unsorted(&self.block[..]);
+            let (bit_width, block_encoded): (u8, &[u8]) = self
+                .block_encoder
+                .compress_block_unsorted(&self.block[..], false);
             self.bit_widths.push(bit_width);
             self.positions_buffer.extend(block_encoded);
         } else {
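Note that both positions call sites above pass `false`: the positions stream cannot use the new encodings because 0 is a legitimate value there (for instance, a document whose first token sits at position 0 produces a leading delta of 0). A tiny illustration of why the minus-one transform is off the table:

    // Hypothetical token positions 0, 1, 4 delta-encoded: 0, 1, 3.
    let position_deltas: Vec<u32> = vec![0, 1, 3];
    // 0 occurs, so (value - 1) would underflow; positions keep the plain,
    // non-strict encoding.
    assert!(position_deltas.contains(&0));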
diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs
index bf131627c..366809a95 100644
--- a/src/postings/block_segment_postings.rs
+++ b/src/postings/block_segment_postings.rs
@@ -24,13 +24,13 @@ fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
 #[derive(Clone)]
 pub struct BlockSegmentPostings {
     pub(crate) doc_decoder: BlockDecoder,
-    loaded_offset: usize,
+    block_loaded: bool,
     freq_decoder: BlockDecoder,
     freq_reading_option: FreqReadingOption,
     block_max_score_cache: Option<Score>,
     doc_freq: u32,
     data: OwnedBytes,
-    pub(crate) skip_reader: SkipReader,
+    skip_reader: SkipReader,
 }
 
 fn decode_bitpacked_block(
@@ -40,10 +40,16 @@
     doc_offset: DocId,
     doc_num_bits: u8,
     tf_num_bits: u8,
+    strict_delta: bool,
 ) {
-    let num_consumed_bytes = doc_decoder.uncompress_block_sorted(data, doc_offset, doc_num_bits);
+    let num_consumed_bytes =
+        doc_decoder.uncompress_block_sorted(data, doc_offset, doc_num_bits, strict_delta);
     if let Some(freq_decoder) = freq_decoder_opt {
-        freq_decoder.uncompress_block_unsorted(&data[num_consumed_bytes..], tf_num_bits);
+        freq_decoder.uncompress_block_unsorted(
+            &data[num_consumed_bytes..],
+            tf_num_bits,
+            strict_delta,
+        );
     }
 }
 
@@ -57,11 +63,15 @@ fn decode_vint_block(
     let num_consumed_bytes =
         doc_decoder.uncompress_vint_sorted(data, doc_offset, num_vint_docs, TERMINATED);
     if let Some(freq_decoder) = freq_decoder_opt {
-        freq_decoder.uncompress_vint_unsorted(
-            &data[num_consumed_bytes..],
-            num_vint_docs,
-            TERMINATED,
-        );
+        // If it's a JSON term with freqs, containing fewer than 256 docs, we can reach this
+        // point thinking we have a freq block, despite not really having one.
+        if data.len() > num_consumed_bytes {
+            freq_decoder.uncompress_vint_unsorted(
+                &data[num_consumed_bytes..],
+                num_vint_docs,
+                TERMINATED,
+            );
+        }
     }
 }
 
@@ -78,28 +88,46 @@ fn split_into_skips_and_postings(
 }
 
 impl BlockSegmentPostings {
+    /// Opens a `BlockSegmentPostings`.
+    /// `doc_freq` is the number of documents in the posting list.
+    /// `record_option` represents the amount of data available according to the schema.
+    /// `requested_option` is the amount of data requested by the user.
+    /// If, for instance, we do not request term frequencies, this function will not decompress
+    /// term frequency blocks.
     pub(crate) fn open(
         doc_freq: u32,
         data: FileSlice,
-        record_option: IndexRecordOption,
+        mut record_option: IndexRecordOption,
        requested_option: IndexRecordOption,
     ) -> io::Result<BlockSegmentPostings> {
+        let bytes = data.read_bytes()?;
+        let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
+        let skip_reader = match skip_data_opt {
+            Some(skip_data) => {
+                let block_count = doc_freq as usize / COMPRESSION_BLOCK_SIZE;
+                // 8 is the minimum size of a skip entry for a block with frequencies (it can
+                // be more if positions are stored too).
+                if skip_data.len() < 8 * block_count {
+                    // The field might be encoded with frequencies, but this term in particular
+                    // isn't. This can happen for a JSON field with term frequencies:
+                    // - text terms are encoded with term freqs.
+                    // - numerical terms are encoded without term freqs.
+                    record_option = IndexRecordOption::Basic;
+                }
+                SkipReader::new(skip_data, doc_freq, record_option)
+            }
+            None => SkipReader::new(OwnedBytes::empty(), doc_freq, record_option),
+        };
+
         let freq_reading_option = match (record_option, requested_option) {
             (IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq,
             (_, IndexRecordOption::Basic) => FreqReadingOption::SkipFreq,
             (_, _) => FreqReadingOption::ReadFreq,
         };
 
-        let bytes = data.read_bytes()?;
-        let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
-        let skip_reader = match skip_data_opt {
-            Some(skip_data) => SkipReader::new(skip_data, doc_freq, record_option),
-            None => SkipReader::new(OwnedBytes::empty(), doc_freq, record_option),
-        };
-
         let mut block_segment_postings = BlockSegmentPostings {
             doc_decoder: BlockDecoder::with_val(TERMINATED),
-            loaded_offset: usize::MAX,
+            block_loaded: false,
             freq_decoder: BlockDecoder::with_val(1),
             freq_reading_option,
             block_max_score_cache: None,
@@ -169,7 +197,7 @@ impl BlockSegmentPostings {
             split_into_skips_and_postings(doc_freq, postings_data)?;
         self.data = postings_data;
         self.block_max_score_cache = None;
-        self.loaded_offset = usize::MAX;
+        self.block_loaded = false;
         if let Some(skip_data) = skip_data_opt {
             self.skip_reader.reset(skip_data, doc_freq);
         } else {
@@ -265,22 +293,23 @@
     pub(crate) fn shallow_seek(&mut self, target_doc: DocId) {
         if self.skip_reader.seek(target_doc) {
             self.block_max_score_cache = None;
+            self.block_loaded = false;
         }
     }
 
     pub(crate) fn block_is_loaded(&self) -> bool {
-        self.loaded_offset == self.skip_reader.byte_offset()
+        self.block_loaded
     }
 
     pub(crate) fn load_block(&mut self) {
         let offset = self.skip_reader.byte_offset();
-        if self.loaded_offset == offset {
+        if self.block_is_loaded() {
             return;
         }
-        self.loaded_offset = offset;
         match self.skip_reader.block_info() {
             BlockInfo::BitPacked {
                 doc_num_bits,
+                strict_delta_encoded,
                 tf_num_bits,
                 ..
             } => {
@@ -295,6 +324,7 @@
                     self.skip_reader.last_doc_in_previous_block,
                     doc_num_bits,
                     tf_num_bits,
+                    strict_delta_encoded,
                 );
             }
             BlockInfo::VInt { num_docs } => {
@@ -318,13 +348,13 @@
                 );
             }
         }
+        self.block_loaded = true;
     }
 
     /// Advance to the next block.
-    ///
-    /// Returns false if and only if there is no remaining block.
     pub fn advance(&mut self) {
         self.skip_reader.advance();
+        self.block_loaded = false;
         self.block_max_score_cache = None;
         self.load_block();
     }
@@ -333,7 +363,7 @@
     pub fn empty() -> BlockSegmentPostings {
         BlockSegmentPostings {
             doc_decoder: BlockDecoder::with_val(TERMINATED),
-            loaded_offset: 0,
+            block_loaded: true,
             freq_decoder: BlockDecoder::with_val(1),
             freq_reading_option: FreqReadingOption::NoFreq,
             block_max_score_cache: None,
@@ -342,6 +372,10 @@
             skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
         }
     }
+
+    pub(crate) fn skip_reader(&self) -> &SkipReader {
+        &self.skip_reader
+    }
 }
 
 #[cfg(test)]
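The length check in `open` deserves a closer look. Every full bitpacked block gets one skip entry, and (per the `SkipReader` changes later in this patch) an entry occupies 5 bytes without term freqs and at least 8 bytes with them. Comparing the skip data length against `8 * block_count` therefore tells the two layouts apart. A sketch of that arithmetic, with a hypothetical helper name:

    // 5 bytes per skip entry without freqs, >= 8 with freqs.
    fn term_written_with_freqs(skip_data_len: usize, doc_freq: u32) -> bool {
        const COMPRESSION_BLOCK_SIZE: usize = 128;
        let full_block_count = doc_freq as usize / COMPRESSION_BLOCK_SIZE;
        skip_data_len >= 8 * full_block_count
    }

For the JSON integer term in the test earlier: 256 docs means 2 full blocks; written without freqs, the skip data is 2 * 5 = 10 bytes, which is below 16, so `record_option` is downgraded to `IndexRecordOption::Basic`.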
diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs
index 7e2691480..f8a8a3193 100644
--- a/src/postings/compression/mod.rs
+++ b/src/postings/compression/mod.rs
@@ -33,14 +33,40 @@ impl BlockEncoder {
     }
 
     pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> (u8, &[u8]) {
-        let num_bits = self.bitpacker.num_bits_sorted(offset, block);
+        // If offset is zero, convert it to None. This is correct as long as we do the same when
+        // decompressing. It's required in case the block starts with an actual zero.
+        let offset = if offset == 0u32 { None } else { Some(offset) };
+
+        let num_bits = self.bitpacker.num_bits_strictly_sorted(offset, block);
         let written_size = self
             .bitpacker
-            .compress_sorted(offset, block, &mut self.output[..], num_bits);
+            .compress_strictly_sorted(offset, block, &mut self.output[..], num_bits);
         (num_bits, &self.output[..written_size])
     }
 
-    pub fn compress_block_unsorted(&mut self, block: &[u32]) -> (u8, &[u8]) {
+    /// Compress a single block of unsorted numbers.
+    ///
+    /// If `minus_one_encoded` is set, each value must be >= 1, and will be encoded in a slightly
+    /// more compact format. This is useful for values where 0 can never occur, such
+    /// as term frequencies, but is wrong for usages like position lists, where 0 can
+    /// appear.
+    pub fn compress_block_unsorted(
+        &mut self,
+        block: &[u32],
+        minus_one_encoded: bool,
+    ) -> (u8, &[u8]) {
+        debug_assert!(!minus_one_encoded || !block.contains(&0));
+
+        let mut block_minus_one = [0; COMPRESSION_BLOCK_SIZE];
+        let block = if minus_one_encoded {
+            for (elem_min_one, elem) in block_minus_one.iter_mut().zip(block) {
+                *elem_min_one = elem - 1;
+            }
+            &block_minus_one
+        } else {
+            block
+        };
+
         let num_bits = self.bitpacker.num_bits(block);
         let written_size = self
             .bitpacker
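Two encoder-side details are worth spelling out. First, `compress_strictly_sorted` takes an `Option<u32>` predecessor: mapping `offset == 0` to `None` keeps a first block that starts at doc id 0 encodable, since with `Some(0)` that first doc would need a `delta - 1` of `0 - 0 - 1`. Second, the minus-one transform itself; a toy view of what now reaches the bit packer when `minus_one_encoded` is set:

    let tfs = [1u32, 1, 2, 1];
    let fed_to_bitpacker: Vec<u32> = tfs.iter().map(|&tf| tf - 1).collect();
    assert_eq!(fed_to_bitpacker, vec![0, 0, 1, 0]);
    // A full block of tf == 1 (common for terms that rarely repeat within
    // a document) becomes all zeros and packs with a width of 0 bits.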
@@ -71,21 +97,55 @@ impl BlockDecoder {
         }
     }
 
+    /// Decompress a block of sorted integers.
+    ///
+    /// `strict_delta` depends on what encoding was used. Older versions of tantivy never use
+    /// strict deltas, newer versions always use them.
     pub fn uncompress_block_sorted(
         &mut self,
         compressed_data: &[u8],
         offset: u32,
         num_bits: u8,
+        strict_delta: bool,
     ) -> usize {
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        self.bitpacker
-            .decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
+        if strict_delta {
+            let offset = std::num::NonZeroU32::new(offset).map(std::num::NonZeroU32::get);
+
+            self.output_len = COMPRESSION_BLOCK_SIZE;
+            self.bitpacker.decompress_strictly_sorted(
+                offset,
+                compressed_data,
+                &mut self.output,
+                num_bits,
+            )
+        } else {
+            self.output_len = COMPRESSION_BLOCK_SIZE;
+            self.bitpacker
+                .decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
+        }
     }
 
-    pub fn uncompress_block_unsorted(&mut self, compressed_data: &[u8], num_bits: u8) -> usize {
+    /// Decompress a block of unsorted integers.
+    ///
+    /// `minus_one_encoded` depends on what encoding was used. Older versions of tantivy never
+    /// use that encoding. Newer versions use it for some structures, but not all. See the
+    /// corresponding call to `BlockEncoder::compress_block_unsorted`.
+    pub fn uncompress_block_unsorted(
+        &mut self,
+        compressed_data: &[u8],
+        num_bits: u8,
+        minus_one_encoded: bool,
+    ) -> usize {
         self.output_len = COMPRESSION_BLOCK_SIZE;
-        self.bitpacker
-            .decompress(compressed_data, &mut self.output, num_bits)
+        let res = self
+            .bitpacker
+            .decompress(compressed_data, &mut self.output, num_bits);
+        if minus_one_encoded {
+            for val in &mut self.output {
+                *val += 1;
+            }
+        }
+        res
     }
 
     #[inline]
@@ -218,7 +278,8 @@ pub mod tests {
         let (num_bits, compressed_data) = encoder.compress_block_sorted(&vals, 0);
         let mut decoder = BlockDecoder::default();
         {
-            let consumed_num_bytes = decoder.uncompress_block_sorted(compressed_data, 0, num_bits);
+            let consumed_num_bytes =
+                decoder.uncompress_block_sorted(compressed_data, 0, num_bits, true);
             assert_eq!(consumed_num_bytes, compressed_data.len());
         }
         for i in 0..128 {
@@ -233,7 +294,8 @@
         let (num_bits, compressed_data) = encoder.compress_block_sorted(&vals, 10);
         let mut decoder = BlockDecoder::default();
         {
-            let consumed_num_bytes = decoder.uncompress_block_sorted(compressed_data, 10, num_bits);
+            let consumed_num_bytes =
+                decoder.uncompress_block_sorted(compressed_data, 10, num_bits, true);
             assert_eq!(consumed_num_bytes, compressed_data.len());
         }
         for i in 0..128 {
@@ -252,7 +314,8 @@
         compressed.push(173u8);
         let mut decoder = BlockDecoder::default();
         {
-            let consumed_num_bytes = decoder.uncompress_block_sorted(&compressed, 10, num_bits);
+            let consumed_num_bytes =
+                decoder.uncompress_block_sorted(&compressed, 10, num_bits, true);
             assert_eq!(consumed_num_bytes, compressed.len() - 1);
             assert_eq!(compressed[consumed_num_bytes], 173u8);
         }
@@ -263,21 +326,25 @@
 
     #[test]
     fn test_encode_unsorted_block_with_junk() {
-        let mut compressed: Vec<u8> = Vec::new();
-        let n = 128;
-        let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32) * 7u32 % 12).collect();
-        let mut encoder = BlockEncoder::default();
-        let (num_bits, compressed_data) = encoder.compress_block_unsorted(&vals);
-        compressed.extend_from_slice(compressed_data);
-        compressed.push(173u8);
-        let mut decoder = BlockDecoder::default();
-        {
-            let consumed_num_bytes = decoder.uncompress_block_unsorted(&compressed, num_bits);
-            assert_eq!(consumed_num_bytes + 1, compressed.len());
-            assert_eq!(compressed[consumed_num_bytes], 173u8);
-        }
-        for i in 0..n {
-            assert_eq!(vals[i], decoder.output(i));
+        for minus_one_encode in [false, true] {
+            let mut compressed: Vec<u8> = Vec::new();
+            let n = 128;
+            let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32) * 7u32 % 12).collect();
+            let mut encoder = BlockEncoder::default();
+            let (num_bits, compressed_data) =
+                encoder.compress_block_unsorted(&vals, minus_one_encode);
+            compressed.extend_from_slice(compressed_data);
+            compressed.push(173u8);
+            let mut decoder = BlockDecoder::default();
+            {
+                let consumed_num_bytes =
+                    decoder.uncompress_block_unsorted(&compressed, num_bits, minus_one_encode);
+                assert_eq!(consumed_num_bytes + 1, compressed.len());
+                assert_eq!(compressed[consumed_num_bytes], 173u8);
+            }
+            for i in 0..n {
+                assert_eq!(vals[i], decoder.output(i));
+            }
         }
     }
 
@@ -344,7 +411,7 @@ mod bench {
         let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
         let mut decoder = BlockDecoder::default();
         b.iter(|| {
-            decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
+            decoder.uncompress_block_sorted(compressed, 0u32, num_bits, true);
         });
     }
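A plausible round trip through the modified API, in the spirit of the tests above (the exact `num_bits` value assumes the forked `bitpacking` crate behaves as described):

    let mut encoder = BlockEncoder::default();
    let mut decoder = BlockDecoder::default();
    // A full block of 128 consecutive doc ids: every delta is 1, so every
    // (delta - 1) is 0.
    let docs: Vec<u32> = (1000u32..1128).collect();
    let (num_bits, compressed) = encoder.compress_block_sorted(&docs, 999);
    assert_eq!(num_bits, 0); // the previous encoding needed 1 bit per doc
    decoder.uncompress_block_sorted(compressed, 999, num_bits, true);
    assert_eq!(decoder.output(0), 1000);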
diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs
index afaedf0fd..bf0d4d2ef 100644
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -301,6 +301,7 @@ pub struct PostingsSerializer<W: Write> {
     bm25_weight: Option<Bm25Weight>,
     avg_fieldnorm: Score, /* Average number of terms in the field for that segment.
                            * This value is used to compute the block wand information. */
+    term_has_freq: bool,
 }
 
 impl<W: Write> PostingsSerializer<W> {
@@ -325,13 +326,15 @@
             fieldnorm_reader,
             bm25_weight: None,
             avg_fieldnorm,
+            term_has_freq: false,
         }
     }
 
     pub fn new_term(&mut self, term_doc_freq: u32) {
         self.bm25_weight = None;
 
-        if !self.mode.has_freq() {
+        self.term_has_freq = self.mode.has_freq() && term_doc_freq != 0;
+        if !self.term_has_freq {
             return;
         }
 
@@ -365,10 +368,10 @@
                 // last el block 0, offset block 1,
                 self.postings_write.extend(block_encoded);
             }
-            if self.mode.has_freq() {
+            if self.term_has_freq {
                 let (num_bits, block_encoded): (u8, &[u8]) = self
                     .block_encoder
-                    .compress_block_unsorted(self.block.term_freqs());
+                    .compress_block_unsorted(self.block.term_freqs(), true);
                 self.postings_write.extend(block_encoded);
                 self.skip_write.write_term_freq(num_bits);
                 if self.mode.has_positions() {
@@ -432,7 +435,7 @@
             self.postings_write.write_all(block_encoded)?;
         }
         // ... Idem for term frequencies
-        if self.mode.has_freq() {
+        if self.term_has_freq {
             let block_encoded = self
                 .block_encoder
                 .compress_vint_unsorted(self.block.term_freqs());
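On the write side, carrying term frequencies is now a per-term decision rather than a per-field one: `term_has_freq` requires both that the field mode records freqs and that `new_term` was called with a non-zero `term_doc_freq`, which appears to be how JSON integer terms opt out of tf blocks ("don't emit tf for json integer terms" in the commit message). That is also what makes the reader-side skip-length check sound. The gating condensed into a standalone function, names simplified for illustration:

    fn term_has_freq(mode_has_freq: bool, term_doc_freq: u32) -> bool {
        // Both must hold before any tf block, bitpacked or vint-encoded,
        // is written for the current term.
        mode_has_freq && term_doc_freq != 0
    }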
diff --git a/src/postings/skip.rs b/src/postings/skip.rs
index 1da70d376..1f5eb3577 100644
--- a/src/postings/skip.rs
+++ b/src/postings/skip.rs
@@ -6,6 +6,22 @@
 use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};
 
+// The doc num bits byte uses the following encoding:
+// given 0b a b cdefgh
+//          |1|2|  3   |
+// - 1: unused
+// - 2: is delta-1 encoded: 0 if not, 1 if yes
+// - 3: a 6-bit number in 0..=32, the actual bitwidth
+fn encode_bitwidth(bitwidth: u8, delta_1: bool) -> u8 {
+    bitwidth | ((delta_1 as u8) << 6)
+}
+
+fn decode_bitwidth(raw_bitwidth: u8) -> (u8, bool) {
+    let delta_1 = (raw_bitwidth >> 6 & 1) != 0;
+    let bitwidth = raw_bitwidth & 0x3f;
+    (bitwidth, delta_1)
+}
+
 #[inline]
 fn encode_block_wand_max_tf(max_tf: u32) -> u8 {
     max_tf.min(u8::MAX as u32) as u8
@@ -41,7 +57,7 @@ impl SkipSerializer {
 
     pub fn write_doc(&mut self, last_doc: DocId, doc_num_bits: u8) {
         write_u32(last_doc, &mut self.buffer);
-        self.buffer.push(doc_num_bits);
+        self.buffer.push(encode_bitwidth(doc_num_bits, true));
     }
 
     pub fn write_term_freq(&mut self, tf_num_bits: u8) {
@@ -85,6 +101,7 @@ pub(crate) struct SkipReader {
 pub(crate) enum BlockInfo {
     BitPacked {
         doc_num_bits: u8,
+        strict_delta_encoded: bool,
         tf_num_bits: u8,
         tf_sum: u32,
         block_wand_fieldnorm_id: u8,
@@ -172,12 +189,13 @@ impl SkipReader {
         let bytes = self.owned_read.as_slice();
         let advance_len: usize;
         self.last_doc_in_block = read_u32(bytes);
-        let doc_num_bits = bytes[4];
+        let (doc_num_bits, strict_delta_encoded) = decode_bitwidth(bytes[4]);
         match self.skip_info {
             IndexRecordOption::Basic => {
                 advance_len = 5;
                 self.block_info = BlockInfo::BitPacked {
                     doc_num_bits,
+                    strict_delta_encoded,
                     tf_num_bits: 0,
                     tf_sum: 0,
                     block_wand_fieldnorm_id: 0,
@@ -191,6 +209,7 @@
                 advance_len = 8;
                 self.block_info = BlockInfo::BitPacked {
                     doc_num_bits,
+                    strict_delta_encoded,
                     tf_num_bits,
                     tf_sum: 0,
                     block_wand_fieldnorm_id,
@@ -205,6 +224,7 @@
                 advance_len = 12;
                 self.block_info = BlockInfo::BitPacked {
                     doc_num_bits,
+                    strict_delta_encoded,
                     tf_num_bits,
                     tf_sum,
                     block_wand_fieldnorm_id,
@@ -268,7 +288,9 @@
 
 #[cfg(test)]
 mod tests {
-    use super::{BlockInfo, IndexRecordOption, SkipReader, SkipSerializer};
+    use super::{
+        decode_bitwidth, encode_bitwidth, BlockInfo, IndexRecordOption, SkipReader, SkipSerializer,
+    };
     use crate::directory::OwnedBytes;
     use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -310,6 +332,7 @@
             skip_reader.block_info,
             BlockInfo::BitPacked {
                 doc_num_bits: 2u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 3u8,
                 tf_sum: 0,
                 block_wand_fieldnorm_id: 13,
@@ -322,6 +345,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 5u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 2u8,
                 tf_sum: 0,
                 block_wand_fieldnorm_id: 8,
@@ -352,6 +376,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 2u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 0,
                 tf_sum: 0u32,
                 block_wand_fieldnorm_id: 0,
@@ -364,6 +389,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 5u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 0,
                 tf_sum: 0u32,
                 block_wand_fieldnorm_id: 0,
@@ -393,6 +419,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 2u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 0,
                 tf_sum: 0u32,
                 block_wand_fieldnorm_id: 0,
@@ -402,4 +429,18 @@
         skip_reader.advance();
         assert_eq!(skip_reader.block_info(), BlockInfo::VInt { num_docs: 0u32 });
     }
+
+    #[test]
+    fn test_encode_decode_bitwidth() {
+        for bitwidth in 0..=32 {
+            for delta_1 in [false, true] {
+                assert_eq!(
+                    (bitwidth, delta_1),
+                    decode_bitwidth(encode_bitwidth(bitwidth, delta_1))
+                );
+            }
+        }
+        assert_eq!(0b01000010, encode_bitwidth(0b10, true));
+        assert_eq!(0b00000010, encode_bitwidth(0b10, false));
+    }
 }
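The flag travels in bit 6 of the existing `doc_num_bits` byte, which is free because a bit width never exceeds 32 and therefore fits in the low 6 bits. Skip entries written by older versions have bit 6 clear and decode as non-strict, which is presumably why both decode paths are kept on the read side. For example:

    assert_eq!(encode_bitwidth(7, true), 0b0100_0111);
    assert_eq!(decode_bitwidth(0b0100_0111), (7, true));
    // An entry from an older index: bit 6 is 0, so it decodes as
    // (7, false) and the legacy non-strict delta path is used.
    assert_eq!(decode_bitwidth(0b0000_0111), (7, false));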
diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs
index 2ec31794a..ef82a31d8 100644
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -93,7 +93,7 @@ impl TermScorer {
     }
 
     pub fn last_doc_in_block(&self) -> DocId {
-        self.postings.block_cursor.skip_reader.last_doc_in_block()
+        self.postings.block_cursor.skip_reader().last_doc_in_block()
     }
 }