From 0d4589219bcf8b4f10326e599c23ac102d28e42b Mon Sep 17 00:00:00 2001
From: trinity-1686a
Date: Fri, 20 Oct 2023 16:58:26 +0200
Subject: [PATCH] encode some part of posting list as -1 instead of direct
 values (#2185)

* add support for delta-1 encoding posting list
* encode term frequency minus one
* don't emit tf for json integer terms
* make skipreader not pub(crate) mutable
---
 Cargo.toml                             |   2 +-
 src/indexer/mod.rs                     |  29 ++++++
 src/positions/reader.rs                |   2 +-
 src/positions/serializer.rs            |   5 +-
 src/postings/block_segment_postings.rs |  84 ++++++++++++-----
 src/postings/compression/mod.rs        | 123 +++++++++++++++++++------
 src/postings/serializer.rs             |  11 ++-
 src/postings/skip.rs                   |  47 +++++++++-
 src/query/term_query/term_scorer.rs    |   2 +-
 9 files changed, 240 insertions(+), 65 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 637b2e9c4..0e4457b83 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,7 +37,7 @@ uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
-bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
+bitpacking = { git = "https://github.com/quickwit-oss/bitpacking", rev = "f730b75", default-features = false, features = ["bitpacker4x"] }
 census = "0.4.0"
 rustc-hash = "1.1.0"
 thiserror = "1.0.30"
diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs
index d5ae094b5..666909391 100644
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -109,6 +109,35 @@ mod tests_mmap {
         }
     }
 
+    #[test]
+    fn test_json_field_number() {
+        // This test was added specifically to exercise the case of a JSON field, with
+        // frequencies enabled, that stores integers, with enough documents containing a
+        // single integer for the posting list to be bitpacked.
+        let mut schema_builder = Schema::builder();
+
+        let json_field = schema_builder.add_json_field("json", TEXT);
+        let index = Index::create_in_ram(schema_builder.build());
+        let mut index_writer = index.writer_for_tests().unwrap();
+        for _ in 0..256 {
+            let json = serde_json::json!({"somekey": 1u64, "otherkey": -2i64});
+            index_writer.add_document(doc!(json_field=>json)).unwrap();
+
+            let json = serde_json::json!({"somekey": "1str", "otherkey": "2str"});
+            index_writer.add_document(doc!(json_field=>json)).unwrap();
+        }
+        index_writer.commit().unwrap();
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        assert_eq!(searcher.num_docs(), 512);
+        let parse_query = QueryParser::for_index(&index, Vec::new());
+        {
+            let query = parse_query.parse_query(r"json.somekey:1").unwrap();
+            let num_docs = searcher.search(&query, &Count).unwrap();
+            assert_eq!(num_docs, 256);
+        }
+    }
+
     #[test]
     fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
         let mut schema_builder = Schema::builder();
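The whole patch rests on one observation: both sequences being bitpacked here have a known lower bound. Doc ids within a posting list are strictly increasing, so consecutive deltas are always >= 1 and can be stored as `delta - 1`; likewise a term frequency is always >= 1 and can be stored as `tf - 1`. A minimal sketch of the transform, with illustrative helper names (the actual bit packing is delegated to the patched `bitpacking` crate, and the `offset == 0` case is special-cased to `None` later in this patch):

    // Strictly increasing doc ids relative to a real predecessor `offset`:
    // every delta is >= 1, so (delta - 1) never underflows.
    fn doc_deltas_minus_one(offset: u32, doc_ids: &[u32]) -> Vec<u32> {
        let mut prev = offset;
        doc_ids
            .iter()
            .map(|&doc| {
                let delta_minus_one = doc - prev - 1;
                prev = doc;
                delta_minus_one
            })
            .collect()
    }

    // Bit width needed to pack a block: bits of its largest value.
    fn num_bits(vals: &[u32]) -> u32 {
        vals.iter().map(|&v| 32 - v.leading_zeros()).max().unwrap_or(0)
    }

For a run of consecutive doc ids, every `delta - 1` is 0, so the width drops from 1 bit to 0 bits and the block payload disappears entirely.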
diff --git a/src/positions/reader.rs b/src/positions/reader.rs
index bb2d35c1b..0f5e97dc5 100644
--- a/src/positions/reader.rs
+++ b/src/positions/reader.rs
@@ -92,7 +92,7 @@ impl PositionReader {
             // that block is bitpacked.
             let bit_width = bit_widths[block_rel_id];
             self.block_decoder
-                .uncompress_block_unsorted(compressed_data, bit_width);
+                .uncompress_block_unsorted(compressed_data, bit_width, false);
         } else {
             // that block is vint encoded.
             self.block_decoder
diff --git a/src/positions/serializer.rs b/src/positions/serializer.rs
index eee31d5cd..f41923e8b 100644
--- a/src/positions/serializer.rs
+++ b/src/positions/serializer.rs
@@ -62,8 +62,9 @@ impl PositionSerializer {
             return;
         }
         if self.block.len() == COMPRESSION_BLOCK_SIZE {
-            let (bit_width, block_encoded): (u8, &[u8]) =
-                self.block_encoder.compress_block_unsorted(&self.block[..]);
+            let (bit_width, block_encoded): (u8, &[u8]) = self
+                .block_encoder
+                .compress_block_unsorted(&self.block[..], false);
             self.bit_widths.push(bit_width);
             self.positions_buffer.extend(block_encoded);
         } else {
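Note that both positions call sites above pass `false`: the positions stream cannot use the new encodings because 0 is a legitimate value there (for instance, a document whose first token sits at position 0 produces a leading delta of 0). A tiny illustration of why the minus-one transform is off the table:

    // Hypothetical token positions 0, 1, 4 delta-encoded: 0, 1, 3.
    let position_deltas: Vec<u32> = vec![0, 1, 3];
    // 0 occurs, so (value - 1) would underflow; positions keep the plain,
    // non-strict encoding.
    assert!(position_deltas.contains(&0));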
diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs
index bf131627c..366809a95 100644
--- a/src/postings/block_segment_postings.rs
+++ b/src/postings/block_segment_postings.rs
@@ -24,13 +24,13 @@ fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
 #[derive(Clone)]
 pub struct BlockSegmentPostings {
     pub(crate) doc_decoder: BlockDecoder,
-    loaded_offset: usize,
+    block_loaded: bool,
     freq_decoder: BlockDecoder,
     freq_reading_option: FreqReadingOption,
     block_max_score_cache: Option<Score>,
     doc_freq: u32,
     data: OwnedBytes,
-    pub(crate) skip_reader: SkipReader,
+    skip_reader: SkipReader,
 }
 
 fn decode_bitpacked_block(
@@ -40,10 +40,16 @@
     doc_offset: DocId,
     doc_num_bits: u8,
     tf_num_bits: u8,
+    strict_delta: bool,
 ) {
-    let num_consumed_bytes = doc_decoder.uncompress_block_sorted(data, doc_offset, doc_num_bits);
+    let num_consumed_bytes =
+        doc_decoder.uncompress_block_sorted(data, doc_offset, doc_num_bits, strict_delta);
     if let Some(freq_decoder) = freq_decoder_opt {
-        freq_decoder.uncompress_block_unsorted(&data[num_consumed_bytes..], tf_num_bits);
+        freq_decoder.uncompress_block_unsorted(
+            &data[num_consumed_bytes..],
+            tf_num_bits,
+            strict_delta,
+        );
     }
 }
 
@@ -57,11 +63,15 @@ fn decode_vint_block(
     let num_consumed_bytes =
         doc_decoder.uncompress_vint_sorted(data, doc_offset, num_vint_docs, TERMINATED);
     if let Some(freq_decoder) = freq_decoder_opt {
-        freq_decoder.uncompress_vint_unsorted(
-            &data[num_consumed_bytes..],
-            num_vint_docs,
-            TERMINATED,
-        );
+        // If it's a JSON term with freqs, containing fewer than 256 docs, we can reach this
+        // point thinking we have a freq block, despite not really having one.
+        if data.len() > num_consumed_bytes {
+            freq_decoder.uncompress_vint_unsorted(
+                &data[num_consumed_bytes..],
+                num_vint_docs,
+                TERMINATED,
+            );
+        }
     }
 }
 
@@ -78,28 +88,46 @@ fn split_into_skips_and_postings(
 }
 
 impl BlockSegmentPostings {
+    /// Opens a `BlockSegmentPostings`.
+    /// `doc_freq` is the number of documents in the posting list.
+    /// `record_option` represents the amount of data available according to the schema.
+    /// `requested_option` is the amount of data requested by the user.
+    /// If, for instance, we do not request term frequencies, this function will not decompress
+    /// term frequency blocks.
     pub(crate) fn open(
         doc_freq: u32,
         data: FileSlice,
-        record_option: IndexRecordOption,
+        mut record_option: IndexRecordOption,
        requested_option: IndexRecordOption,
     ) -> io::Result<BlockSegmentPostings> {
+        let bytes = data.read_bytes()?;
+        let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
+        let skip_reader = match skip_data_opt {
+            Some(skip_data) => {
+                let block_count = doc_freq as usize / COMPRESSION_BLOCK_SIZE;
+                // 8 is the minimum size of a skip entry for a block with frequencies (it can
+                // be more if positions are stored too).
+                if skip_data.len() < 8 * block_count {
+                    // The field might be encoded with frequencies, but this term in particular
+                    // isn't. This can happen for a JSON field with term frequencies:
+                    // - text terms are encoded with term freqs.
+                    // - numerical terms are encoded without term freqs.
+                    record_option = IndexRecordOption::Basic;
+                }
+                SkipReader::new(skip_data, doc_freq, record_option)
+            }
+            None => SkipReader::new(OwnedBytes::empty(), doc_freq, record_option),
+        };
+
         let freq_reading_option = match (record_option, requested_option) {
             (IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq,
             (_, IndexRecordOption::Basic) => FreqReadingOption::SkipFreq,
             (_, _) => FreqReadingOption::ReadFreq,
         };
 
-        let bytes = data.read_bytes()?;
-        let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, bytes)?;
-        let skip_reader = match skip_data_opt {
-            Some(skip_data) => SkipReader::new(skip_data, doc_freq, record_option),
-            None => SkipReader::new(OwnedBytes::empty(), doc_freq, record_option),
-        };
-
         let mut block_segment_postings = BlockSegmentPostings {
             doc_decoder: BlockDecoder::with_val(TERMINATED),
-            loaded_offset: usize::MAX,
+            block_loaded: false,
             freq_decoder: BlockDecoder::with_val(1),
             freq_reading_option,
             block_max_score_cache: None,
@@ -169,7 +197,7 @@ impl BlockSegmentPostings {
             split_into_skips_and_postings(doc_freq, postings_data)?;
         self.data = postings_data;
         self.block_max_score_cache = None;
-        self.loaded_offset = usize::MAX;
+        self.block_loaded = false;
         if let Some(skip_data) = skip_data_opt {
             self.skip_reader.reset(skip_data, doc_freq);
         } else {
@@ -265,22 +293,23 @@
     pub(crate) fn shallow_seek(&mut self, target_doc: DocId) {
         if self.skip_reader.seek(target_doc) {
             self.block_max_score_cache = None;
+            self.block_loaded = false;
         }
     }
 
     pub(crate) fn block_is_loaded(&self) -> bool {
-        self.loaded_offset == self.skip_reader.byte_offset()
+        self.block_loaded
     }
 
     pub(crate) fn load_block(&mut self) {
         let offset = self.skip_reader.byte_offset();
-        if self.loaded_offset == offset {
+        if self.block_is_loaded() {
             return;
         }
-        self.loaded_offset = offset;
         match self.skip_reader.block_info() {
             BlockInfo::BitPacked {
                 doc_num_bits,
+                strict_delta_encoded,
                 tf_num_bits,
                 ..
             } => {
@@ -295,6 +324,7 @@
                     self.skip_reader.last_doc_in_previous_block,
                     doc_num_bits,
                     tf_num_bits,
+                    strict_delta_encoded,
                 );
             }
             BlockInfo::VInt { num_docs } => {
@@ -318,13 +348,13 @@
                 );
             }
         }
+        self.block_loaded = true;
     }
 
     /// Advance to the next block.
-    ///
-    /// Returns false if and only if there is no remaining block.
     pub fn advance(&mut self) {
         self.skip_reader.advance();
+        self.block_loaded = false;
         self.block_max_score_cache = None;
         self.load_block();
     }
@@ -333,7 +363,7 @@
     pub fn empty() -> BlockSegmentPostings {
         BlockSegmentPostings {
             doc_decoder: BlockDecoder::with_val(TERMINATED),
-            loaded_offset: 0,
+            block_loaded: true,
             freq_decoder: BlockDecoder::with_val(1),
             freq_reading_option: FreqReadingOption::NoFreq,
             block_max_score_cache: None,
@@ -342,6 +372,10 @@
             skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic),
         }
     }
+
+    pub(crate) fn skip_reader(&self) -> &SkipReader {
+        &self.skip_reader
+    }
 }
 
 #[cfg(test)]
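The length check in `open` deserves a closer look. Every full bitpacked block gets one skip entry, and (per the `SkipReader` changes later in this patch) an entry occupies 5 bytes without term freqs and at least 8 bytes with them. Comparing the skip data length against `8 * block_count` therefore tells the two layouts apart. A sketch of that arithmetic, with a hypothetical helper name:

    // 5 bytes per skip entry without freqs, >= 8 with freqs.
    fn term_written_with_freqs(skip_data_len: usize, doc_freq: u32) -> bool {
        const COMPRESSION_BLOCK_SIZE: usize = 128;
        let full_block_count = doc_freq as usize / COMPRESSION_BLOCK_SIZE;
        skip_data_len >= 8 * full_block_count
    }

For the JSON integer term in the test earlier: 256 docs means 2 full blocks; written without freqs, the skip data is 2 * 5 = 10 bytes, which is below 16, so `record_option` is downgraded to `IndexRecordOption::Basic`.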
diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs
index 7e2691480..f8a8a3193 100644
--- a/src/postings/compression/mod.rs
+++ b/src/postings/compression/mod.rs
@@ -33,14 +33,40 @@ impl BlockEncoder {
     }
 
     pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> (u8, &[u8]) {
-        let num_bits = self.bitpacker.num_bits_sorted(offset, block);
+        // If offset is zero, convert it to None. This is correct as long as we do the same when
+        // decompressing. It's required in case the block starts with an actual zero.
+        let offset = if offset == 0u32 { None } else { Some(offset) };
+
+        let num_bits = self.bitpacker.num_bits_strictly_sorted(offset, block);
         let written_size = self
             .bitpacker
-            .compress_sorted(offset, block, &mut self.output[..], num_bits);
+            .compress_strictly_sorted(offset, block, &mut self.output[..], num_bits);
         (num_bits, &self.output[..written_size])
     }
 
-    pub fn compress_block_unsorted(&mut self, block: &[u32]) -> (u8, &[u8]) {
+    /// Compress a single block of unsorted numbers.
+    ///
+    /// If `minus_one_encoded` is set, each value must be >= 1, and will be encoded in a slightly
+    /// more compact format. This is useful for values where 0 can never occur, such
+    /// as term frequencies, but is wrong for usages like position lists, where 0 can
+    /// appear.
+    pub fn compress_block_unsorted(
+        &mut self,
+        block: &[u32],
+        minus_one_encoded: bool,
+    ) -> (u8, &[u8]) {
+        debug_assert!(!minus_one_encoded || !block.contains(&0));
+
+        let mut block_minus_one = [0; COMPRESSION_BLOCK_SIZE];
+        let block = if minus_one_encoded {
+            for (elem_min_one, elem) in block_minus_one.iter_mut().zip(block) {
+                *elem_min_one = elem - 1;
+            }
+            &block_minus_one
+        } else {
+            block
+        };
+
         let num_bits = self.bitpacker.num_bits(block);
         let written_size = self
             .bitpacker
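Two encoder-side details are worth spelling out. First, `compress_strictly_sorted` takes an `Option<u32>` predecessor: mapping `offset == 0` to `None` keeps a first block that starts at doc id 0 encodable, since with `Some(0)` that first doc would need a `delta - 1` of `0 - 0 - 1`. Second, the minus-one transform itself; a toy view of what now reaches the bit packer when `minus_one_encoded` is set:

    let tfs = [1u32, 1, 2, 1];
    let fed_to_bitpacker: Vec<u32> = tfs.iter().map(|&tf| tf - 1).collect();
    assert_eq!(fed_to_bitpacker, vec![0, 0, 1, 0]);
    // A full block of tf == 1 (common for terms that rarely repeat within
    // a document) becomes all zeros and packs with a width of 0 bits.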
@@ -71,21 +97,55 @@ impl BlockDecoder {
         }
     }
 
+    /// Decompress a block of sorted integers.
+    ///
+    /// `strict_delta` depends on what encoding was used. Older versions of tantivy never use
+    /// strict deltas, newer versions always use them.
     pub fn uncompress_block_sorted(
         &mut self,
         compressed_data: &[u8],
         offset: u32,
         num_bits: u8,
+        strict_delta: bool,
     ) -> usize {
-        self.output_len = COMPRESSION_BLOCK_SIZE;
-        self.bitpacker
-            .decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
+        if strict_delta {
+            let offset = std::num::NonZeroU32::new(offset).map(std::num::NonZeroU32::get);
+
+            self.output_len = COMPRESSION_BLOCK_SIZE;
+            self.bitpacker.decompress_strictly_sorted(
+                offset,
+                compressed_data,
+                &mut self.output,
+                num_bits,
+            )
+        } else {
+            self.output_len = COMPRESSION_BLOCK_SIZE;
+            self.bitpacker
+                .decompress_sorted(offset, compressed_data, &mut self.output, num_bits)
+        }
     }
 
-    pub fn uncompress_block_unsorted(&mut self, compressed_data: &[u8], num_bits: u8) -> usize {
+    /// Decompress a block of unsorted integers.
+    ///
+    /// `minus_one_encoded` depends on what encoding was used. Older versions of tantivy never
+    /// use that encoding. Newer versions use it for some structures, but not all. See the
+    /// corresponding call to `BlockEncoder::compress_block_unsorted`.
+    pub fn uncompress_block_unsorted(
+        &mut self,
+        compressed_data: &[u8],
+        num_bits: u8,
+        minus_one_encoded: bool,
+    ) -> usize {
         self.output_len = COMPRESSION_BLOCK_SIZE;
-        self.bitpacker
-            .decompress(compressed_data, &mut self.output, num_bits)
+        let res = self
+            .bitpacker
+            .decompress(compressed_data, &mut self.output, num_bits);
+        if minus_one_encoded {
+            for val in &mut self.output {
+                *val += 1;
+            }
+        }
+        res
     }
 
     #[inline]
@@ -218,7 +278,8 @@ pub mod tests {
         let (num_bits, compressed_data) = encoder.compress_block_sorted(&vals, 0);
         let mut decoder = BlockDecoder::default();
         {
-            let consumed_num_bytes = decoder.uncompress_block_sorted(compressed_data, 0, num_bits);
+            let consumed_num_bytes =
+                decoder.uncompress_block_sorted(compressed_data, 0, num_bits, true);
             assert_eq!(consumed_num_bytes, compressed_data.len());
         }
         for i in 0..128 {
@@ -233,7 +294,8 @@
         let (num_bits, compressed_data) = encoder.compress_block_sorted(&vals, 10);
         let mut decoder = BlockDecoder::default();
         {
-            let consumed_num_bytes = decoder.uncompress_block_sorted(compressed_data, 10, num_bits);
+            let consumed_num_bytes =
+                decoder.uncompress_block_sorted(compressed_data, 10, num_bits, true);
             assert_eq!(consumed_num_bytes, compressed_data.len());
         }
         for i in 0..128 {
@@ -252,7 +314,8 @@
         compressed.push(173u8);
         let mut decoder = BlockDecoder::default();
         {
-            let consumed_num_bytes = decoder.uncompress_block_sorted(&compressed, 10, num_bits);
+            let consumed_num_bytes =
+                decoder.uncompress_block_sorted(&compressed, 10, num_bits, true);
             assert_eq!(consumed_num_bytes, compressed.len() - 1);
             assert_eq!(compressed[consumed_num_bytes], 173u8);
         }
@@ -263,21 +326,25 @@
 
     #[test]
     fn test_encode_unsorted_block_with_junk() {
-        let mut compressed: Vec<u8> = Vec::new();
-        let n = 128;
-        let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32) * 7u32 % 12).collect();
-        let mut encoder = BlockEncoder::default();
-        let (num_bits, compressed_data) = encoder.compress_block_unsorted(&vals);
-        compressed.extend_from_slice(compressed_data);
-        compressed.push(173u8);
-        let mut decoder = BlockDecoder::default();
-        {
-            let consumed_num_bytes = decoder.uncompress_block_unsorted(&compressed, num_bits);
-            assert_eq!(consumed_num_bytes + 1, compressed.len());
-            assert_eq!(compressed[consumed_num_bytes], 173u8);
-        }
-        for i in 0..n {
-            assert_eq!(vals[i], decoder.output(i));
+        for minus_one_encode in [false, true] {
+            let mut compressed: Vec<u8> = Vec::new();
+            let n = 128;
+            let vals: Vec<u32> = (0..n).map(|i| 11u32 + (i as u32) * 7u32 % 12).collect();
+            let mut encoder = BlockEncoder::default();
+            let (num_bits, compressed_data) =
+                encoder.compress_block_unsorted(&vals, minus_one_encode);
+            compressed.extend_from_slice(compressed_data);
+            compressed.push(173u8);
+            let mut decoder = BlockDecoder::default();
+            {
+                let consumed_num_bytes =
+                    decoder.uncompress_block_unsorted(&compressed, num_bits, minus_one_encode);
+                assert_eq!(consumed_num_bytes + 1, compressed.len());
+                assert_eq!(compressed[consumed_num_bytes], 173u8);
+            }
+            for i in 0..n {
+                assert_eq!(vals[i], decoder.output(i));
+            }
         }
     }
 
@@ -344,7 +411,7 @@ mod bench {
         let (num_bits, compressed) = encoder.compress_block_sorted(&data, 0u32);
         let mut decoder = BlockDecoder::default();
         b.iter(|| {
-            decoder.uncompress_block_sorted(compressed, 0u32, num_bits);
+            decoder.uncompress_block_sorted(compressed, 0u32, num_bits, true);
         });
     }
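A plausible round trip through the modified API, in the spirit of the tests above (the exact `num_bits` value assumes the forked `bitpacking` crate behaves as described):

    let mut encoder = BlockEncoder::default();
    let mut decoder = BlockDecoder::default();
    // A full block of 128 consecutive doc ids: every delta is 1, so every
    // (delta - 1) is 0.
    let docs: Vec<u32> = (1000u32..1128).collect();
    let (num_bits, compressed) = encoder.compress_block_sorted(&docs, 999);
    assert_eq!(num_bits, 0); // the previous encoding needed 1 bit per doc
    decoder.uncompress_block_sorted(compressed, 999, num_bits, true);
    assert_eq!(decoder.output(0), 1000);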
diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs
index afaedf0fd..bf0d4d2ef 100644
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -301,6 +301,7 @@ pub struct PostingsSerializer<W: Write> {
     bm25_weight: Option<Bm25Weight>,
     avg_fieldnorm: Score, /* Average number of terms in the field for that segment.
                            * This value is used to compute the block wand information. */
+    term_has_freq: bool,
 }
 
 impl<W: Write> PostingsSerializer<W> {
@@ -325,13 +326,15 @@
             fieldnorm_reader,
             bm25_weight: None,
             avg_fieldnorm,
+            term_has_freq: false,
         }
     }
 
     pub fn new_term(&mut self, term_doc_freq: u32) {
         self.bm25_weight = None;
 
-        if !self.mode.has_freq() {
+        self.term_has_freq = self.mode.has_freq() && term_doc_freq != 0;
+        if !self.term_has_freq {
             return;
         }
 
@@ -365,10 +368,10 @@
                 // last el block 0, offset block 1,
                 self.postings_write.extend(block_encoded);
             }
-            if self.mode.has_freq() {
+            if self.term_has_freq {
                 let (num_bits, block_encoded): (u8, &[u8]) = self
                     .block_encoder
-                    .compress_block_unsorted(self.block.term_freqs());
+                    .compress_block_unsorted(self.block.term_freqs(), true);
                 self.postings_write.extend(block_encoded);
                 self.skip_write.write_term_freq(num_bits);
                 if self.mode.has_positions() {
@@ -432,7 +435,7 @@
             self.postings_write.write_all(block_encoded)?;
         }
         // ... Idem for term frequencies
-        if self.mode.has_freq() {
+        if self.term_has_freq {
             let block_encoded = self
                 .block_encoder
                 .compress_vint_unsorted(self.block.term_freqs());
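On the write side, carrying term frequencies is now a per-term decision rather than a per-field one: `term_has_freq` requires both that the field mode records freqs and that `new_term` was called with a non-zero `term_doc_freq`, which appears to be how JSON integer terms opt out of tf blocks ("don't emit tf for json integer terms" in the commit message). That is also what makes the reader-side skip-length check sound. The gating condensed into a standalone function, names simplified for illustration:

    fn term_has_freq(mode_has_freq: bool, term_doc_freq: u32) -> bool {
        // Both must hold before any tf block, bitpacked or vint-encoded,
        // is written for the current term.
        mode_has_freq && term_doc_freq != 0
    }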
diff --git a/src/postings/skip.rs b/src/postings/skip.rs
index 1da70d376..1f5eb3577 100644
--- a/src/postings/skip.rs
+++ b/src/postings/skip.rs
@@ -6,6 +6,22 @@
 use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};
 
+// The doc num bits byte uses the following encoding:
+// given 0b a b cdefgh
+//          |1|2|  3   |
+// - 1: unused
+// - 2: is delta-1 encoded: 0 if not, 1 if yes
+// - 3: a 6-bit number in 0..=32, the actual bitwidth
+fn encode_bitwidth(bitwidth: u8, delta_1: bool) -> u8 {
+    bitwidth | ((delta_1 as u8) << 6)
+}
+
+fn decode_bitwidth(raw_bitwidth: u8) -> (u8, bool) {
+    let delta_1 = (raw_bitwidth >> 6 & 1) != 0;
+    let bitwidth = raw_bitwidth & 0x3f;
+    (bitwidth, delta_1)
+}
+
 #[inline]
 fn encode_block_wand_max_tf(max_tf: u32) -> u8 {
     max_tf.min(u8::MAX as u32) as u8
@@ -41,7 +57,7 @@ impl SkipSerializer {
 
     pub fn write_doc(&mut self, last_doc: DocId, doc_num_bits: u8) {
         write_u32(last_doc, &mut self.buffer);
-        self.buffer.push(doc_num_bits);
+        self.buffer.push(encode_bitwidth(doc_num_bits, true));
     }
 
     pub fn write_term_freq(&mut self, tf_num_bits: u8) {
@@ -85,6 +101,7 @@ pub(crate) struct SkipReader {
 pub(crate) enum BlockInfo {
     BitPacked {
         doc_num_bits: u8,
+        strict_delta_encoded: bool,
         tf_num_bits: u8,
         tf_sum: u32,
         block_wand_fieldnorm_id: u8,
@@ -172,12 +189,13 @@ impl SkipReader {
         let bytes = self.owned_read.as_slice();
         let advance_len: usize;
         self.last_doc_in_block = read_u32(bytes);
-        let doc_num_bits = bytes[4];
+        let (doc_num_bits, strict_delta_encoded) = decode_bitwidth(bytes[4]);
         match self.skip_info {
             IndexRecordOption::Basic => {
                 advance_len = 5;
                 self.block_info = BlockInfo::BitPacked {
                     doc_num_bits,
+                    strict_delta_encoded,
                     tf_num_bits: 0,
                     tf_sum: 0,
                     block_wand_fieldnorm_id: 0,
@@ -191,6 +209,7 @@
                 advance_len = 8;
                 self.block_info = BlockInfo::BitPacked {
                     doc_num_bits,
+                    strict_delta_encoded,
                     tf_num_bits,
                     tf_sum: 0,
                     block_wand_fieldnorm_id,
@@ -205,6 +224,7 @@
                 advance_len = 12;
                 self.block_info = BlockInfo::BitPacked {
                     doc_num_bits,
+                    strict_delta_encoded,
                     tf_num_bits,
                     tf_sum,
                     block_wand_fieldnorm_id,
@@ -268,7 +288,9 @@
 
 #[cfg(test)]
 mod tests {
-    use super::{BlockInfo, IndexRecordOption, SkipReader, SkipSerializer};
+    use super::{
+        decode_bitwidth, encode_bitwidth, BlockInfo, IndexRecordOption, SkipReader, SkipSerializer,
+    };
     use crate::directory::OwnedBytes;
     use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
@@ -310,6 +332,7 @@
             skip_reader.block_info,
             BlockInfo::BitPacked {
                 doc_num_bits: 2u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 3u8,
                 tf_sum: 0,
                 block_wand_fieldnorm_id: 13,
@@ -322,6 +345,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 5u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 2u8,
                 tf_sum: 0,
                 block_wand_fieldnorm_id: 8,
@@ -352,6 +376,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 2u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 0,
                 tf_sum: 0u32,
                 block_wand_fieldnorm_id: 0,
@@ -364,6 +389,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 5u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 0,
                 tf_sum: 0u32,
                 block_wand_fieldnorm_id: 0,
@@ -393,6 +419,7 @@
             skip_reader.block_info(),
             BlockInfo::BitPacked {
                 doc_num_bits: 2u8,
+                strict_delta_encoded: true,
                 tf_num_bits: 0,
                 tf_sum: 0u32,
                 block_wand_fieldnorm_id: 0,
@@ -402,4 +429,18 @@
         skip_reader.advance();
         assert_eq!(skip_reader.block_info(), BlockInfo::VInt { num_docs: 0u32 });
     }
+
+    #[test]
+    fn test_encode_decode_bitwidth() {
+        for bitwidth in 0..=32 {
+            for delta_1 in [false, true] {
+                assert_eq!(
+                    (bitwidth, delta_1),
+                    decode_bitwidth(encode_bitwidth(bitwidth, delta_1))
+                );
+            }
+        }
+        assert_eq!(0b01000010, encode_bitwidth(0b10, true));
+        assert_eq!(0b00000010, encode_bitwidth(0b10, false));
+    }
 }
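The flag travels in bit 6 of the existing `doc_num_bits` byte, which is free because a bit width never exceeds 32 and therefore fits in the low 6 bits. Skip entries written by older versions have bit 6 clear and decode as non-strict, which is presumably why both decode paths are kept on the read side. For example:

    assert_eq!(encode_bitwidth(7, true), 0b0100_0111);
    assert_eq!(decode_bitwidth(0b0100_0111), (7, true));
    // An entry from an older index: bit 6 is 0, so it decodes as
    // (7, false) and the legacy non-strict delta path is used.
    assert_eq!(decode_bitwidth(0b0000_0111), (7, false));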
diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs
index 2ec31794a..ef82a31d8 100644
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -93,7 +93,7 @@ impl TermScorer {
     }
 
     pub fn last_doc_in_block(&self) -> DocId {
-        self.postings.block_cursor.skip_reader.last_doc_in_block()
+        self.postings.block_cursor.skip_reader().last_doc_in_block()
     }
 }