From f38daab7f7e8b6cca1884d093fa862777bd58043 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 30 Apr 2021 14:47:58 +0200 Subject: [PATCH] add base value to blocked bitpacker --- bitpacker/src/blocked_bitpacker.rs | 32 ++++++++++++++++++++++-------- src/indexer/segment_serializer.rs | 5 +++++ src/indexer/segment_writer.rs | 1 + 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/bitpacker/src/blocked_bitpacker.rs b/bitpacker/src/blocked_bitpacker.rs index ea76bae97..97c124b1d 100644 --- a/bitpacker/src/blocked_bitpacker.rs +++ b/bitpacker/src/blocked_bitpacker.rs @@ -25,12 +25,16 @@ pub struct BlockedBitpacker { #[derive(Debug, Clone, Default)] struct BlockedBitpackerEntryMetaData { encoded: u64, + base_value: u64, } impl BlockedBitpackerEntryMetaData { - fn new(offset: u64, num_bits: u8) -> Self { + fn new(offset: u64, num_bits: u8, base_value: u64) -> Self { let encoded = offset | (num_bits as u64) << (64 - 8); - Self { encoded } + Self { + encoded, + base_value, + } } fn offset(&self) -> u64 { (self.encoded << 8) >> 8 @@ -38,11 +42,14 @@ impl BlockedBitpackerEntryMetaData { fn num_bits(&self) -> u8 { (self.encoded >> 56) as u8 } + fn base_value(&self) -> u64 { + self.base_value + } } #[test] fn metadata_test() { - let meta = BlockedBitpackerEntryMetaData::new(50000, 6); + let meta = BlockedBitpackerEntryMetaData::new(50000, 6, 40000); assert_eq!(meta.offset(), 50000); assert_eq!(meta.num_bits(), 6); } @@ -80,10 +87,11 @@ impl BlockedBitpacker { return; } let mut bit_packer = BitPacker::new(); + let base_value = self.cache.iter().min().unwrap(); let num_bits_block = self .cache .iter() - .map(|val| compute_num_bits(*val)) + .map(|val| compute_num_bits(*val - base_value)) .max() .unwrap(); self.compressed_blocks @@ -95,12 +103,20 @@ impl BlockedBitpacker { // (to be done in BitPacker) for val in self.cache.iter() { bit_packer - .write(*val, num_bits_block, &mut self.compressed_blocks) + .write( + *val - base_value, + num_bits_block, + &mut 
self.compressed_blocks, + ) .expect("cannot write bitpacking to output"); // write to im can't fail } bit_packer.flush(&mut self.compressed_blocks).unwrap(); self.offset_and_bits - .push(BlockedBitpackerEntryMetaData::new(offset, num_bits_block)); + .push(BlockedBitpackerEntryMetaData::new( + offset, + num_bits_block, + *base_value, + )); self.cache.clear(); self.compressed_blocks @@ -114,14 +130,14 @@ impl BlockedBitpacker { pos_in_block as u64, &self.compressed_blocks[metadata.offset() as usize..], ); - unpacked + unpacked + metadata.base_value() } else { self.cache[pos_in_block] } } pub fn iter(&self) -> impl Iterator<Item = u64> + '_ { - // todo performance: we could decompress the whole block and cache it instead + // todo performance: we could decompress a whole block and cache it instead let bitpacked_elems = self.offset_and_bits.len() * BLOCK_SIZE; let iter = (0..bitpacked_elems) .map(move |idx| self.get(idx)) diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs index 9b8babde9..97c0196b8 100644 --- a/src/indexer/segment_serializer.rs +++ b/src/indexer/segment_serializer.rs @@ -36,6 +36,11 @@ impl SegmentSerializer { }) } + /// The memory used (including children) + pub fn mem_usage(&self) -> usize { + self.store_writer.mem_usage() + } + pub fn segment(&self) -> &Segment { &self.segment } diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index e4064b106..586dfbd25 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -113,6 +113,7 @@ impl SegmentWriter { self.multifield_postings.mem_usage() + self.fieldnorms_writer.mem_usage() + self.fast_field_writers.mem_usage() + + self.segment_serializer.mem_usage() } /// Indexes a new document