calc mem_usage of more structs

calc mem_usage of more structs in index creation
add some comments
Pascal Seitz
2021-04-30 14:16:39 +02:00
parent 83cf638a2e
commit 25b9429929
9 changed files with 73 additions and 11 deletions


@@ -10,21 +10,24 @@ mod tests {
     fn bench_blockedbitp_read(b: &mut Bencher) {
         let mut blocked_bitpacker = BlockedBitpacker::new();
         for val in 0..=21500 {
-            blocked_bitpacker.add(val);
+            blocked_bitpacker.add(val * val);
         }
         b.iter(|| {
+            let mut out = 0;
             for val in 0..=21500 {
-                blocked_bitpacker.get(val);
+                out = blocked_bitpacker.get(val);
             }
+            out
         });
     }
     #[bench]
-    fn bench_blockbitp_create(b: &mut Bencher) {
+    fn bench_blockedbitp_create(b: &mut Bencher) {
         b.iter(|| {
             let mut blocked_bitpacker = BlockedBitpacker::new();
             for val in 0..=21500 {
-                blocked_bitpacker.add(val);
+                blocked_bitpacker.add(val * val);
             }
+            blocked_bitpacker
         });
     }
 }
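The benchmarks now return the computed value from the `b.iter` closure (`out` in the read bench, the packer itself in the create bench); with the standard `test::Bencher`, the closure's return value is kept by the harness, so the measured work cannot be optimized away as dead code. A minimal sketch of that pattern, with illustrative names not taken from the commit:

#![feature(test)]
extern crate test;
use test::Bencher;

// Illustrative benchmark (not part of the commit): returning `out` from the
// closure hands it to the benchmark harness, preventing dead-code elimination.
#[bench]
fn bench_sum(b: &mut Bencher) {
    let data: Vec<u64> = (0..21500).collect();
    b.iter(|| {
        let mut out = 0u64;
        for &val in &data {
            out += val;
        }
        out
    });
}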


@@ -4,7 +4,7 @@ use super::{bitpacker::BitPacker, compute_num_bits};
 const BLOCK_SIZE: usize = 128;
-/// BlockedBitpacker compresses data in blocks of
+/// `BlockedBitpacker` compresses data in blocks of
 /// 128 elements, while keeping an index on it
 ///
 #[derive(Debug, Clone)]
@@ -16,6 +16,12 @@ pub struct BlockedBitpacker {
     offset_and_bits: Vec<BlockedBitpackerEntryMetaData>,
 }
+/// `BlockedBitpackerEntryMetaData` encodes the
+/// offset and bit_width into a u64 bit field
+///
+/// This saves some space, since 7 bytes are more
+/// than enough for the offset, and it keeps access
+/// fast because of alignment
 #[derive(Debug, Clone, Default)]
 struct BlockedBitpackerEntryMetaData {
     encoded: u64,
@@ -23,7 +29,7 @@ struct BlockedBitpackerEntryMetaData {
 impl BlockedBitpackerEntryMetaData {
     fn new(offset: u64, num_bits: u8) -> Self {
-        let encoded = offset | (num_bits as u64) << 56;
+        let encoded = offset | (num_bits as u64) << (64 - 8);
         Self { encoded }
     }
     fn offset(&self) -> u64 {
@@ -33,6 +39,7 @@ impl BlockedBitpackerEntryMetaData {
         (self.encoded >> 56) as u8
     }
 }
+
 #[test]
 fn metadata_test() {
     let meta = BlockedBitpackerEntryMetaData::new(50000, 6);
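For readers skimming the diff, a standalone sketch of the bit-field layout described above: the block offset lives in the lower 56 bits and the bit width in the upper 8. The decoding mask below is an assumption for illustration; the crate's own `offset()` may be written differently.

// Sketch of the `BlockedBitpackerEntryMetaData` encoding.
fn encode(offset: u64, num_bits: u8) -> u64 {
    offset | (num_bits as u64) << (64 - 8)
}

fn offset(encoded: u64) -> u64 {
    encoded & ((1u64 << 56) - 1) // assumed mask: keep only the lower 7 bytes
}

fn num_bits(encoded: u64) -> u8 {
    (encoded >> 56) as u8
}

fn main() {
    let encoded = encode(50_000, 6); // same values as `metadata_test` above
    assert_eq!(offset(encoded), 50_000);
    assert_eq!(num_bits(encoded), 6);
}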
@@ -51,8 +58,10 @@ impl BlockedBitpacker {
         }
     }
-    pub fn get_memory_usage(&self) -> usize {
-        self.compressed_blocks.capacity()
+    /// The memory used (including child allocations)
+    pub fn mem_usage(&self) -> usize {
+        std::mem::size_of::<BlockedBitpacker>()
+            + self.compressed_blocks.capacity()
             + self.offset_and_bits.capacity()
                 * std::mem::size_of_val(&self.offset_and_bits.get(0).cloned().unwrap_or_default())
             + self.cache.capacity()
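The accounting pattern here, the struct's own `size_of` plus the reserved heap capacity of each owned `Vec`, is the same one the commit rolls out to other structs. A sketch of the pattern applied to a hypothetical struct, for illustration only:

use std::mem;

// Hypothetical struct, used only to illustrate the `mem_usage` accounting pattern.
struct Example {
    bytes: Vec<u8>,
    values: Vec<u64>,
}

impl Example {
    fn mem_usage(&self) -> usize {
        mem::size_of::<Example>()                          // the struct itself
            + self.bytes.capacity() * mem::size_of::<u8>() // reserved heap bytes
            + self.values.capacity() * mem::size_of::<u64>()
    }
}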
@@ -80,6 +89,10 @@ impl BlockedBitpacker {
         self.compressed_blocks
             .resize(self.compressed_blocks.len() - 8, 0); // remove padding for bitpacker
         let offset = self.compressed_blocks.len() as u64;
+        // todo performance: for some bit_widths we could
+        // encode multiple vals into the mini_buffer
+        // before checking whether to flush
+        // (to be done in BitPacker)
         for val in self.cache.iter() {
             bit_packer
                 .write(*val, num_bits_block, &mut self.compressed_blocks)
@@ -108,6 +121,7 @@ impl BlockedBitpacker {
     }
     pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
+        // todo performance: we could decompress the whole block and cache it instead
        let bitpacked_elems = self.offset_and_bits.len() * BLOCK_SIZE;
         let iter = (0..bitpacked_elems)
             .map(move |idx| self.get(idx))
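For orientation, a rough usage sketch of the surface touched by this commit: `add`/`get` as exercised in the benchmarks, plus the new `mem_usage` and the `iter` shown here. The crate path and exact signatures are assumptions for illustration.

// Assumed import path; per the re-export above, the type is available at the crate root.
use tantivy_bitpacker::BlockedBitpacker;

fn main() {
    let mut packer = BlockedBitpacker::new();
    for val in 0..=21500u64 {
        packer.add(val * val);
    }
    let tenth = packer.get(10);        // random access, as in the read bench
    let count = packer.iter().count(); // sequential access over stored values
    let bytes = packer.mem_usage();    // footprint including heap-allocated children
    println!("value[10] = {}, {} values, ~{} bytes", tenth, count, bytes);
}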


@@ -14,7 +14,7 @@ pub use crate::blocked_bitpacker::BlockedBitpacker;
 ///
 /// The logic is slightly more convoluted here as, for optimization
 /// reasons, we want to ensure that a value spans over at most 8 bytes
-/// of aligns bytes.
+/// of aligned bytes.
 ///
 /// Spanning over 9 bytes is possible, for instance, if we do
 /// bitpacking with an amplitude of 63 bits.
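A worked version of that worst case, as a small self-contained sketch (the helper is illustrative, not crate API):

// How many bytes a bit-packed value touches, given its starting bit offset
// in the stream and its width in bits.
fn bytes_spanned(start_bit: usize, num_bits: usize) -> usize {
    let first_byte = start_bit / 8;
    let last_byte = (start_bit + num_bits - 1) / 8;
    last_byte - first_byte + 1
}

fn main() {
    // A 63-bit value starting at bit 7 (the last bit of byte 0) covers
    // bits 7..=69, i.e. bytes 0..=8: 9 bytes, the worst case noted above.
    assert_eq!(bytes_spanned(7, 63), 9);
    // Starting on a byte boundary, 63 bits fit within 8 bytes.
    assert_eq!(bytes_spanned(0, 63), 8);
}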