From b493051fe4e8aa6bfdf10fcd869864e10048c272 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 24 Nov 2016 23:11:30 +0900 Subject: [PATCH] Compression working without simd working --- src/common/mod.rs | 3 +- src/compression/compression.rs | 77 ++++++++++++++++++++++++++-------- src/fastfield/reader.rs | 2 +- src/fastfield/serializer.rs | 2 +- 4 files changed, 62 insertions(+), 22 deletions(-) diff --git a/src/common/mod.rs b/src/common/mod.rs index 50549ccac..0ab876a81 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,7 +1,7 @@ mod serialize; mod timer; mod vint; -mod bitpacker; +pub mod bitpacker; pub use self::serialize::BinarySerializable; @@ -9,7 +9,6 @@ pub use self::timer::Timing; pub use self::timer::TimerTree; pub use self::timer::OpenTimer; pub use self::vint::VInt; -pub use self::bitpacker::compute_num_bits; use std::io; diff --git a/src/compression/compression.rs b/src/compression/compression.rs index b336f386b..2c15a73a2 100644 --- a/src/compression/compression.rs +++ b/src/compression/compression.rs @@ -47,28 +47,69 @@ mod compression { #[cfg(not(feature="simdcompression"))] mod compression { - pub fn compress_sorted(vals: &[u32], output: &mut [u8], offset: u32) -> usize { - panic!("aaa"); - } - - pub fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset_: u32) -> usize { - let len = uncompress_unsorted(compressed_data, output); - let mut offset = offset_; - for i in 0..len { - output[i] += offset; - offset = output[i]; + + use compression::NUM_DOCS_PER_BLOCK; + use common::bitpacker::compute_num_bits; + use common::bitpacker::{BitPacker, BitUnpacker}; + use std::cmp; + use std::io::Write; + + pub fn compress_sorted(vals: &[u32], mut output: &mut [u8], offset: u32) -> usize { + // TODO remove the alloc + let mut deltas = Vec::with_capacity(NUM_DOCS_PER_BLOCK); + unsafe { deltas.set_len(NUM_DOCS_PER_BLOCK ); } + let mut max_delta = 0; + { + let mut local_offset = offset; + for i in 0..NUM_DOCS_PER_BLOCK { + let val = vals[i]; + let delta = val - local_offset; + max_delta = cmp::max(max_delta, delta); + deltas[i] = delta; + local_offset = val; + } } - len + let num_bits = compute_num_bits(max_delta); + output.write_all(&[num_bits]).unwrap(); + let mut bit_packer = BitPacker::new(output, num_bits as usize); + for val in &deltas { + bit_packer.write(*val).unwrap(); + } + let (_, written_size) = bit_packer.close().expect("packing in memory should never fail"); + written_size + 1 } - pub fn compress_unsorted(vals: &[u32], output: &mut [u8]) -> usize { - // let max = vals.iter().max().expect("compress unsorted called with an empty array"); - // let num_bits = - panic!("aaa"); + pub fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], mut offset: u32) -> usize { + let num_bits = compressed_data[0]; + let bit_unpacker = BitUnpacker::new(&compressed_data[1..], num_bits as usize); + for i in 0..NUM_DOCS_PER_BLOCK { + let delta = bit_unpacker.get(i); + let val = offset + delta; + output[i] = val; + offset = val; + } + 1 + (num_bits as usize * NUM_DOCS_PER_BLOCK + 7) / 8 + } + + pub fn compress_unsorted(vals: &[u32], mut output: &mut [u8]) -> usize { + let max = vals.iter().cloned().max().expect("compress unsorted called with an empty array"); + let num_bits = compute_num_bits(max); + output.write_all(&[num_bits]).unwrap(); + let mut bit_packer = BitPacker::new(output, num_bits as usize); + for val in vals { + bit_packer.write(*val).unwrap(); + } + let (_, written_size) = bit_packer.close().expect("packing in memory should never fail"); + 1 + written_size } pub fn uncompress_unsorted(compressed_data: &[u8], output: &mut [u32]) -> usize { - panic!("aaa"); + let num_bits = compressed_data[0]; + let bit_unpacker = BitUnpacker::new(&compressed_data[1..], num_bits as usize); + for i in 0..NUM_DOCS_PER_BLOCK { + output[i] = bit_unpacker.get(i); + } + 1 + (num_bits as usize * NUM_DOCS_PER_BLOCK + 7) / 8 } } @@ -296,12 +337,12 @@ mod tests { let n = 128; let vals: Vec = (0..n).map(|i| 11u32 + (i as u32)*7u32 % 12).collect(); let mut encoder = BlockEncoder::new(); - let compressed_data = encoder.compress_block_sorted(&vals, 10); + let compressed_data = encoder.compress_block_unsorted(&vals); compressed.extend_from_slice(compressed_data); compressed.push(173u8); let mut decoder = BlockDecoder::new(); { - let remaining_data = decoder.uncompress_block_sorted(&compressed, 10); + let remaining_data = decoder.uncompress_block_unsorted(&compressed); assert_eq!(remaining_data.len(), 1); assert_eq!(remaining_data[0], 173u8); } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 1b1c3014b..ee3c8b670 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -11,7 +11,7 @@ use schema::FAST; use directory::{WritePtr, RAMDirectory, Directory}; use fastfield::FastFieldSerializer; use fastfield::U32FastFieldsWriter; -use common::compute_num_bits; +use common::bitpacker::compute_num_bits; lazy_static! { diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index 13e71b239..06d95462d 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -1,7 +1,7 @@ use common::BinarySerializable; use directory::WritePtr; use schema::Field; -use common::compute_num_bits; +use common::bitpacker::compute_num_bits; use std::io; use std::io::{Write, Seek, SeekFrom};