From 5b7f2f7100060fc71f08dc04b00559cc3eced792 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 1 May 2016 11:58:54 +0900 Subject: [PATCH] Renamed S4BP128* --- cpp/encode.cpp | 23 +++++++++++++---------- src/compression/mod.rs | 36 ++++++++++++++++++------------------ src/core/reader.rs | 3 ++- src/postings/serializer.rs | 6 +++--- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/cpp/encode.cpp b/cpp/encode.cpp index fa538b0f1..f4f6f24af 100644 --- a/cpp/encode.cpp +++ b/cpp/encode.cpp @@ -70,7 +70,7 @@ extern "C" { } - size_t encode_sorted_native( + size_t encode_s4_bp128_dm_native( uint32_t* begin, const size_t num_els, uint32_t* output, @@ -83,6 +83,17 @@ extern "C" { return output_length; } + size_t decode_s4_bp128_dm_native( + const uint32_t* compressed_data, + const size_t compressed_size, + uint32_t* uncompressed, + const size_t uncompressed_capacity) { + size_t num_ints = uncompressed_capacity; + codec_sorted -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints); + return num_ints; + } + + size_t encode_unsorted_native( uint32_t* begin, const size_t num_els, @@ -96,15 +107,7 @@ extern "C" { return output_length; } - size_t decode_sorted_native( - const uint32_t* compressed_data, - const size_t compressed_size, - uint32_t* uncompressed, - const size_t uncompressed_capacity) { - size_t num_ints = uncompressed_capacity; - codec_sorted -> decodeArray(compressed_data, compressed_size, uncompressed, num_ints); - return num_ints; - } + size_t decode_unsorted_native( const uint32_t* compressed_data, diff --git a/src/compression/mod.rs b/src/compression/mod.rs index 9e816ca64..ca97f6b5b 100644 --- a/src/compression/mod.rs +++ b/src/compression/mod.rs @@ -9,8 +9,8 @@ extern { fn intersection_native(left_data: *const u32, left_size: size_t, right_data: *const u32, right_size: size_t, output: *mut u32) -> size_t; // complete s4-bp128-dm - fn encode_sorted_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t; - fn decode_sorted_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t; + fn encode_s4_bp128_dm_native(data: *mut u32, num_els: size_t, output: *mut u32, output_capacity: size_t) -> size_t; + fn decode_s4_bp128_dm_native(compressed_data: *const u32, compressed_size: size_t, uncompressed: *mut u32, output_capacity: size_t) -> size_t; // bp128, only encodes group of 128 u32 at a time fn encode_sorted_block128_native(data: *mut u32, output: *mut u32, output_capacity: size_t) -> size_t; @@ -115,7 +115,7 @@ impl Block128Encoder { let written_size: usize; unsafe { ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), 128); - written_size = encode_sorted_native( + written_size = encode_s4_bp128_dm_native( self.input_buffer.as_mut_ptr(), 128, self.output_buffer.as_mut_ptr(), @@ -139,7 +139,7 @@ impl Block128Decoder { compressed_data: &[u32], uncompressed_values: &mut [u32]) -> size_t { unsafe { - return decode_sorted_native( + return decode_s4_bp128_dm_native( compressed_data.as_ptr(), compressed_data.len() as size_t, uncompressed_values.as_mut_ptr(), @@ -152,15 +152,15 @@ impl Block128Decoder { // s4-bp128-dm -pub struct Encoder { +pub struct S4BP128Encoder { input_buffer: Vec, output_buffer: Vec, } -impl Encoder { +impl S4BP128Encoder { - pub fn new() -> Encoder { - Encoder { + pub fn new() -> S4BP128Encoder { + S4BP128Encoder { input_buffer: Vec::new(), output_buffer: Vec::new(), } @@ -177,7 +177,7 @@ impl Encoder { // TODO use clone_from when available unsafe { ptr::copy_nonoverlapping(input.as_ptr(), self.input_buffer.as_mut_ptr(), input_len); - let written_size = encode_sorted_native( + let written_size = encode_s4_bp128_dm_native( self.input_buffer.as_mut_ptr(), input_len as size_t, self.output_buffer.as_mut_ptr(), @@ -190,19 +190,19 @@ impl Encoder { -pub struct Decoder; +pub struct S4BP128Decoder; -impl Decoder { +impl S4BP128Decoder { - pub fn new() -> Decoder { - Decoder + pub fn new() -> S4BP128Decoder { + S4BP128Decoder } pub fn decode_sorted(&self, compressed_data: &[u32], uncompressed_values: &mut [u32]) -> size_t { unsafe { - return decode_sorted_native( + return decode_s4_bp128_dm_native( compressed_data.as_ptr(), compressed_data.len() as size_t, uncompressed_values.as_mut_ptr(), @@ -281,7 +281,7 @@ mod tests { #[test] fn test_encode_big() { - let mut encoder = Encoder::new(); + let mut encoder = S4BP128Encoder::new(); let num_ints = 10000 as usize; let expected_length = 1274; let input: Vec = (0..num_ints as u32) @@ -289,7 +289,7 @@ mod tests { .into_iter().collect(); let encoded_data = encoder.encode_sorted(&input); assert_eq!(encoded_data.len(), expected_length); - let decoder = Decoder::new(); + let decoder = S4BP128Decoder::new(); let mut decoded_data: Vec = (0..num_ints as u32).collect(); assert_eq!(num_ints, decoder.decode_sorted(&encoded_data[..], &mut decoded_data)); assert_eq!(decoded_data, input); @@ -368,10 +368,10 @@ mod tests { fn bench_decode(b: &mut Bencher) { const TEST_SIZE: usize = 1_000_000; let arr = generate_array(TEST_SIZE, 0.1); - let mut encoder = Encoder::new(); + let mut encoder = S4BP128Encoder::new(); let encoded = encoder.encode_sorted(&arr); let mut uncompressed: Vec = (0..TEST_SIZE as u32).collect(); - let decoder = Decoder; + let decoder = S4BP128Decoder; b.iter(|| { decoder.decode_sorted(&encoded, &mut uncompressed); }); diff --git a/src/core/reader.rs b/src/core/reader.rs index 59afdfe1f..c271efa04 100644 --- a/src/core/reader.rs +++ b/src/core/reader.rs @@ -19,6 +19,7 @@ use core::convert_to_ioerror; use common::BinarySerializable; use fastfield::{U32FastFieldsReader, U32FastFieldReader}; use compression; +use compression::S4BP128Decoder; use std::mem; impl fmt::Debug for SegmentReader { @@ -74,7 +75,7 @@ impl SegmentPostings { let mut doc_ids: Vec = Vec::with_capacity(doc_freq as usize); unsafe { doc_ids.set_len(doc_freq as usize); } { - let decoder = compression::Decoder::new(); + let decoder = compression::S4BP128Decoder::new(); decoder.decode_sorted(&data_u32[1..(num_u32s+1) as usize], &mut doc_ids); SegmentPostings(doc_ids) } diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 5b497650a..1e4eda50e 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -2,7 +2,7 @@ use datastruct::FstMapBuilder; use super::TermInfo; use schema::Term; use directory::WritePtr; -use compression; +use compression::S4BP128Encoder; use DocId; use core::index::Segment; use std::io; @@ -15,7 +15,7 @@ pub struct PostingsSerializer { positions_write: WritePtr, written_bytes_postings: usize, written_bytes_positions: usize, - encoder: compression::Encoder, + encoder: S4BP128Encoder, doc_ids: Vec, } @@ -32,7 +32,7 @@ impl PostingsSerializer { positions_write: positions_write, written_bytes_postings: 0, written_bytes_positions: 0, - encoder: compression::Encoder::new(), + encoder: S4BP128Encoder::new(), doc_ids: Vec::new(), }) }