From f4d271177c1d8309ce8d3bf5bbe2cbeaa8bcbfc8 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 11 Jun 2021 09:44:28 +0200 Subject: [PATCH] add inline, add readme --- fastfield_codecs/README.md | 17 +++++++++++++++++ fastfield_codecs/src/bitpacked.rs | 3 +++ fastfield_codecs/src/linearinterpol.rs | 14 ++++++++++++++ .../src/multilinearinterpol/mod.rs | 19 ++++++------------- 4 files changed, 40 insertions(+), 13 deletions(-) create mode 100644 fastfield_codecs/README.md diff --git a/fastfield_codecs/README.md b/fastfield_codecs/README.md new file mode 100644 index 000000000..3dfe08a36 --- /dev/null +++ b/fastfield_codecs/README.md @@ -0,0 +1,17 @@ + + +# Fast Field Codecs + +This crate contains various fast field codecs, used to compress/decompress fast field data in tantivy. + +## Contributing + +Contributing is pretty straightforward. Since the bitpacking is the simplest compressor, you can check it for reference. + +A codec needs to implement 3 parts: + +A reader implementing `CodecReader` to read the codec. +A serializer implementing `FastFieldSerializerEstimate` for compression estimation. +`CodecId`, to identify the codec. 
+ + diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 8d39378f0..60c8a9ae6 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -34,12 +34,15 @@ impl<'data> CodecReader for BitpackedFastFieldReader { bit_unpacker, }) } + #[inline] fn get_u64(&self, doc: u64, data: &[u8]) -> u64 { self.min_value_u64 + self.bit_unpacker.get(doc, &data) } + #[inline] fn min_value(&self) -> u64 { self.min_value_u64 } + #[inline] fn max_value(&self) -> u64 { self.max_value_u64 } diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs index e3f1fa897..be69311fe 100644 --- a/fastfield_codecs/src/linearinterpol.rs +++ b/fastfield_codecs/src/linearinterpol.rs @@ -152,6 +152,7 @@ impl LinearInterpolFastFieldSerializer { } } +#[inline] fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { if num_vals <= 1 { return 0.0; @@ -161,9 +162,11 @@ fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32 } +#[inline] fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { first_val + (pos as f32 * slope) as u64 } + impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer { /// estimation for linear interpolation is hard because, you don't know /// where the local maxima for the deviation of the calculated value are and @@ -219,6 +222,7 @@ impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer { } } +#[inline] fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T { if x < y { y - x @@ -286,6 +290,16 @@ mod tests { create_and_validate(&data, "large amplitude"); } #[test] + fn linear_interpol_fast_concave_data() { + let data = vec![0, 1, 2, 5, 8, 10, 20, 50]; + create_and_validate(&data, "concave data"); + } + #[test] + fn linear_interpol_fast_convex_data() { + let data = vec![0, 40, 60, 70, 75, 77]; + create_and_validate(&data, "convex data"); + } + #[test] fn 
linear_interpol_fast_field_test_simple() { let data = (10..=20_u64).collect::<Vec<_>>(); diff --git a/fastfield_codecs/src/multilinearinterpol/mod.rs b/fastfield_codecs/src/multilinearinterpol/mod.rs index be1a4e144..8fec05c31 100644 --- a/fastfield_codecs/src/multilinearinterpol/mod.rs +++ b/fastfield_codecs/src/multilinearinterpol/mod.rs @@ -70,12 +70,7 @@ struct Function { impl Function { fn calc_slope(&mut self) { let num_vals = self.end_pos - self.start_pos; - let amplitude = self.value_end_pos as i64 - self.value_start_pos as i64; - if num_vals <= 1 { - self.slope = amplitude as f32; - } else { - self.slope = ((amplitude as f64) / (num_vals as u64 - 1) as f64) as f32; - } + self.slope = get_slope(self.value_start_pos, self.value_end_pos, num_vals); } // split the interpolation into two function, change self and return the second split fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function { @@ -112,14 +107,12 @@ impl BinarySerializable for Function { let num_bits = u8::deserialize(reader)?; let interpolation = Function { data_start_offset, - start_pos: 0, - end_pos: 0, - value_end_pos: 0, value_start_pos, positive_val_offset: offset, num_bits, bit_unpacker: BitUnpacker::new(num_bits), slope, + ..Default::default() }; Ok(interpolation) @@ -160,11 +153,13 @@ impl BinarySerializable for MultiLinearInterpolFooter { } } +#[inline] fn get_interpolation_position(doc: u64) -> usize { let index = doc / CHUNK_SIZE; index as usize } +#[inline] fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Function { &interpolations[get_interpolation_position(doc)] } @@ -220,13 +215,9 @@ impl MultiLinearInterpolFastFieldSerializer { let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1); let mut first_function = Function { - data_start_offset: 0, - start_pos: 0, end_pos: stats.num_vals, value_start_pos: first_val, value_end_pos: last_val, - slope: 0.0, - positive_val_offset: 0, ..Default::default() }; first_function.calc_slope(); @@ -308,10 +299,12 @@ impl 
MultiLinearInterpolFastFieldSerializer { Ok(()) } } +#[inline] fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32 } +#[inline] fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { (first_val as i64 + (pos as f32 * slope) as i64) as u64 }