From f4d271177c1d8309ce8d3bf5bbe2cbeaa8bcbfc8 Mon Sep 17 00:00:00 2001
From: Pascal Seitz <pascal.seitz@gmail.com>
Date: Fri, 11 Jun 2021 09:44:28 +0200
Subject: [PATCH] add inline, add readme

---
 fastfield_codecs/README.md                    | 17 +++++++++++++++++
 fastfield_codecs/src/bitpacked.rs             |  3 +++
 fastfield_codecs/src/linearinterpol.rs        | 14 ++++++++++++++
 .../src/multilinearinterpol/mod.rs            | 19 ++++++-------------
 4 files changed, 40 insertions(+), 13 deletions(-)
 create mode 100644 fastfield_codecs/README.md
diff --git a/fastfield_codecs/README.md b/fastfield_codecs/README.md
new file mode 100644
index 000000000..3dfe08a36
--- /dev/null
+++ b/fastfield_codecs/README.md
@@ -0,0 +1,17 @@
+
+
+# Fast Field Codecs
+
+This crate contains various fast field codecs, used to compress/decompress fast field data in tantivy.
+
+## Contributing
+
+Contributing is pretty straightforward. Since bitpacking is the simplest codec, you can use it as a reference implementation.
+
+A codec needs to implement 3 parts:
+
+- A reader implementing `CodecReader` to read the codec.
+- A serializer implementing `FastFieldSerializerEstimate` for compression estimation.
+- `CodecId`, to identify the codec.
+
+
diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs
index 8d39378f0..60c8a9ae6 100644
--- a/fastfield_codecs/src/bitpacked.rs
+++ b/fastfield_codecs/src/bitpacked.rs
@@ -34,12 +34,15 @@ impl<'data> CodecReader for BitpackedFastFieldReader {
             bit_unpacker,
         })
     }
+    #[inline]
     fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
         self.min_value_u64 + self.bit_unpacker.get(doc, &data)
     }
+    #[inline]
     fn min_value(&self) -> u64 {
         self.min_value_u64
     }
+    #[inline]
     fn max_value(&self) -> u64 {
         self.max_value_u64
     }
diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs
index e3f1fa897..be69311fe 100644
--- a/fastfield_codecs/src/linearinterpol.rs
+++ b/fastfield_codecs/src/linearinterpol.rs
@@ -152,6 +152,7 @@ impl LinearInterpolFastFieldSerializer {
     }
 }
 
+#[inline]
 fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
     if num_vals <= 1 {
         return 0.0;
@@ -161,9 +162,11 @@ fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
     ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
 }
 
+#[inline]
 fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
     first_val + (pos as f32 * slope) as u64
 }
+
 impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
     /// estimation for linear interpolation is hard because, you don't know
     /// where the local maxima for the deviation of the calculated value are and
@@ -219,6 +222,7 @@ impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
     }
 }
 
+#[inline]
 fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
     if x < y {
         y - x
@@ -286,6 +290,16 @@ mod tests {
         create_and_validate(&data, "large amplitude");
     }
     #[test]
+    fn linear_interpol_fast_concave_data() {
+        let data = vec![0, 1, 2, 5, 8, 10, 20, 50];
+        create_and_validate(&data, "concave data");
+    }
+    #[test]
+    fn linear_interpol_fast_convex_data() {
+        let data = vec![0, 40, 60, 70, 75, 77];
+        create_and_validate(&data, "convex data");
+    }
+    #[test]
     fn linear_interpol_fast_field_test_simple() {
         let data = (10..=20_u64).collect::<Vec<_>>();
 
diff --git a/fastfield_codecs/src/multilinearinterpol/mod.rs b/fastfield_codecs/src/multilinearinterpol/mod.rs
index be1a4e144..8fec05c31 100644
--- a/fastfield_codecs/src/multilinearinterpol/mod.rs
+++ b/fastfield_codecs/src/multilinearinterpol/mod.rs
@@ -70,12 +70,7 @@ struct Function {
 impl Function {
     fn calc_slope(&mut self) {
         let num_vals = self.end_pos - self.start_pos;
-        let amplitude = self.value_end_pos as i64 - self.value_start_pos as i64;
-        if num_vals <= 1 {
-            self.slope = amplitude as f32;
-        } else {
-            self.slope = ((amplitude as f64) / (num_vals as u64 - 1) as f64) as f32;
-        }
+        self.slope = get_slope(self.value_start_pos, self.value_end_pos, num_vals);
     }
     // split the interpolation into two function, change self and return the second split
     fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function {
@@ -112,14 +107,12 @@ impl BinarySerializable for Function {
         let num_bits = u8::deserialize(reader)?;
         let interpolation = Function {
             data_start_offset,
-            start_pos: 0,
-            end_pos: 0,
-            value_end_pos: 0,
             value_start_pos,
             positive_val_offset: offset,
             num_bits,
             bit_unpacker: BitUnpacker::new(num_bits),
             slope,
+            ..Default::default()
         };
 
         Ok(interpolation)
@@ -160,11 +153,13 @@ impl BinarySerializable for MultiLinearInterpolFooter {
     }
 }
 
+#[inline]
 fn get_interpolation_position(doc: u64) -> usize {
     let index = doc / CHUNK_SIZE;
     index as usize
 }
 
+#[inline]
 fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Function {
     &interpolations[get_interpolation_position(doc)]
 }
@@ -220,13 +215,9 @@ impl MultiLinearInterpolFastFieldSerializer {
         let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1);
 
         let mut first_function = Function {
-            data_start_offset: 0,
-            start_pos: 0,
             end_pos: stats.num_vals,
             value_start_pos: first_val,
             value_end_pos: last_val,
-            slope: 0.0,
-            positive_val_offset: 0,
             ..Default::default()
         };
         first_function.calc_slope();
@@ -308,10 +299,12 @@ impl MultiLinearInterpolFastFieldSerializer {
         Ok(())
     }
 }
+#[inline]
 fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
     ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
 }
 
+#[inline]
 fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
     (first_val as i64 + (pos as f32 * slope) as i64) as u64
 }