add inline, add readme

2026-01-04 16:22:55 +00:00 · 2021-06-11 09:44:28 +02:00
parent 451538fecf
commit f4d271177c
4 changed files with 40 additions and 13 deletions
--- a/fastfield_codecs/README.md
+++ b/fastfield_codecs/README.md
@@ -0,0 +1,17 @@
+
+
+# Fast Field Codecs
+
+This crate contains various fast field codecs, used to compress/decompress fast field data in tantivy.
+
+## Contributing
+
+Contributing is pretty straightforward. Since bitpacking is the simplest codec, you can use `src/bitpacked.rs` as a reference implementation.
+
+A codec needs to implement 3 parts:
+
+- A reader implementing `CodecReader` to read the codec.
+- A serializer implementing `FastFieldSerializerEstimate` for compression estimation.
+- `CodecId`, to identify the codec.
+
+
--- a/fastfield_codecs/src/bitpacked.rs
+++ b/fastfield_codecs/src/bitpacked.rs
@@ -34,12 +34,15 @@ impl<'data> CodecReader for BitpackedFastFieldReader {
            bit_unpacker,
        })
    }
+    #[inline]
    /// Returns the value stored for `doc`.
    ///
    /// The result is `min_value_u64` plus whatever `bit_unpacker` reads for
    /// `doc` out of `data`.
    fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
        // `data` is already a slice reference, so it is passed through as-is
        // instead of being borrowed a second time.
        self.min_value_u64 + self.bit_unpacker.get(doc, data)
    }
+    #[inline]
    /// Returns the stored `min_value_u64` (the base that `get_u64` adds to
    /// every value it reads).
    fn min_value(&self) -> u64 {
        self.min_value_u64
    }
+    #[inline]
    /// Returns the stored `max_value_u64`.
    fn max_value(&self) -> u64 {
        self.max_value_u64
    }
--- a/fastfield_codecs/src/linearinterpol.rs
+++ b/fastfield_codecs/src/linearinterpol.rs
@@ -152,6 +152,7 @@ impl LinearInterpolFastFieldSerializer {
    }
 }

+#[inline]
 fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
    if num_vals <= 1 {
        return 0.0;
@@ -161,9 +162,11 @@ fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
    ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
 }

+#[inline]
 /// Evaluates the linear approximation at position `pos`.
 ///
 /// The offset `pos * slope` is computed in `f32` and then cast to `u64`, so
 /// the fractional part is dropped and a negative product saturates to `0`
 /// (the result is then just `first_val`).
 fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let offset = (pos as f32 * slope) as u64;
    first_val + offset
 }
+
 impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
    /// estimation for linear interpolation is hard because, you don't know
    /// where the local maxima for the deviation of the calculated value are and
@@ -219,6 +222,7 @@ impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
    }
 }

+#[inline]
 fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
    if x < y {
        y - x
@@ -286,6 +290,16 @@ mod tests {
        create_and_validate(&data, "large amplitude");
    }
    #[test]
+    fn linear_interpol_fast_concave_data() {
+        let data = vec![0, 1, 2, 5, 8, 10, 20, 50];
+        create_and_validate(&data, "concave data");
+    }
+    #[test]
+    fn linear_interpol_fast_convex_data() {
+        let data = vec![0, 40, 60, 70, 75, 77];
+        create_and_validate(&data, "convex data");
+    }
+    #[test]
    fn linear_interpol_fast_field_test_simple() {
        let data = (10..=20_u64).collect::<Vec<_>>();

--- a/fastfield_codecs/src/multilinearinterpol/mod.rs
+++ b/fastfield_codecs/src/multilinearinterpol/mod.rs
@@ -70,12 +70,7 @@ struct Function {
 impl Function {
    fn calc_slope(&mut self) {
        let num_vals = self.end_pos - self.start_pos;
-        let amplitude = self.value_end_pos as i64 - self.value_start_pos as i64;
-        if num_vals <= 1 {
-            self.slope = amplitude as f32;
-        } else {
-            self.slope = ((amplitude as f64) / (num_vals as u64 - 1) as f64) as f32;
-        }
+        // Recompute the slope from this function's start/end values and store
+        // it. `get_slope` only returns the value; without the assignment
+        // `self.slope` would keep whatever it held before this call.
+        self.slope = get_slope(self.value_start_pos, self.value_end_pos, num_vals);
    }
    // split the interpolation into two function, change self and return the second split
    fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function {
@@ -112,14 +107,12 @@ impl BinarySerializable for Function {
        let num_bits = u8::deserialize(reader)?;
        let interpolation = Function {
            data_start_offset,
-            start_pos: 0,
-            end_pos: 0,
-            value_end_pos: 0,
            value_start_pos,
            positive_val_offset: offset,
            num_bits,
            bit_unpacker: BitUnpacker::new(num_bits),
            slope,
+            ..Default::default()
        };

        Ok(interpolation)
@@ -160,11 +153,13 @@ impl BinarySerializable for MultiLinearInterpolFooter {
    }
 }

+#[inline]
 /// Maps `doc` to the index of its chunk: `doc / CHUNK_SIZE` using integer
 /// division, converted to `usize`.
 fn get_interpolation_position(doc: u64) -> usize {
    (doc / CHUNK_SIZE) as usize
 }

+#[inline]
 /// Looks up the `Function` responsible for `doc`.
 ///
 /// The slice is indexed with `get_interpolation_position(doc)`; indexing
 /// panics if that position is outside `interpolations`.
 fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Function {
    let position = get_interpolation_position(doc);
    &interpolations[position]
 }
@@ -220,13 +215,9 @@ impl MultiLinearInterpolFastFieldSerializer {
        let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1);

        let mut first_function = Function {
-            data_start_offset: 0,
-            start_pos: 0,
            end_pos: stats.num_vals,
            value_start_pos: first_val,
            value_end_pos: last_val,
-            slope: 0.0,
-            positive_val_offset: 0,
            ..Default::default()
        };
        first_function.calc_slope();
@@ -308,10 +299,12 @@ impl MultiLinearInterpolFastFieldSerializer {
        Ok(())
    }
 }
+#[inline]
 /// Computes the slope of the straight line through the first and last value
 /// of a range holding `num_vals` values:
 /// `(last_val - first_val) / (num_vals - 1)`.
 ///
 /// The division is carried out in `f64` and the result narrowed to `f32`.
 /// For `num_vals <= 1` there is no interval to interpolate over, so `0.0` is
 /// returned. This avoids the `num_vals - 1` underflow for `0` and the
 /// division by zero (NaN / infinity) for `1`.
 fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
    if num_vals <= 1 {
        return 0.0;
    }
    ((last_val as f64 - first_val as f64) / (num_vals - 1) as f64) as f32
 }

+#[inline]
 /// Evaluates the linear approximation at position `pos`.
 ///
 /// The offset `pos * slope` is computed in `f32`, truncated to `i64` and
 /// added to `first_val` in signed arithmetic, so negative slopes move the
 /// result below `first_val`. The sum is then cast back to `u64`.
 fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let offset = (pos as f32 * slope) as i64;
    let interpolated = first_val as i64 + offset;
    interpolated as u64
 }