add inline, add readme

This commit is contained in:
Pascal Seitz
2021-06-11 09:44:28 +02:00
parent 451538fecf
commit f4d271177c
4 changed files with 40 additions and 13 deletions

View File

@@ -0,0 +1,17 @@
# Fast Field Codecs
This crate contains various fast field codecs, used to compress/decompress fast field data in tantivy.
## Contributing
Contributing is pretty straightforward. Since bitpacking is the simplest codec, you can use its implementation as a reference.
A codec needs to implement 3 parts:
- A reader implementing `CodecReader` to read the codec.
- A serializer implementing `FastFieldSerializerEstimate` for compression estimation.
- `CodecId`, to identify the codec.

View File

@@ -34,12 +34,15 @@ impl<'data> CodecReader for BitpackedFastFieldReader {
bit_unpacker,
})
}
/// Returns the value for `doc`: `min_value_u64` plus whatever
/// `bit_unpacker.get` yields for `doc` when reading from `data`.
#[inline]
fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
    // `data` is already a `&[u8]`; pass it through without re-borrowing.
    let unpacked = self.bit_unpacker.get(doc, data);
    self.min_value_u64 + unpacked
}
/// Returns the `min_value_u64` field stored on this reader.
#[inline]
fn min_value(&self) -> u64 {
self.min_value_u64
}
/// Returns the `max_value_u64` field stored on this reader.
#[inline]
fn max_value(&self) -> u64 {
self.max_value_u64
}

View File

@@ -152,6 +152,7 @@ impl LinearInterpolFastFieldSerializer {
}
}
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
if num_vals <= 1 {
return 0.0;
@@ -161,9 +162,11 @@ fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
}
/// Evaluates the interpolation line at `pos`: `first_val` plus the
/// truncated product `pos * slope`.
///
/// The product is cast to `u64`, so a negative product saturates to 0 and
/// the result is then just `first_val`.
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let offset = (slope * pos as f32) as u64;
    first_val + offset
}
impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
/// estimation for linear interpolation is hard because, you don't know
/// where the local maxima for the deviation of the calculated value are and
@@ -219,6 +222,7 @@ impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
}
}
#[inline]
fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
if x < y {
y - x
@@ -286,6 +290,16 @@ mod tests {
create_and_validate(&data, "large amplitude");
}
/// Passes an increasing series whose steps mostly grow towards the end
/// to `create_and_validate` under the label "concave data".
#[test]
fn linear_interpol_fast_concave_data() {
    // Successive differences: 1, 1, 3, 3, 2, 10, 30.
    let values = vec![0, 1, 2, 5, 8, 10, 20, 50];
    create_and_validate(&values, "concave data");
}
/// Passes an increasing series whose steps shrink towards the end
/// to `create_and_validate` under the label "convex data".
#[test]
fn linear_interpol_fast_convex_data() {
    // Successive differences: 40, 20, 10, 5, 2.
    let values = vec![0, 40, 60, 70, 75, 77];
    create_and_validate(&values, "convex data");
}
#[test]
fn linear_interpol_fast_field_test_simple() {
let data = (10..=20_u64).collect::<Vec<_>>();

View File

@@ -70,12 +70,7 @@ struct Function {
impl Function {
fn calc_slope(&mut self) {
let num_vals = self.end_pos - self.start_pos;
let amplitude = self.value_end_pos as i64 - self.value_start_pos as i64;
if num_vals <= 1 {
self.slope = amplitude as f32;
} else {
self.slope = ((amplitude as f64) / (num_vals as u64 - 1) as f64) as f32;
}
get_slope(self.value_start_pos, self.value_end_pos, num_vals);
}
// split the interpolation into two function, change self and return the second split
fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function {
@@ -112,14 +107,12 @@ impl BinarySerializable for Function {
let num_bits = u8::deserialize(reader)?;
let interpolation = Function {
data_start_offset,
start_pos: 0,
end_pos: 0,
value_end_pos: 0,
value_start_pos,
positive_val_offset: offset,
num_bits,
bit_unpacker: BitUnpacker::new(num_bits),
slope,
..Default::default()
};
Ok(interpolation)
@@ -160,11 +153,13 @@ impl BinarySerializable for MultiLinearInterpolFooter {
}
}
/// Maps `doc` to an index by integer-dividing it by `CHUNK_SIZE`.
#[inline]
fn get_interpolation_position(doc: u64) -> usize {
    (doc / CHUNK_SIZE) as usize
}
/// Returns the entry of `interpolations` at the index that
/// `get_interpolation_position` computes for `doc`.
///
/// Panics if that index is out of bounds for `interpolations`.
#[inline]
fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Function {
    let chunk_idx = get_interpolation_position(doc);
    &interpolations[chunk_idx]
}
@@ -220,13 +215,9 @@ impl MultiLinearInterpolFastFieldSerializer {
let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1);
let mut first_function = Function {
data_start_offset: 0,
start_pos: 0,
end_pos: stats.num_vals,
value_start_pos: first_val,
value_end_pos: last_val,
slope: 0.0,
positive_val_offset: 0,
..Default::default()
};
first_function.calc_slope();
@@ -308,10 +299,12 @@ impl MultiLinearInterpolFastFieldSerializer {
Ok(())
}
}
/// Slope of the straight line through `first_val` (at position 0) and
/// `last_val` (at position `num_vals - 1`).
///
/// Returns `0.0` when `num_vals <= 1`: there is no span to divide by, and
/// evaluating `num_vals - 1` would underflow for 0 or divide by zero for 1.
/// The division is done in `f64` and the result narrowed to `f32`.
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
    if num_vals <= 1 {
        return 0.0;
    }
    let amplitude = last_val as f64 - first_val as f64;
    (amplitude / (num_vals - 1) as f64) as f32
}
/// Evaluates the interpolation line at `pos`: the truncated product
/// `pos * slope` is added to `first_val` in signed 64-bit arithmetic and
/// the sum is reinterpreted as `u64`.
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let offset = (slope * pos as f32) as i64;
    (first_val as i64 + offset) as u64
}