mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 16:22:55 +00:00
add inline, add readme
This commit is contained in:
17
fastfield_codecs/README.md
Normal file
17
fastfield_codecs/README.md
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
|
||||
# Fast Field Codecs
|
||||
|
||||
This crate contains various fast field codecs, used to compress/decompress fast field data in tantivy.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributing is straightforward. The bitpacking codec is the simplest compressor, so you can use it as a reference implementation.
|
||||
|
||||
A codec needs to implement 3 parts:
|
||||
|
||||
A reader implementing `CodecReader` to read the codec.
|
||||
A serializer implementing `FastFieldSerializerEstimate` for compression estimation.
|
||||
`CodecId`, to identify the codec.
|
||||
|
||||
|
||||
@@ -34,12 +34,15 @@ impl<'data> CodecReader for BitpackedFastFieldReader {
|
||||
bit_unpacker,
|
||||
})
|
||||
}
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
|
||||
self.min_value_u64 + self.bit_unpacker.get(doc, &data)
|
||||
}
|
||||
#[inline]
|
||||
fn min_value(&self) -> u64 {
|
||||
self.min_value_u64
|
||||
}
|
||||
#[inline]
|
||||
fn max_value(&self) -> u64 {
|
||||
self.max_value_u64
|
||||
}
|
||||
|
||||
@@ -152,6 +152,7 @@ impl LinearInterpolFastFieldSerializer {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
|
||||
if num_vals <= 1 {
|
||||
return 0.0;
|
||||
@@ -161,9 +162,11 @@ fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
|
||||
((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
|
||||
}
|
||||
|
||||
/// Evaluates the interpolation line at position `pos`.
///
/// The result is `first_val` plus `pos * slope`, with the product truncated to
/// an integer. Because the product is cast to `u64`, a negative product
/// saturates to `0`, so the returned value is never smaller than `first_val`.
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let predicted_offset = (pos as f32 * slope) as u64;
    first_val + predicted_offset
}
|
||||
|
||||
impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
|
||||
/// estimation for linear interpolation is hard because you don't know
|
||||
/// where the local maxima for the deviation of the calculated value are and
|
||||
@@ -219,6 +222,7 @@ impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
|
||||
if x < y {
|
||||
y - x
|
||||
@@ -286,6 +290,16 @@ mod tests {
|
||||
create_and_validate(&data, "large amplitude");
|
||||
}
|
||||
/// Round-trips a sequence that rises slowly at first and steeply at the end.
#[test]
fn linear_interpol_fast_concave_data() {
    let slow_then_steep = vec![0, 1, 2, 5, 8, 10, 20, 50];
    create_and_validate(&slow_then_steep, "concave data");
}
|
||||
/// Round-trips a sequence that rises steeply at first and then flattens out.
#[test]
fn linear_interpol_fast_convex_data() {
    let steep_then_flat = vec![0, 40, 60, 70, 75, 77];
    create_and_validate(&steep_then_flat, "convex data");
}
|
||||
#[test]
|
||||
fn linear_interpol_fast_field_test_simple() {
|
||||
let data = (10..=20_u64).collect::<Vec<_>>();
|
||||
|
||||
|
||||
@@ -70,12 +70,7 @@ struct Function {
|
||||
impl Function {
|
||||
fn calc_slope(&mut self) {
|
||||
let num_vals = self.end_pos - self.start_pos;
|
||||
let amplitude = self.value_end_pos as i64 - self.value_start_pos as i64;
|
||||
if num_vals <= 1 {
|
||||
self.slope = amplitude as f32;
|
||||
} else {
|
||||
self.slope = ((amplitude as f64) / (num_vals as u64 - 1) as f64) as f32;
|
||||
}
|
||||
get_slope(self.value_start_pos, self.value_end_pos, num_vals);
|
||||
}
|
||||
// split the interpolation into two functions, change self and return the second split
|
||||
fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function {
|
||||
@@ -112,14 +107,12 @@ impl BinarySerializable for Function {
|
||||
let num_bits = u8::deserialize(reader)?;
|
||||
let interpolation = Function {
|
||||
data_start_offset,
|
||||
start_pos: 0,
|
||||
end_pos: 0,
|
||||
value_end_pos: 0,
|
||||
value_start_pos,
|
||||
positive_val_offset: offset,
|
||||
num_bits,
|
||||
bit_unpacker: BitUnpacker::new(num_bits),
|
||||
slope,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
Ok(interpolation)
|
||||
@@ -160,11 +153,13 @@ impl BinarySerializable for MultiLinearInterpolFooter {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_interpolation_position(doc: u64) -> usize {
|
||||
let index = doc / CHUNK_SIZE;
|
||||
index as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Function {
|
||||
&interpolations[get_interpolation_position(doc)]
|
||||
}
|
||||
@@ -220,13 +215,9 @@ impl MultiLinearInterpolFastFieldSerializer {
|
||||
let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1);
|
||||
|
||||
let mut first_function = Function {
|
||||
data_start_offset: 0,
|
||||
start_pos: 0,
|
||||
end_pos: stats.num_vals,
|
||||
value_start_pos: first_val,
|
||||
value_end_pos: last_val,
|
||||
slope: 0.0,
|
||||
positive_val_offset: 0,
|
||||
..Default::default()
|
||||
};
|
||||
first_function.calc_slope();
|
||||
@@ -308,10 +299,12 @@ impl MultiLinearInterpolFastFieldSerializer {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
/// Computes the slope of the straight line going from `first_val` at position
/// `0` to `last_val` at position `num_vals - 1`.
///
/// Returns `0.0` when `num_vals <= 1`: with fewer than two points there is no
/// line to fit, and dividing by `num_vals - 1` would either underflow
/// (`num_vals == 0`) or divide by zero (`num_vals == 1`).
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
    if num_vals <= 1 {
        return 0.0;
    }
    // Compute in f64 so the difference of two large u64 values keeps as much
    // precision as possible before narrowing to f32.
    let value_delta = last_val as f64 - first_val as f64;
    let position_delta = (num_vals - 1) as f64;
    (value_delta / position_delta) as f32
}
|
||||
|
||||
/// Evaluates the interpolation line at position `pos`.
///
/// `pos * slope` is truncated towards zero to an `i64` and added to
/// `first_val` using signed arithmetic, so a negative slope lowers the result.
/// The signed sum is then reinterpreted as `u64`.
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let signed_offset = (pos as f32 * slope) as i64;
    let signed_value = first_val as i64 + signed_offset;
    signed_value as u64
}
|
||||
|
||||
Reference in New Issue
Block a user