rename, add codec_tester

This commit is contained in:
Pascal Seitz
2021-06-11 14:53:02 +02:00
parent ef4665945f
commit 1d41b96d32
5 changed files with 56 additions and 36 deletions

View File

@@ -1,4 +1,3 @@
use crate::CodecId;
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
@@ -100,6 +99,8 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
pub struct BitpackedFastFieldSerializer {}
impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
const NAME: &'static str = "Bitpacked";
const ID: u8 = 1;
/// Creates a new fast field serializer.
///
/// The serializer in fact encodes the values by bitpacking
@@ -132,10 +133,6 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
num_bits as f32 / num_bits_uncompressed as f32
}
}
/// Gives the bitpacked codec its name and numeric id.
impl CodecId for BitpackedFastFieldSerializer {
/// Name of the codec.
const NAME: &'static str = "Bitpacked";
/// Numeric id of the codec.
const ID: u8 = 1;
}
#[cfg(test)]
mod tests {

View File

@@ -22,6 +22,11 @@ pub trait FastFieldCodecReader: Sized {
/// The FastFieldSerializerEstimate trait is required on all variants
/// of fast field compressions, to decide which one to choose.
pub trait FastFieldCodecSerializer {
/// A codec needs to provide a unique name and id, which are
/// used for debugging and de/serialization.
const NAME: &'static str;
const ID: u8;
/// Returns an estimate of the compression ratio. If the compressor is unable to handle the
/// data it needs to return f32::MAX.
/// The baseline is uncompressed 64bit data.
@@ -39,15 +44,6 @@ pub trait FastFieldCodecSerializer {
) -> io::Result<()>;
}
/// `CodecId` is required by each Codec.
///
/// It needs to provide a unique name and id, which are
/// used for debugging and de/serialization.
pub trait CodecId {
/// Unique name of the codec.
const NAME: &'static str;
/// Unique numeric id of the codec.
const ID: u8;
}
/// FastFieldDataAccess is the trait to access fast field data during serialization and estimation.
pub trait FastFieldDataAccess: Clone {
/// Return the value associated to the given document.
@@ -82,16 +78,20 @@ impl FastFieldDataAccess for Vec<u64> {
#[cfg(test)]
mod tests {
use crate::{
bitpacked::BitpackedFastFieldSerializer, linearinterpol::LinearInterpolFastFieldSerializer,
multilinearinterpol::MultiLinearInterpolFastFieldSerializer,
bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer},
linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer},
multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
},
};
pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
name: &str,
) {
if S::estimate(&data, crate::tests::stats_from_vec(&data)) == f32::MAX {
return;
) -> (f32, f32) {
let estimation = S::estimate(&data, crate::tests::stats_from_vec(&data));
if estimation == f32::MAX {
return (estimation, 0.0);
}
let mut out = vec![];
S::create(
@@ -113,6 +113,8 @@ mod tests {
);
}
}
let actual_compression = data.len() as f32 / out.len() as f32;
return (estimation, actual_compression);
}
pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
let mut data_and_names = vec![];
@@ -130,6 +132,35 @@ mod tests {
data_and_names
}
/// Runs every shared test data set through the serializer/reader pair `S`/`R`
/// and prints one line per data set: either the estimate and the actual
/// value returned by `create_and_validate`, or `Disabled`.
fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
    for (values, data_set_name) in get_codec_test_data_sets() {
        let (estimate, actual) = crate::tests::create_and_validate::<S, R>(&values, data_set_name);
        // An estimate of `f32::MAX` is reported as "Disabled" instead of numbers.
        let summary = if estimate != f32::MAX {
            format!("Estimate {:?} Actual {:?} ", estimate, actual)
        } else {
            "Disabled".to_string()
        };
        println!("Codec {}, DataSet {}, {}", S::NAME, data_set_name, summary);
    }
}
/// Runs the generic `test_codec` driver with the bitpacked serializer and reader.
#[test]
fn test_codec_bitpacking() {
test_codec::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>();
}
/// Runs the generic `test_codec` driver with the linear interpolation serializer and reader.
#[test]
fn test_codec_interpolation() {
test_codec::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>();
}
/// Runs the generic `test_codec` driver with the multi linear interpolation serializer and reader.
#[test]
fn test_codec_multi_interpolation() {
test_codec::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>();
}
use super::*;
pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
let min_value = data.iter().cloned().min().unwrap_or(0);

View File

@@ -1,4 +1,3 @@
use crate::CodecId;
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
@@ -112,6 +111,8 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}
impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
const NAME: &'static str = "LinearInterpol";
const ID: u8 = 2;
/// Creates a new fast field serializer.
fn create(
write: &mut impl Write,
@@ -228,11 +229,6 @@ fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
}
}
/// Gives the linear interpolation codec its name and numeric id.
impl CodecId for LinearInterpolFastFieldSerializer {
/// Name of the codec.
const NAME: &'static str = "LinearInterpol";
/// Numeric id of the codec.
const ID: u8 = 2;
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -1,4 +1,3 @@
use crate::CodecId;
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
@@ -44,7 +43,7 @@ impl<W: io::Write> io::Write for TrackWriteSize<W> {
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct MultiLinearinterpolFastFieldReader {
pub struct MultiLinearInterpolFastFieldReader {
/// Footer of the serialized fast field data.
pub footer: MultiLinearInterpolFooter,
}
@@ -164,7 +163,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
&interpolations[get_interpolation_position(doc)]
}
impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader {
impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
@@ -172,7 +171,7 @@ impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader {
let (_data, mut footer) = bytes.split_at(bytes.len() - (4 + footer_len) as usize);
let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?;
Ok(MultiLinearinterpolFastFieldReader { footer })
Ok(MultiLinearInterpolFastFieldReader { footer })
}
#[inline]
@@ -211,6 +210,8 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
pub struct MultiLinearInterpolFastFieldSerializer {}
impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
const NAME: &'static str = "MultiLinearInterpol";
const ID: u8 = 3;
/// Creates a new fast field serializer.
fn create(
write: &mut impl Write,
@@ -378,11 +379,6 @@ fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
}
}
/// Gives the multi linear interpolation codec its name and numeric id.
impl CodecId for MultiLinearInterpolFastFieldSerializer {
/// Name of the codec.
const NAME: &'static str = "MultiLinearInterpol";
/// Numeric id of the codec.
const ID: u8 = 3;
}
#[cfg(test)]
mod tests {
use super::*;
@@ -391,7 +387,7 @@ mod tests {
/// Runs the shared `crate::tests::create_and_validate` helper on `data`,
/// using this module's serializer and reader types; `name` is passed through.
fn create_and_validate(data: &[u64], name: &str) {
crate::tests::create_and_validate::<
MultiLinearInterpolFastFieldSerializer,
MultiLinearinterpolFastFieldReader,
MultiLinearInterpolFastFieldReader,
>(&data, name);
}

View File

@@ -14,8 +14,8 @@ use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearinterpolFastFieldReader;
use fastfield_codecs::CodecId;
use fastfield_codecs::FastFieldCodecReader;
use fastfield_codecs::FastFieldCodecSerializer;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::path::Path;