From 1d41b96d32072cd8dc2f4f7d43b819c5243554a6 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 11 Jun 2021 14:53:02 +0200 Subject: [PATCH] rename, add codec_tester --- fastfield_codecs/src/bitpacked.rs | 7 +-- fastfield_codecs/src/lib.rs | 59 ++++++++++++++++----- fastfield_codecs/src/linearinterpol.rs | 8 +-- fastfield_codecs/src/multilinearinterpol.rs | 16 +++--- src/fastfield/reader.rs | 2 +- 5 files changed, 56 insertions(+), 36 deletions(-) diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index b0a373b46..01b363c52 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -1,4 +1,3 @@ -use crate::CodecId; use crate::FastFieldCodecReader; use crate::FastFieldCodecSerializer; use crate::FastFieldDataAccess; @@ -100,6 +99,8 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> { pub struct BitpackedFastFieldSerializer {} impl FastFieldCodecSerializer for BitpackedFastFieldSerializer { + const NAME: &'static str = "Bitpacked"; + const ID: u8 = 1; /// Creates a new fast field serializer. /// /// The serializer in fact encode the values by bitpacking @@ -132,10 +133,6 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer { num_bits as f32 / num_bits_uncompressed as f32 } } -impl CodecId for BitpackedFastFieldSerializer { - const NAME: &'static str = "Bitpacked"; - const ID: u8 = 1; -} #[cfg(test)] mod tests { diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 473b9b83e..93128728b 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -22,6 +22,11 @@ pub trait FastFieldCodecReader: Sized { /// The FastFieldSerializerEstimate trait is required on all variants /// of fast field compressions, to decide which one to choose. pub trait FastFieldCodecSerializer { + /// A codec needs to provide a unique name and id, which is + /// used for debugging and de/serialization. 
+ const NAME: &'static str; + const ID: u8; + /// returns an estimate of the compression ratio. if the compressor is unable to handle the /// data it needs to return f32::MAX. /// The baseline is uncompressed 64bit data. @@ -39,15 +44,6 @@ pub trait FastFieldCodecSerializer { ) -> io::Result<()>; } -/// `CodecId` is required by each Codec. -/// -/// It needs to provide a unique name and id, which is -/// used for debugging and de/serialization. -pub trait CodecId { - const NAME: &'static str; - const ID: u8; -} - /// FastFieldDataAccess is the trait to access fast field data during serialization and estimation. pub trait FastFieldDataAccess: Clone { /// Return the value associated to the given document. @@ -82,16 +78,20 @@ impl FastFieldDataAccess for Vec { #[cfg(test)] mod tests { use crate::{ - bitpacked::BitpackedFastFieldSerializer, linearinterpol::LinearInterpolFastFieldSerializer, - multilinearinterpol::MultiLinearInterpolFastFieldSerializer, + bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer}, + linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer}, + multilinearinterpol::{ + MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer, + }, }; pub fn create_and_validate( data: &[u64], name: &str, - ) { - if S::estimate(&data, crate::tests::stats_from_vec(&data)) == f32::MAX { - return; + ) -> (f32, f32) { + let estimation = S::estimate(&data, crate::tests::stats_from_vec(&data)); + if estimation == f32::MAX { + return (estimation, 0.0); } let mut out = vec![]; S::create( @@ -113,6 +113,8 @@ mod tests { ); } } + let actual_compression = data.len() as f32 / out.len() as f32; + return (estimation, actual_compression); } pub fn get_codec_test_data_sets() -> Vec<(Vec, &'static str)> { let mut data_and_names = vec![]; @@ -130,6 +132,35 @@ mod tests { data_and_names } + fn test_codec() { + let codec_name = S::NAME; + for (data, data_set_name) in get_codec_test_data_sets() { + let (estimate, actual) = + 
crate::tests::create_and_validate::(&data, data_set_name); + let result = if estimate == f32::MAX { + "Disabled".to_string() + } else { + format!("Estimate {:?} Actual {:?} ", estimate, actual) + }; + println!( + "Codec {}, DataSet {}, {}", + codec_name, data_set_name, result + ); + } + } + #[test] + fn test_codec_bitpacking() { + test_codec::(); + } + #[test] + fn test_codec_interpolation() { + test_codec::(); + } + #[test] + fn test_codec_multi_interpolation() { + test_codec::(); + } + use super::*; pub fn stats_from_vec(data: &[u64]) -> FastFieldStats { let min_value = data.iter().cloned().min().unwrap_or(0); diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs index 0354cf1dd..f38c3855f 100644 --- a/fastfield_codecs/src/linearinterpol.rs +++ b/fastfield_codecs/src/linearinterpol.rs @@ -1,4 +1,3 @@ -use crate::CodecId; use crate::FastFieldCodecReader; use crate::FastFieldCodecSerializer; use crate::FastFieldDataAccess; @@ -112,6 +111,8 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { } impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer { + const NAME: &'static str = "LinearInterpol"; + const ID: u8 = 2; /// Creates a new fast field serializer. 
fn create( write: &mut impl Write, @@ -228,11 +229,6 @@ fn distance + Ord>(x: T, y: T) -> T { } } -impl CodecId for LinearInterpolFastFieldSerializer { - const NAME: &'static str = "LinearInterpol"; - const ID: u8 = 2; -} - #[cfg(test)] mod tests { use super::*; diff --git a/fastfield_codecs/src/multilinearinterpol.rs b/fastfield_codecs/src/multilinearinterpol.rs index 14c5fb1a6..a00836af7 100644 --- a/fastfield_codecs/src/multilinearinterpol.rs +++ b/fastfield_codecs/src/multilinearinterpol.rs @@ -1,4 +1,3 @@ -use crate::CodecId; use crate::FastFieldCodecReader; use crate::FastFieldCodecSerializer; use crate::FastFieldDataAccess; @@ -44,7 +43,7 @@ impl io::Write for TrackWriteSize { /// Depending on the field type, a different /// fast field is required. #[derive(Clone)] -pub struct MultiLinearinterpolFastFieldReader { +pub struct MultiLinearInterpolFastFieldReader { pub footer: MultiLinearInterpolFooter, } @@ -164,7 +163,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio &interpolations[get_interpolation_position(doc)] } -impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader { +impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader { /// Opens a fast field given a file. 
fn open_from_bytes(bytes: &[u8]) -> io::Result { let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?; @@ -172,7 +171,7 @@ impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader { let (_data, mut footer) = bytes.split_at(bytes.len() - (4 + footer_len) as usize); let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?; - Ok(MultiLinearinterpolFastFieldReader { footer }) + Ok(MultiLinearInterpolFastFieldReader { footer }) } #[inline] @@ -211,6 +210,8 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { pub struct MultiLinearInterpolFastFieldSerializer {} impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer { + const NAME: &'static str = "MultiLinearInterpol"; + const ID: u8 = 3; /// Creates a new fast field serializer. fn create( write: &mut impl Write, @@ -378,11 +379,6 @@ fn distance + Ord>(x: T, y: T) -> T { } } -impl CodecId for MultiLinearInterpolFastFieldSerializer { - const NAME: &'static str = "MultiLinearInterpol"; - const ID: u8 = 3; -} - #[cfg(test)] mod tests { use super::*; @@ -391,7 +387,7 @@ mod tests { fn create_and_validate(data: &[u64], name: &str) { crate::tests::create_and_validate::< MultiLinearInterpolFastFieldSerializer, - MultiLinearinterpolFastFieldReader, + MultiLinearInterpolFastFieldReader, >(&data, name); } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 4298ef457..525a8869f 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -14,8 +14,8 @@ use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader; use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer; use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer; use fastfield_codecs::multilinearinterpol::MultiLinearinterpolFastFieldReader; -use fastfield_codecs::CodecId; use fastfield_codecs::FastFieldCodecReader; +use fastfield_codecs::FastFieldCodecSerializer; use std::collections::HashMap; use std::marker::PhantomData; use 
std::path::Path;