From c889ae10e4774009bb17ae6d5a4316fb6645b6c8 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Mon, 14 Jun 2021 16:16:25 +0200 Subject: [PATCH] add is_applicable to fast field codecs --- fastfield_codecs/src/bitpacked.rs | 6 +++ fastfield_codecs/src/lib.rs | 12 +++-- fastfield_codecs/src/linearinterpol.rs | 19 ++++--- fastfield_codecs/src/main.rs | 17 +++--- fastfield_codecs/src/multilinearinterpol.rs | 19 ++++--- src/fastfield/serializer/mod.rs | 60 +++++++++++---------- 6 files changed, 80 insertions(+), 53 deletions(-) diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index f7422b351..a4d5b44a4 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -126,6 +126,12 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer { Ok(()) } + fn is_applicable( + _fastfield_accessor: &impl FastFieldDataAccess, + _stats: FastFieldStats, + ) -> bool { + true + } fn estimate(_fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 { let amplitude = stats.max_value - stats.min_value; let num_bits = compute_num_bits(amplitude); diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index f14cad8df..3bf9e487e 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -27,8 +27,10 @@ pub trait FastFieldCodecSerializer { const NAME: &'static str; const ID: u8; - /// Returns an estimate of the compression ratio. if the compressor is unable to handle the - /// data it needs to return f32::MAX. + /// Check if the Codec is able to compress the data + fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> bool; + + /// Returns an estimate of the compression ratio. /// The baseline is uncompressed 64bit data. /// /// It could make sense to also return a value representing @@ -92,10 +94,10 @@ mod tests { data: &[u64], name: &str, ) -> (f32, f32) { - let estimation = S::estimate(&data, crate::tests::stats_from_vec(&data)); - if estimation == f32::MAX { - return (estimation, 0.0); + if !S::is_applicable(&data, crate::tests::stats_from_vec(&data)) { + return (f32::MAX, 0.0); } + let estimation = S::estimate(&data, crate::tests::stats_from_vec(&data)); let mut out = vec![]; S::serialize( &mut out, diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs index e5286a0e6..c61ea2e2a 100644 --- a/fastfield_codecs/src/linearinterpol.rs +++ b/fastfield_codecs/src/linearinterpol.rs @@ -166,12 +166,12 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer { footer.serialize(write)?; Ok(()) } - /// estimation for linear interpolation is hard because, you don't know - /// where the local maxima for the deviation of the calculated value are and - /// the offset to shift all values to >=0 is also unknown. - fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 { + fn is_applicable( + _fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + ) -> bool { if stats.num_vals < 3 { - return f32::MAX; //disable compressor for this case + return false; //disable compressor for this case } // On serialisation the offset is added to the actual value. // We need to make sure this won't run into overflow calculation issues. @@ -183,9 +183,14 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer { .checked_add(theorethical_maximum_offset) .is_none() { - return f32::MAX; + return false; } - + true + } + /// estimation for linear interpolation is hard because, you don't know + /// where the local maxima for the deviation of the calculated value are and + /// the offset to shift all values to >=0 is also unknown. + fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 { let first_val = fastfield_accessor.get(0); let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1); let slope = get_slope(first_val, last_val, stats.num_vals); diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index cb5c665aa..71107333e 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -35,8 +35,8 @@ fn main() { .unwrap(); table.add_row(Row::new(vec![Cell::new(data_set_name).style_spec("Bbb")])); - for (est, comp, name) in results { - let (est_cell, ratio_cell) = if est == f32::MAX { + for (is_applicable, est, comp, name) in results { + let (est_cell, ratio_cell) = if !is_applicable { ("Codec Disabled".to_string(), "".to_string()) } else { (est.to_string(), comp.to_string()) @@ -93,11 +93,14 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec, &'static str)> { data_and_names } -pub fn serialize_with_codec(data: &[u64]) -> (f32, f32, &'static str) { - let estimation = S::estimate(&data, stats_from_vec(&data)); - if estimation == f32::MAX { - return (estimation, 0.0, S::NAME); +pub fn serialize_with_codec( + data: &[u64], +) -> (bool, f32, f32, &'static str) { + let is_applicable = S::is_applicable(&data, stats_from_vec(&data)); + if !is_applicable { + return (false, 0.0, 0.0, S::NAME); } + let estimation = S::estimate(&data, stats_from_vec(&data)); let mut out = vec![]; S::serialize( &mut out, @@ -109,7 +112,7 @@ pub fn serialize_with_codec(data: &[u64]) -> (f32, .unwrap(); let actual_compression = out.len() as f32 / (data.len() * 8) as f32; - return (estimation, actual_compression, S::NAME); + return (true, estimation, actual_compression, S::NAME); } pub fn stats_from_vec(data: &[u64]) -> FastFieldStats { diff --git a/fastfield_codecs/src/multilinearinterpol.rs b/fastfield_codecs/src/multilinearinterpol.rs index 0015fc76b..d52beb479 100644 --- a/fastfield_codecs/src/multilinearinterpol.rs +++ b/fastfield_codecs/src/multilinearinterpol.rs @@ -310,12 +310,12 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer { Ok(()) } - /// estimation for linear interpolation is hard because, you don't know - /// where the local maxima are for the deviation of the calculated value and - /// the offset is also unknown. - fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 { + fn is_applicable( + _fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + ) -> bool { if stats.num_vals < 5_000 { - return f32::MAX; + return false; } // On serialization the offset is added to the actual value. // We need to make sure this won't run into overflow calculation issues. @@ -327,9 +327,14 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer { .checked_add(theorethical_maximum_offset) .is_none() { - return f32::MAX; + return false; } - + true + } + /// estimation for linear interpolation is hard because, you don't know + /// where the local maxima are for the deviation of the calculated value and + /// the offset is also unknown. + fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 { let first_val_in_first_block = fastfield_accessor.get(0); let last_elem_in_first_chunk = CHUNK_SIZE.min(stats.num_vals); let last_val_in_first_block = fastfield_accessor.get(last_elem_in_first_chunk as u32 - 1); diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index 12398d37a..956c23eea 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -36,6 +36,24 @@ pub struct CompositeFastFieldSerializer { composite_write: CompositeWrite, } +// use this, when this is merged and stabilized explicit_generic_args_with_impl_trait +// https://github.com/rust-lang/rust/pull/86176 +fn codec_estimation( + stats: FastFieldStats, + fastfield_accessor: &A, + estimations: &mut Vec<(f32, &str, u8)>, +) { + if !T::is_applicable(fastfield_accessor, stats.clone()) { + return; + } + let (ratio, name, id) = ( + T::estimate(fastfield_accessor, stats.clone()), + T::NAME, + T::ID, + ); + estimations.push((ratio, name, id)); +} + impl CompositeFastFieldSerializer { /// Constructor pub fn from_write(write: WritePtr) -> io::Result { @@ -57,33 +75,21 @@ impl CompositeFastFieldSerializer { let mut estimations = vec![]; - { - let (ratio, name, id) = ( - BitpackedFastFieldSerializer::estimate(&fastfield_accessor, stats.clone()), - BitpackedFastFieldSerializer::NAME, - BitpackedFastFieldSerializer::ID, - ); - estimations.push((ratio, name, id)); - } - { - let (ratio, name, id) = ( - LinearInterpolFastFieldSerializer::estimate(&fastfield_accessor, stats.clone()), - LinearInterpolFastFieldSerializer::NAME, - LinearInterpolFastFieldSerializer::ID, - ); - estimations.push((ratio, name, id)); - } - { - let (ratio, name, id) = ( - MultiLinearInterpolFastFieldSerializer::estimate( - &fastfield_accessor, - stats.clone(), - ), - MultiLinearInterpolFastFieldSerializer::NAME, - MultiLinearInterpolFastFieldSerializer::ID, - ); - estimations.push((ratio, name, id)); - } + codec_estimation::( + stats.clone(), + &fastfield_accessor, + &mut estimations, + ); + codec_estimation::( + stats.clone(), + &fastfield_accessor, + &mut estimations, + ); + codec_estimation::( + stats.clone(), + &fastfield_accessor, + &mut estimations, + ); if let Some(broken_estimation) = estimations .iter() .find(|estimation| estimation.0 == f32::NAN)