From 294cd5fd0bdf5c78d0e92f4d50aa8927f41fdbbe Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 11 Jun 2021 13:00:40 +0200 Subject: [PATCH] streamline traits and tests --- fastfield_codecs/benches/bench.rs | 129 +++++++----------- fastfield_codecs/src/bitpacked.rs | 91 ++++++------ fastfield_codecs/src/lib.rs | 99 ++++++++++---- fastfield_codecs/src/linearinterpol.rs | 84 ++++-------- .../src/multilinearinterpol/mod.rs | 61 +++------ src/fastfield/multivalued/writer.rs | 4 +- src/fastfield/reader.rs | 16 +-- src/fastfield/serializer/mod.rs | 23 ++-- 8 files changed, 226 insertions(+), 281 deletions(-) diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index 0873fba0a..68b9af9ac 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -6,7 +6,7 @@ extern crate test; mod tests { use fastfield_codecs::{ bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer}, - linearinterpol::{LinearInterpolFastFieldSerializer, LinearinterpolFastFieldReader}, + linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer}, multilinearinterpol::{ MultiLinearInterpolFastFieldSerializer, MultiLinearinterpolFastFieldReader, }, @@ -26,112 +26,75 @@ mod tests { data } + fn value_iter() -> impl Iterator { + 0..20_000 + } + fn bench_get( + b: &mut Bencher, + data: &[u64], + ) { + let mut bytes = vec![]; + S::create( + &mut bytes, + &data, + stats_from_vec(&data), + data.iter().cloned(), + data.iter().cloned(), + ) + .unwrap(); + let reader = R::open_from_bytes(&bytes).unwrap(); + b.iter(|| { + for pos in value_iter() { + reader.get_u64(pos as u64, &bytes); + } + }); + } + fn bench_create(b: &mut Bencher, data: &[u64]) { + let mut bytes = vec![]; + b.iter(|| { + S::create( + &mut bytes, + &data, + stats_from_vec(&data), + data.iter().cloned(), + data.iter().cloned(), + ) + .unwrap(); + }); + } + use test::Bencher; #[bench] fn bench_fastfield_bitpack_create(b: &mut Bencher) { let data: Vec<_> 
= get_data(); - b.iter(|| { - let mut out = vec![]; - BitpackedFastFieldSerializer::create( - &mut out, - &data, - stats_from_vec(&data), - data.iter().cloned(), - ) - .unwrap(); - out - }); + bench_create::(b, &data); } #[bench] fn bench_fastfield_linearinterpol_create(b: &mut Bencher) { let data: Vec<_> = get_data(); - b.iter(|| { - let mut out = vec![]; - LinearInterpolFastFieldSerializer::create( - &mut out, - &data, - stats_from_vec(&data), - data.iter().cloned(), - data.iter().cloned(), - ) - .unwrap(); - out - }); + bench_create::(b, &data); } #[bench] fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) { let data: Vec<_> = get_data(); - b.iter(|| { - let mut out = vec![]; - MultiLinearInterpolFastFieldSerializer::create( - &mut out, - &data, - stats_from_vec(&data), - data.iter().cloned(), - data.iter().cloned(), - ) - .unwrap(); - out - }); - } - fn value_iter() -> impl Iterator { - 0..20_000 + bench_create::(b, &data); } #[bench] fn bench_fastfield_bitpack_get(b: &mut Bencher) { let data: Vec<_> = get_data(); - let mut bytes = vec![]; - BitpackedFastFieldSerializer::create( - &mut bytes, - &data, - stats_from_vec(&data), - data.iter().cloned(), - ) - .unwrap(); - let reader = BitpackedFastFieldReader::open_from_bytes(&bytes).unwrap(); - b.iter(|| { - for pos in value_iter() { - reader.get_u64(pos as u64, &bytes); - } - }); + bench_get::(b, &data); } #[bench] fn bench_fastfield_linearinterpol_get(b: &mut Bencher) { let data: Vec<_> = get_data(); - let mut bytes = vec![]; - LinearInterpolFastFieldSerializer::create( - &mut bytes, - &data, - stats_from_vec(&data), - data.iter().cloned(), - data.iter().cloned(), - ) - .unwrap(); - let reader = LinearinterpolFastFieldReader::open_from_bytes(&bytes).unwrap(); - b.iter(|| { - for pos in value_iter() { - reader.get_u64(pos as u64, &bytes); - } - }); + bench_get::(b, &data); } #[bench] fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) { let data: Vec<_> = get_data(); - let mut bytes = vec![]; 
- MultiLinearInterpolFastFieldSerializer::create( - &mut bytes, - &data, - stats_from_vec(&data), - data.iter().cloned(), - data.iter().cloned(), - ) - .unwrap(); - let reader = MultiLinearinterpolFastFieldReader::open_from_bytes(&bytes).unwrap(); - b.iter(|| { - for pos in value_iter() { - reader.get_u64(pos as u64, &bytes); - } - }); + bench_get::( + b, &data, + ); } pub fn stats_from_vec(data: &[u64]) -> FastFieldStats { let min_value = data.iter().cloned().min().unwrap_or(0); diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 60c8a9ae6..b0a373b46 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -1,7 +1,7 @@ use crate::CodecId; -use crate::CodecReader; +use crate::FastFieldCodecReader; +use crate::FastFieldCodecSerializer; use crate::FastFieldDataAccess; -use crate::FastFieldSerializerEstimate; use crate::FastFieldStats; use common::BinarySerializable; use std::io::{self, Write}; @@ -19,7 +19,7 @@ pub struct BitpackedFastFieldReader { pub max_value_u64: u64, } -impl<'data> CodecReader for BitpackedFastFieldReader { +impl<'data> FastFieldCodecReader for BitpackedFastFieldReader { /// Opens a fast field given a file. fn open_from_bytes(bytes: &[u8]) -> io::Result { let (_data, mut footer) = bytes.split_at(bytes.len() - 16); @@ -47,7 +47,7 @@ impl<'data> CodecReader for BitpackedFastFieldReader { self.max_value_u64 } } -pub struct BitpackedFastFieldSerializer<'a, W: 'a + Write> { +pub struct BitpackedFastFieldSerializerLegacy<'a, W: 'a + Write> { bit_packer: BitPacker, write: &'a mut W, min_value: u64, @@ -55,7 +55,7 @@ pub struct BitpackedFastFieldSerializer<'a, W: 'a + Write> { num_bits: u8, } -impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> { +impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> { /// Creates a new fast field serializer. 
/// /// The serializer in fact encode the values by bitpacking @@ -68,12 +68,12 @@ impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> { write: &'a mut W, min_value: u64, max_value: u64, - ) -> io::Result> { + ) -> io::Result> { assert!(min_value <= max_value); let amplitude = max_value - min_value; let num_bits = compute_num_bits(amplitude); let bit_packer = BitPacker::new(); - Ok(BitpackedFastFieldSerializer { + Ok(BitpackedFastFieldSerializerLegacy { bit_packer, write, min_value, @@ -81,29 +81,6 @@ impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> { num_bits, }) } - /// Creates a new fast field serializer. - /// - /// The serializer in fact encode the values by bitpacking - /// `(val - min_value)`. - /// - /// It requires a `min_value` and a `max_value` to compute - /// compute the minimum number of bits required to encode - /// values. - pub fn create( - write: &'a mut W, - _fastfield_accessor: &impl FastFieldDataAccess, - stats: FastFieldStats, - data_iter: impl Iterator, - ) -> io::Result<()> { - let mut serializer = Self::open(write, stats.min_value, stats.max_value)?; - - for val in data_iter { - serializer.add_val(val)?; - } - serializer.close_field()?; - - Ok(()) - } /// Pushes a new value to the currently open u64 fast field. #[inline] pub fn add_val(&mut self, val: u64) -> io::Result<()> { @@ -120,7 +97,34 @@ impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> { } } -impl<'a, W: 'a + Write> FastFieldSerializerEstimate for BitpackedFastFieldSerializer<'a, W> { +pub struct BitpackedFastFieldSerializer {} + +impl FastFieldCodecSerializer for BitpackedFastFieldSerializer { + /// Creates a new fast field serializer. + /// + /// The serializer in fact encodes the values by bitpacking + /// `(val - min_value)`. + /// + /// It requires a `min_value` and a `max_value` to + /// compute the minimum number of bits required to encode + /// values. 
+ fn create( + write: &mut impl Write, + _fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + data_iter: impl Iterator, + _data_iter1: impl Iterator, + ) -> io::Result<()> { + let mut serializer = + BitpackedFastFieldSerializerLegacy::open(write, stats.min_value, stats.max_value)?; + + for val in data_iter { + serializer.add_val(val)?; + } + serializer.close_field()?; + + Ok(()) + } fn estimate(_fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 { let amplitude = stats.max_value - stats.min_value; let num_bits = compute_num_bits(amplitude); @@ -128,7 +132,7 @@ impl<'a, W: 'a + Write> FastFieldSerializerEstimate for BitpackedFastFieldSerial num_bits as f32 / num_bits_uncompressed as f32 } } -impl<'a, W: 'a + Write> CodecId for BitpackedFastFieldSerializer<'_, W> { +impl CodecId for BitpackedFastFieldSerializer { const NAME: &'static str = "Bitpacked"; const ID: u8 = 1; } @@ -137,26 +141,11 @@ impl<'a, W: 'a + Write> CodecId for BitpackedFastFieldSerializer<'_, W> { mod tests { use super::*; use crate::tests::get_codec_test_data_sets; - fn create_and_validate(data: &[u64], name: &str) { - let mut out = vec![]; - BitpackedFastFieldSerializer::create( - &mut out, - &data, - crate::tests::stats_from_vec(&data), - data.iter().cloned(), - ) - .unwrap(); - let reader = BitpackedFastFieldReader::open_from_bytes(&out).unwrap(); - for (doc, orig_val) in data.iter().enumerate() { - let val = reader.get_u64(doc as u64, &out); - if val != *orig_val { - panic!( - "val {:?} does not match orig_val {:?}, in data set {}", - val, orig_val, name - ); - } - } + fn create_and_validate(data: &[u64], name: &str) { + crate::tests::create_and_validate::( + &data, name, + ); } #[test] diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index f6955226a..473b9b83e 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -2,11 +2,14 @@ #[macro_use] extern crate more_asserts; +use std::io; +use 
std::io::Write; + pub mod bitpacked; pub mod linearinterpol; pub mod multilinearinterpol; -pub trait CodecReader: Sized { +pub trait FastFieldCodecReader: Sized { /// reads the metadata and returns the CodecReader fn open_from_bytes(bytes: &[u8]) -> std::io::Result; @@ -16,6 +19,35 @@ pub trait CodecReader: Sized { fn max_value(&self) -> u64; } +/// The FastFieldCodecSerializer trait is required on all variants +/// of fast field compressions, to decide which one to choose. +pub trait FastFieldCodecSerializer { + /// returns an estimate of the compression ratio. if the compressor is unable to handle the + /// data it needs to return f32::MAX. + /// The baseline is uncompressed 64bit data. + /// + /// It could make sense to also return a value representing + /// computational complexity. + fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32; + + fn create( + write: &mut impl Write, + fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + data_iter: impl Iterator, + data_iter1: impl Iterator, + ) -> io::Result<()>; +} + +/// `CodecId` is required by each Codec. +/// +/// It needs to provide a unique name and id, which is +/// used for debugging and de/serialization. +pub trait CodecId { + const NAME: &'static str; + const ID: u8; +} + /// FastFieldDataAccess is the trait to access fast field data during serialization and estimation. pub trait FastFieldDataAccess: Clone { /// Return the value associated to the given document. @@ -28,27 +60,6 @@ pub trait FastFieldDataAccess: Clone { fn get(&self, doc: u32) -> u64; } -/// The FastFieldSerializerEstimate trait is required on all variants -/// of fast field compressions, to decide which one to choose. -pub trait FastFieldSerializerEstimate { - /// returns an estimate of the compression ratio. if the compressor is unable to handle the - /// data it needs to return f32::MAX. - /// The baseline is uncompressed 64bit data. 
- /// - /// It could make sense to also return a value representing - /// computational complexity. - fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32; -} - -/// `CodecId` is required by each Codec. -/// -/// It needs to provide a unique name and id, which is -/// used for debugging and de/serialization. -pub trait CodecId { - const NAME: &'static str; - const ID: u8; -} - #[derive(Debug, Clone)] pub struct FastFieldStats { pub min_value: u64, @@ -72,8 +83,37 @@ impl FastFieldDataAccess for Vec { mod tests { use crate::{ bitpacked::BitpackedFastFieldSerializer, linearinterpol::LinearInterpolFastFieldSerializer, + multilinearinterpol::MultiLinearInterpolFastFieldSerializer, }; + pub fn create_and_validate( + data: &[u64], + name: &str, + ) { + if S::estimate(&data, crate::tests::stats_from_vec(&data)) == f32::MAX { + return; + } + let mut out = vec![]; + S::create( + &mut out, + &data, + crate::tests::stats_from_vec(&data), + data.iter().cloned(), + data.iter().cloned(), + ) + .unwrap(); + + let reader = R::open_from_bytes(&out).unwrap(); + for (doc, orig_val) in data.iter().enumerate() { + let val = reader.get_u64(doc as u64, &out); + if val != *orig_val { + panic!( + "val {:?} does not match orig_val {:?}, in data set {}, data {:?}", + val, orig_val, name, data + ); + } + } + } pub fn get_codec_test_data_sets() -> Vec<(Vec, &'static str)> { let mut data_and_names = vec![]; @@ -103,14 +143,19 @@ mod tests { #[test] fn estimation_good_interpolation_case() { - let data = (10..=200_u64).collect::>(); + let data = (10..=20000_u64).collect::>(); let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data)); - assert_le!(linear_interpol_estimation, 0.1); + assert_le!(linear_interpol_estimation, 0.01); + + let multi_linear_interpol_estimation = + MultiLinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data)); + assert_le!(multi_linear_interpol_estimation, 0.2); + 
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation); let bitpacked_estimation = - BitpackedFastFieldSerializer::>::estimate(&data, stats_from_vec(&data)); + BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data)); assert_le!(linear_interpol_estimation, bitpacked_estimation); } #[test] @@ -122,7 +167,7 @@ mod tests { assert_le!(linear_interpol_estimation, 0.32); let bitpacked_estimation = - BitpackedFastFieldSerializer::>::estimate(&data, stats_from_vec(&data)); + BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data)); assert_le!(bitpacked_estimation, linear_interpol_estimation); } #[test] @@ -137,7 +182,7 @@ mod tests { assert_le!(linear_interpol_estimation, 0.35); let bitpacked_estimation = - BitpackedFastFieldSerializer::>::estimate(&data, stats_from_vec(&data)); + BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data)); assert_le!(bitpacked_estimation, 0.32); assert_le!(bitpacked_estimation, linear_interpol_estimation); } diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs index be69311fe..0354cf1dd 100644 --- a/fastfield_codecs/src/linearinterpol.rs +++ b/fastfield_codecs/src/linearinterpol.rs @@ -1,7 +1,7 @@ use crate::CodecId; -use crate::CodecReader; +use crate::FastFieldCodecReader; +use crate::FastFieldCodecSerializer; use crate::FastFieldDataAccess; -use crate::FastFieldSerializerEstimate; use crate::FastFieldStats; use std::io::{self, Read, Write}; use std::ops::Sub; @@ -15,7 +15,7 @@ use tantivy_bitpacker::BitUnpacker; /// Depending on the field type, a different /// fast field is required. 
#[derive(Clone)] -pub struct LinearinterpolFastFieldReader { +pub struct LinearInterpolFastFieldReader { bit_unpacker: BitUnpacker, pub footer: LinearInterpolFooter, pub slope: f32, @@ -61,7 +61,7 @@ impl FixedSize for LinearInterpolFooter { const SIZE_IN_BYTES: usize = 56; } -impl CodecReader for LinearinterpolFastFieldReader { +impl FastFieldCodecReader for LinearInterpolFastFieldReader { /// Opens a fast field given a file. fn open_from_bytes(bytes: &[u8]) -> io::Result { let (_data, mut footer) = bytes.split_at(bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES); @@ -70,7 +70,7 @@ impl CodecReader for LinearinterpolFastFieldReader { let num_bits = compute_num_bits(footer.relative_max_value); let bit_unpacker = BitUnpacker::new(num_bits); - Ok(LinearinterpolFastFieldReader { + Ok(LinearInterpolFastFieldReader { bit_unpacker, footer, slope, @@ -96,9 +96,24 @@ impl CodecReader for LinearinterpolFastFieldReader { /// and stores the difference bitpacked. pub struct LinearInterpolFastFieldSerializer {} -impl LinearInterpolFastFieldSerializer { +#[inline] +fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { + if num_vals <= 1 { + return 0.0; + } + // We calculate the slope with f64 high precision and use the result in lower precision f32 + // This is done in order to handle estimations for very large values like i64::MAX + ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32 +} + +#[inline] +fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { + first_val + (pos as f32 * slope) as u64 +} + +impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer { /// Creates a new fast field serializer. 
- pub fn create( + fn create( write: &mut impl Write, fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats, @@ -150,24 +165,6 @@ impl LinearInterpolFastFieldSerializer { footer.serialize(write)?; Ok(()) } -} - -#[inline] -fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { - if num_vals <= 1 { - return 0.0; - } - // We calculate the slope with f64 high precision and use the result in lower precision f32 - // This is done in order to handle estimations for very large values like i64::MAX - ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32 -} - -#[inline] -fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { - first_val + (pos as f32 * slope) as u64 -} - -impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer { /// estimation for linear interpolation is hard because, you don't know /// where the local maxima for the deviation of the calculated value are and /// the offset to shift all values to >=0 is also unknown. 
@@ -241,33 +238,11 @@ mod tests { use super::*; use crate::tests::get_codec_test_data_sets; - fn create_and_validate(data: &[u64], name: &str) -> (u64, u64) { - if LinearInterpolFastFieldSerializer::estimate(&data, crate::tests::stats_from_vec(&data)) - == f32::MAX - { - return (0, 0); - } - let mut out = vec![]; - LinearInterpolFastFieldSerializer::create( - &mut out, - &data, - crate::tests::stats_from_vec(&data), - data.iter().cloned(), - data.iter().cloned(), - ) - .unwrap(); - - let reader = LinearinterpolFastFieldReader::open_from_bytes(&out).unwrap(); - for (doc, orig_val) in data.iter().enumerate() { - let val = reader.get_u64(doc as u64, &out); - if val != *orig_val { - panic!( - "val {:?} does not match orig_val {:?}, in data set {}", - val, orig_val, name - ); - } - } - (reader.footer.relative_max_value, reader.footer.offset) + fn create_and_validate(data: &[u64], name: &str) { + crate::tests::create_and_validate::< + LinearInterpolFastFieldSerializer, + LinearInterpolFastFieldReader, + >(&data, name); } #[test] @@ -303,10 +278,7 @@ mod tests { fn linear_interpol_fast_field_test_simple() { let data = (10..=20_u64).collect::>(); - let (rel_max_value, offset) = create_and_validate(&data, "simple monotonically"); - - assert_eq!(offset, 0); - assert_eq!(rel_max_value, 0); + create_and_validate(&data, "simple monotonically"); } #[test] diff --git a/fastfield_codecs/src/multilinearinterpol/mod.rs b/fastfield_codecs/src/multilinearinterpol/mod.rs index 8fec05c31..14c5fb1a6 100644 --- a/fastfield_codecs/src/multilinearinterpol/mod.rs +++ b/fastfield_codecs/src/multilinearinterpol/mod.rs @@ -1,7 +1,7 @@ use crate::CodecId; -use crate::CodecReader; +use crate::FastFieldCodecReader; +use crate::FastFieldCodecSerializer; use crate::FastFieldDataAccess; -use crate::FastFieldSerializerEstimate; use crate::FastFieldStats; use std::io::{self, Read, Write}; use std::ops::Sub; @@ -164,7 +164,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> 
&Functio &interpolations[get_interpolation_position(doc)] } -impl CodecReader for MultiLinearinterpolFastFieldReader { +impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader { /// Opens a fast field given a file. fn open_from_bytes(bytes: &[u8]) -> io::Result { let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?; @@ -197,12 +197,22 @@ impl CodecReader for MultiLinearinterpolFastFieldReader { } } +#[inline] +fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { + ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32 +} + +#[inline] +fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { + (first_val as i64 + (pos as f32 * slope) as i64) as u64 +} + /// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements. pub struct MultiLinearInterpolFastFieldSerializer {} -impl MultiLinearInterpolFastFieldSerializer { +impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer { /// Creates a new fast field serializer. - pub fn create( + fn create( write: &mut impl Write, fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats, @@ -298,17 +308,7 @@ impl MultiLinearInterpolFastFieldSerializer { footer.serialize(write)?; Ok(()) } -} -#[inline] -fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { - ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32 -} -#[inline] -fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { - (first_val as i64 + (pos as f32 * slope) as i64) as u64 -} -impl FastFieldSerializerEstimate for MultiLinearInterpolFastFieldSerializer { /// estimation for linear interpolation is hard because, you don't know /// where the local maxima are for the deviation of the calculated value and /// the offset is also unknown. 
@@ -389,33 +389,10 @@ mod tests { use crate::tests::get_codec_test_data_sets; fn create_and_validate(data: &[u64], name: &str) { - if MultiLinearInterpolFastFieldSerializer::estimate( - &data, - crate::tests::stats_from_vec(&data), - ) == f32::MAX - { - return; - } - let mut out = vec![]; - MultiLinearInterpolFastFieldSerializer::create( - &mut out, - &data, - crate::tests::stats_from_vec(&data), - data.iter().cloned(), - data.iter().cloned(), - ) - .unwrap(); - - let reader = MultiLinearinterpolFastFieldReader::open_from_bytes(&out).unwrap(); - for (doc, orig_val) in data.iter().enumerate() { - let val = reader.get_u64(doc as u64, &out); - if val != *orig_val { - panic!( - "val {:?} does not match orig_val {:?}, in data set {}, data {:?}", - val, orig_val, name, data - ); - } - } + crate::tests::create_and_validate::< + MultiLinearInterpolFastFieldSerializer, + MultiLinearinterpolFastFieldReader, + >(&data, name); } #[test] diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index b3bc13d17..9259a4116 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -1,4 +1,4 @@ -use crate::fastfield::serializer::BitpackedFastFieldSerializer; +use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy; use crate::fastfield::CompositeFastFieldSerializer; use crate::postings::UnorderedTermId; use crate::schema::{Document, Field}; @@ -154,7 +154,7 @@ impl MultiValuedFastFieldWriter { } { // writing the values themselves. 
- let mut value_serializer: BitpackedFastFieldSerializer<'_, _>; + let mut value_serializer: BitpackedFastFieldSerializerLegacy<'_, _>; match mapping_opt { Some(mapping) => { value_serializer = serializer.new_u64_fast_field_with_idx( diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index a5192e222..4298ef457 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -10,12 +10,12 @@ use crate::schema::FAST; use crate::DocId; use fastfield_codecs::bitpacked::BitpackedFastFieldReader as BitpackedReader; use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer; +use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader; use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer; -use fastfield_codecs::linearinterpol::LinearinterpolFastFieldReader; use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer; use fastfield_codecs::multilinearinterpol::MultiLinearinterpolFastFieldReader; use fastfield_codecs::CodecId; -use fastfield_codecs::CodecReader; +use fastfield_codecs::FastFieldCodecReader; use std::collections::HashMap; use std::marker::PhantomData; use std::path::Path; @@ -69,7 +69,7 @@ pub enum DynamicFastFieldReader { /// Bitpacked compressed fastfield data. 
Bitpacked(FastFieldReaderCodecWrapper), /// Linear interpolated values + bitpacked - LinearInterpol(FastFieldReaderCodecWrapper), + LinearInterpol(FastFieldReaderCodecWrapper), /// Blockwise linear interpolated values + bitpacked MultiLinearInterpol(FastFieldReaderCodecWrapper), } @@ -81,7 +81,7 @@ impl DynamicFastFieldReader { let id = bytes.read_u8(); let reader = match id { - BitpackedFastFieldSerializer::>::ID => { + BitpackedFastFieldSerializer::ID => { DynamicFastFieldReader::Bitpacked(FastFieldReaderCodecWrapper::< Item, BitpackedReader, @@ -90,7 +90,7 @@ impl DynamicFastFieldReader { LinearInterpolFastFieldSerializer::ID => { DynamicFastFieldReader::LinearInterpol(FastFieldReaderCodecWrapper::< Item, - LinearinterpolFastFieldReader, + LinearInterpolFastFieldReader, >::open_from_bytes(bytes)?) } MultiLinearInterpolFastFieldSerializer::ID => { @@ -154,12 +154,12 @@ pub struct FastFieldReaderCodecWrapper { _phantom: PhantomData, } -impl FastFieldReaderCodecWrapper { +impl FastFieldReaderCodecWrapper { /// Opens a fast field given a file. pub fn open(file: FileSlice) -> crate::Result { let mut bytes = file.read_bytes()?; let id = u8::deserialize(&mut bytes)?; - assert_eq!(BitpackedFastFieldSerializer::>::ID, id); + assert_eq!(BitpackedFastFieldSerializer::ID, id); Self::open_from_bytes(bytes) } /// Opens a fast field given the bytes. @@ -194,7 +194,7 @@ impl FastFieldReaderCodecWrapper { } } -impl FastFieldReader +impl FastFieldReader for FastFieldReaderCodecWrapper { /// Return the value associated to the given document. 
diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index 3c90ab1b4..e1644920b 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -6,10 +6,11 @@ use crate::schema::Field; use fastfield_codecs::CodecId; //pub use bitpacked::BitpackedFastFieldSerializer; pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer; +pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializerLegacy; use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer; use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer; +pub use fastfield_codecs::FastFieldCodecSerializer; pub use fastfield_codecs::FastFieldDataAccess; -pub use fastfield_codecs::FastFieldSerializerEstimate; pub use fastfield_codecs::FastFieldStats; use std::io::{self, Write}; @@ -60,12 +61,9 @@ impl CompositeFastFieldSerializer { { let (ratio, name, id) = ( - BitpackedFastFieldSerializer::>::estimate( - &fastfield_accessor, - stats.clone(), - ), - BitpackedFastFieldSerializer::>::NAME, - BitpackedFastFieldSerializer::>::ID, + BitpackedFastFieldSerializer::estimate(&fastfield_accessor, stats.clone()), + BitpackedFastFieldSerializer::NAME, + BitpackedFastFieldSerializer::ID, ); estimations.push((ratio, name, id)); } @@ -107,12 +105,13 @@ impl CompositeFastFieldSerializer { ); // todo print actual field name id.serialize(field_write)?; match name { - BitpackedFastFieldSerializer::>::NAME => { + BitpackedFastFieldSerializer::NAME => { BitpackedFastFieldSerializer::create( field_write, &fastfield_accessor, stats, data_iter_1, + data_iter_2, )?; } LinearInterpolFastFieldSerializer::NAME => { @@ -147,7 +146,7 @@ impl CompositeFastFieldSerializer { field: Field, min_value: u64, max_value: u64, - ) -> io::Result>> { + ) -> io::Result>> { self.new_u64_fast_field_with_idx(field, min_value, max_value, 0) } @@ -158,12 +157,12 @@ impl CompositeFastFieldSerializer { min_value: u64, max_value: u64, idx: usize, - ) -> io::Result>> 
{ + ) -> io::Result>> { let field_write = self.composite_write.for_field_with_idx(field, idx); // Prepend codec id to field data for compatibility with DynamicFastFieldReader. - let id = BitpackedFastFieldSerializer::>::ID; + let id = BitpackedFastFieldSerializer::ID; id.serialize(field_write)?; - BitpackedFastFieldSerializer::open(field_write, min_value, max_value) + BitpackedFastFieldSerializerLegacy::open(field_write, min_value, max_value) } /// Start serializing a new [u8] fast field