diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 602c7fda0..deb3e7943 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -90,6 +90,7 @@ pub struct FastFieldStats { pub num_vals: u64, } +#[cfg(test)] impl<'a> Column for &'a [u64] { fn get_val(&self, position: u64) -> u64 { self[position as usize] @@ -112,26 +113,6 @@ impl<'a> Column for &'a [u64] { } } -impl Column for Vec { - fn get_val(&self, position: u64) -> u64 { - self[position as usize] - } - fn iter<'b>(&'b self) -> Box + 'b> { - Box::new((self as &[u64]).iter().cloned()) - } - fn min_value(&self) -> u64 { - self.iter().min().unwrap_or(0) - } - - fn max_value(&self) -> u64 { - self.iter().max().unwrap_or(0) - } - - fn num_vals(&self) -> u64 { - self.len() as u64 - } -} - #[cfg(test)] mod tests { use proptest::arbitrary::any; @@ -235,6 +216,7 @@ mod tests { fn estimation_good_interpolation_case() { let data = (10..=20000_u64).collect::>(); + let data = data.as_slice(); let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap(); assert_le!(linear_interpol_estimation, 0.01); @@ -247,7 +229,7 @@ mod tests { } #[test] fn estimation_test_bad_interpolation_case() { - let data = vec![200, 10, 10, 10, 10, 1000, 20]; + let data: &[u64] = &[200, 10, 10, 10, 10, 1000, 20]; let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap(); assert_le!(linear_interpol_estimation, 0.32); @@ -259,6 +241,7 @@ mod tests { fn estimation_test_bad_interpolation_case_monotonically_increasing() { let mut data: Vec = (200..=20000_u64).collect(); data.push(1_000_000); + let data = data.as_slice(); // in this case the linear interpolation can't in fact not be worse than bitpacking, // but the estimator adds some threshold, which leads to estimated worse behavior diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index 848392b66..cbe5b3198 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -3,9 +3,33 @@ extern crate prettytable; use fastfield_codecs::bitpacked::BitpackedCodec; use fastfield_codecs::blockwise_linear::BlockwiseLinearCodec; use fastfield_codecs::linear::LinearCodec; -use fastfield_codecs::{FastFieldCodec, FastFieldCodecType, FastFieldStats}; +use fastfield_codecs::{Column, FastFieldCodec, FastFieldCodecType, FastFieldStats}; use prettytable::{Cell, Row, Table}; +struct Data<'a>(&'a [u64]); + +impl<'a> Column for Data<'a> { + fn get_val(&self, position: u64) -> u64 { + self.0[position as usize] + } + + fn iter<'b>(&'b self) -> Box + 'b> { + Box::new(self.0.iter().cloned()) + } + + fn min_value(&self) -> u64 { + *self.0.iter().min().unwrap_or(&0) + } + + fn max_value(&self) -> u64 { + *self.0.iter().max().unwrap_or(&0) + } + + fn num_vals(&self) -> u64 { + self.0.len() as u64 + } +} + fn main() { let mut table = Table::new(); @@ -86,10 +110,11 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec, &'static str)> { pub fn serialize_with_codec( data: &[u64], ) -> Option<(f32, f32, FastFieldCodecType)> { + let data = Data(data); let estimation = C::estimate(&data)?; let mut out = Vec::new(); C::serialize(&mut out, &data).unwrap(); - let actual_compression = out.len() as f32 / (data.len() * 8) as f32; + let actual_compression = out.len() as f32 / (data.num_vals() * 8) as f32; Some((estimation, actual_compression, C::CODEC_TYPE)) } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index a3d6f9408..fa2bac324 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -133,7 +133,7 @@ impl TermOrdinalMapping { fn max_term_ord(&self) -> TermOrdinal { self.per_segment_new_term_ordinals .iter() - .flat_map(|term_ordinals| term_ordinals.iter().max()) + .flat_map(|term_ordinals| term_ordinals.iter().max().cloned()) .max() .unwrap_or_default() } @@ -784,7 +784,7 @@ impl IndexMerger { let new_doc_id: DocId = self.offsets .iter() - .position(|offset| offset > pos) + .position(|&offset| offset > pos) .expect("pos is out of bounds") as DocId - 1u32;