diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index c30df44e5..5a4227dca 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -28,12 +28,14 @@ mod tests { } fn get_reader_for_bench<Codec: FastFieldCodec>(data: &[u64]) -> Codec::Reader { let mut bytes = Vec::new(); + let min_value = *data.iter().min().unwrap(); + let data = data.iter().map(|el| *el - min_value).collect::<Vec<_>>(); let col = VecColumn::from(&data); let normalized_header = fastfield_codecs::NormalizedHeader { num_vals: col.num_vals(), max_value: col.max_value(), }; - Codec::serialize(&VecColumn::from(data), &mut bytes).unwrap(); + Codec::serialize(&VecColumn::from(&data), &mut bytes).unwrap(); Codec::open_from_bytes(OwnedBytes::new(bytes), normalized_header).unwrap() } fn bench_get<Codec: FastFieldCodec>(b: &mut Bencher, data: &[u64]) { @@ -65,10 +67,13 @@ mod tests { bench_get_dynamic_helper(b, col); } fn bench_create<Codec: FastFieldCodec>(b: &mut Bencher, data: &[u64]) { + let min_value = *data.iter().min().unwrap(); + let data = data.iter().map(|el| *el - min_value).collect::<Vec<_>>(); + let mut bytes = Vec::new(); b.iter(|| { bytes.clear(); - Codec::serialize(&VecColumn::from(data), &mut bytes).unwrap(); + Codec::serialize(&VecColumn::from(&data), &mut bytes).unwrap(); }); } diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 8e62c610e..d2adf6500 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -335,13 +335,14 @@ mod tests { #[cfg(all(test, feature = "unstable"))] mod bench { + use std::iter; use std::sync::Arc; + use column::ColumnV2Ext; use rand::prelude::*; use test::{self, Bencher}; use super::*; - use crate::column::ColumnV2; use crate::Column; // Warning: this generates the same permutation at each call @@ -385,31 +386,88 @@ mod bench { }); } - #[bench] - fn bench_intfastfield_jumpy_fflookup_u128(b: &mut Bencher) { + fn get_u128_column_permutation() -> Arc<dyn ColumnV2Ext<u128>> { let permutation = generate_permutation(); - let n = permutation.len(); - let 
permutation = permutation.iter().map(|el| *el as u128).collect::<Vec<_>>(); - - let compressor = - CompactSpaceCompressor::train_from(permutation.iter().cloned(), permutation.len()); - let data = compressor - .compress(permutation.iter().cloned().map(Some)) - .unwrap(); + let permutation = permutation + .iter() + .map(|el| *el as u128) + .map(Some) + .collect::<Vec<_>>(); + get_u128_column(&permutation) + } + fn get_data_50percent_item() -> (u128, u128, Vec<Option<u128>>) { + let mut permutation = generate_permutation(); + let major_item = permutation[0]; + let minor_item = permutation[1]; + permutation.extend(iter::repeat(major_item).take(permutation.len())); + permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); + let permutation = permutation + .iter() + .map(|el| Some(*el as u128)) + .collect::<Vec<_>>(); + (major_item as u128, minor_item as u128, permutation) + } + fn get_u128_column(data: &[Option<u128>]) -> Arc<dyn ColumnV2Ext<u128>> { + let compressor = CompactSpaceCompressor::train_from(VecColumn::from(&data)); + let data = compressor.compress(data.iter().cloned()).unwrap(); let data = OwnedBytes::new(data); - let column: Arc<dyn ColumnV2<u128>> = + let column: Arc<dyn ColumnV2Ext<u128>> = Arc::new(CompactSpaceDecompressor::open(data).unwrap()); + column + } + + #[bench] + fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) { + let (major_item, _minor_item, data) = get_data_50percent_item(); + let column = get_u128_column(&data); + + b.iter(|| column.get_between_vals(major_item..=major_item)); + } + + #[bench] + fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) { + let (_major_item, minor_item, data) = get_data_50percent_item(); + let column = get_u128_column(&data); + + b.iter(|| column.get_between_vals(minor_item..=minor_item)); + } + + #[bench] + fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) { + let (_major_item, _minor_item, data) = get_data_50percent_item(); + let column = get_u128_column(&data); + + b.iter(|| column.get_between_vals(0..=u128::MAX)); + } + + #[bench] + fn 
bench_intfastfield_jumpy_fflookup_u128(b: &mut Bencher) { + let column = get_u128_column_permutation(); b.iter(|| { let mut a = 0u128; - for _ in 0..n { + for _ in 0..column.num_vals() { a = column.get_val(a as u64).unwrap(); } a }); } + #[bench] + fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) { + let column = get_u128_column_permutation(); + + b.iter(|| { + let n = column.num_vals(); + let mut a = 0u128; + for i in (0..n / 5).map(|val| val * 5) { + a += column.get_val(i as u64).unwrap(); + } + a + }); + } + #[bench] fn bench_intfastfield_stride7_vec(b: &mut Bencher) { let permutation = generate_permutation(); diff --git a/fastfield_codecs/src/serialize.rs b/fastfield_codecs/src/serialize.rs index 4753bb443..abb82eeb1 100644 --- a/fastfield_codecs/src/serialize.rs +++ b/fastfield_codecs/src/serialize.rs @@ -215,7 +215,7 @@ pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>( column: &[T], ) -> Arc<dyn Column<T>> { let mut buffer = Vec::new(); - super::serialize(VecColumn::from(column), &mut buffer, &ALL_CODEC_TYPES).unwrap(); + super::serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap(); super::open(OwnedBytes::new(buffer)).unwrap() }