diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index 5a4227dca..cabf2e824 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -4,9 +4,222 @@ extern crate test; #[cfg(test)] mod tests { + use std::iter; use std::sync::Arc; use fastfield_codecs::*; + use rand::prelude::*; + + use super::*; + + // Warning: this generates the same permutation at each call + fn generate_permutation() -> Vec { + let mut permutation: Vec = (0u64..100_000u64).collect(); + permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); + permutation + } + + fn generate_random() -> Vec { + let mut permutation: Vec = (0u64..100_000u64) + .map(|el| el + random::() as u64) + .collect(); + permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); + permutation + } + + // Warning: this generates the same permutation at each call + fn generate_permutation_gcd() -> Vec { + let mut permutation: Vec = (1u64..100_000u64).map(|el| el * 1000).collect(); + permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); + permutation + } + + pub fn serialize_and_load( + column: &[T], + ) -> Arc> { + let mut buffer = Vec::new(); + serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap(); + open(OwnedBytes::new(buffer)).unwrap() + } + + #[bench] + fn bench_intfastfield_jumpy_veclookup(b: &mut Bencher) { + let permutation = generate_permutation(); + let n = permutation.len(); + b.iter(|| { + let mut a = 0u64; + for _ in 0..n { + a = permutation[a as usize]; + } + a + }); + } + + #[bench] + fn bench_intfastfield_jumpy_fflookup(b: &mut Bencher) { + let permutation = generate_permutation(); + let n = permutation.len(); + let column: Arc> = serialize_and_load(&permutation); + b.iter(|| { + let mut a = 0u64; + for _ in 0..n { + a = column.get_val(a as u64); + } + a + }); + } + + fn get_exp_data() -> Vec { + let mut data = vec![]; + for i in 0..100 { + let num = i * i; + data.extend(iter::repeat(i as u64).take(num)); + } + data.shuffle(&mut StdRng::from_seed([1u8; 32])); + + // lengt = 328350 + data + } + + fn get_data_50percent_item() -> (u128, u128, Vec) { + let mut permutation = get_exp_data(); + let major_item = 20; + let minor_item = 10; + permutation.extend(iter::repeat(major_item).take(permutation.len())); + permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); + let permutation = permutation.iter().map(|el| *el as u128).collect::>(); + (major_item as u128, minor_item as u128, permutation) + } + fn get_u128_column_random() -> Arc> { + let permutation = generate_random(); + let permutation = permutation.iter().map(|el| *el as u128).collect::>(); + get_u128_column_from_data(&permutation) + } + + fn get_u128_column_from_data(data: &[u128]) -> Arc> { + let mut out = vec![]; + serialize_u128(VecColumn::from(&data), &mut out).unwrap(); + let out = OwnedBytes::new(out); + open_u128(out).unwrap() + } + + #[bench] + fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) { + let (major_item, _minor_item, data) = get_data_50percent_item(); + let column = get_u128_column_from_data(&data); + + b.iter(|| column.get_between_vals(major_item..=major_item)); + } + + #[bench] + fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) { + let (_major_item, minor_item, data) = get_data_50percent_item(); + let column = get_u128_column_from_data(&data); + + b.iter(|| column.get_between_vals(minor_item..=minor_item)); + } + + #[bench] + fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) { + let (_major_item, _minor_item, data) = get_data_50percent_item(); + let column = get_u128_column_from_data(&data); + + b.iter(|| column.get_between_vals(0..=u128::MAX)); + } + + #[bench] + fn bench_intfastfield_scan_all_fflookup_u128(b: &mut Bencher) { + let column = get_u128_column_random(); + + b.iter(|| { + let mut a = 0u128; + for i in 0u64..column.num_vals() as u64 { + a += column.get_val(i); + } + a + }); + } + + #[bench] + fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) { + let column = get_u128_column_random(); + + b.iter(|| { + let n = column.num_vals(); + let mut a = 0u128; + for i in (0..n / 5).map(|val| val * 5) { + a += column.get_val(i as u64); + } + a + }); + } + + #[bench] + fn bench_intfastfield_stride7_vec(b: &mut Bencher) { + let permutation = generate_permutation(); + let n = permutation.len(); + b.iter(|| { + let mut a = 0u64; + for i in (0..n / 7).map(|val| val * 7) { + a += permutation[i as usize]; + } + a + }); + } + + #[bench] + fn bench_intfastfield_stride7_fflookup(b: &mut Bencher) { + let permutation = generate_permutation(); + let n = permutation.len(); + let column: Arc> = serialize_and_load(&permutation); + b.iter(|| { + let mut a = 0u64; + for i in (0..n / 7).map(|val| val * 7) { + a += column.get_val(i as u64); + } + a + }); + } + + #[bench] + fn bench_intfastfield_scan_all_fflookup(b: &mut Bencher) { + let permutation = generate_permutation(); + let n = permutation.len(); + let column: Arc> = serialize_and_load(&permutation); + b.iter(|| { + let mut a = 0u64; + for i in 0u64..n as u64 { + a += column.get_val(i); + } + a + }); + } + + #[bench] + fn bench_intfastfield_scan_all_fflookup_gcd(b: &mut Bencher) { + let permutation = generate_permutation_gcd(); + let n = permutation.len(); + let column: Arc> = serialize_and_load(&permutation); + b.iter(|| { + let mut a = 0u64; + for i in 0..n as u64 { + a += column.get_val(i); + } + a + }); + } + + #[bench] + fn bench_intfastfield_scan_all_vec(b: &mut Bencher) { + let permutation = generate_permutation(); + b.iter(|| { + let mut a = 0u64; + for i in 0..permutation.len() { + a += permutation[i as usize] as u64; + } + a + }); + } fn get_data() -> Vec { let mut rng = StdRng::seed_from_u64(2u64); diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 85fbb1558..5971ce422 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -11,7 +11,6 @@ use std::io; use std::io::Write; use std::sync::Arc; -use column::ColumnExt; use common::BinarySerializable; use compact_space::CompactSpaceDecompressor; use ownedbytes::OwnedBytes; @@ -30,7 +29,7 @@ mod serialize; use self::bitpacked::BitpackedCodec; use self::blockwise_linear::BlockwiseLinearCodec; -pub use self::column::{monotonic_map_column, Column, VecColumn}; +pub use self::column::{monotonic_map_column, Column, ColumnExt, VecColumn}; pub use self::compact_space::ip_to_u128; use self::linear::LinearCodec; pub use self::monotonic_mapping::MonotonicallyMappableToU64; @@ -340,208 +339,3 @@ mod tests { assert_eq!(count_codec, 3); } } - -#[cfg(all(test, feature = "unstable"))] -mod bench { - use std::iter; - use std::sync::Arc; - - use column::ColumnExt; - use rand::prelude::*; - use test::{self, Bencher}; - - use super::*; - use crate::Column; - - // Warning: this generates the same permutation at each call - fn generate_permutation() -> Vec { - let mut permutation: Vec = (0u64..100_000u64).collect(); - permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); - permutation - } - - // Warning: this generates the same permutation at each call - fn generate_permutation_gcd() -> Vec { - let mut permutation: Vec = (1u64..100_000u64).map(|el| el * 1000).collect(); - permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); - permutation - } - - #[bench] - fn bench_intfastfield_jumpy_veclookup(b: &mut Bencher) { - let permutation = generate_permutation(); - let n = permutation.len(); - b.iter(|| { - let mut a = 0u64; - for _ in 0..n { - a = permutation[a as usize]; - } - a - }); - } - - #[bench] - fn bench_intfastfield_jumpy_fflookup(b: &mut Bencher) { - let permutation = generate_permutation(); - let n = permutation.len(); - let column: Arc> = crate::serialize_and_load(&permutation); - b.iter(|| { - let mut a = 0u64; - for _ in 0..n { - a = column.get_val(a as u64); - } - a - }); - } - - fn get_exp_data() -> Vec { - let mut data = vec![]; - for i in 0..100 { - let num = i * i; - data.extend(iter::repeat(i as u64).take(num)); - } - data.shuffle(&mut StdRng::from_seed([1u8; 32])); - - // lengt = 328350 - data - } - - fn get_u128_column_permutation() -> Arc> { - let permutation = generate_permutation(); - let permutation = permutation.iter().map(|el| *el as u128).collect::>(); - get_u128_column(&permutation) - } - fn get_data_50percent_item() -> (u128, u128, Vec) { - let mut permutation = get_exp_data(); - let major_item = 20; - let minor_item = 10; - permutation.extend(iter::repeat(major_item).take(permutation.len())); - permutation.shuffle(&mut StdRng::from_seed([1u8; 32])); - let permutation = permutation.iter().map(|el| *el as u128).collect::>(); - (major_item as u128, minor_item as u128, permutation) - } - fn get_u128_column(data: &[u128]) -> Arc> { - let mut out = vec![]; - serialize_u128(VecColumn::from(&data), &mut out).unwrap(); - let out = OwnedBytes::new(out); - open_u128(out).unwrap() - } - - #[bench] - fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) { - let (major_item, _minor_item, data) = get_data_50percent_item(); - let column = get_u128_column(&data); - - b.iter(|| column.get_between_vals(major_item..=major_item)); - } - - #[bench] - fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) { - let (_major_item, minor_item, data) = get_data_50percent_item(); - let column = get_u128_column(&data); - - b.iter(|| column.get_between_vals(minor_item..=minor_item)); - } - - #[bench] - fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) { - let (_major_item, _minor_item, data) = get_data_50percent_item(); - let column = get_u128_column(&data); - - b.iter(|| column.get_between_vals(0..=u128::MAX)); - } - - #[bench] - fn bench_intfastfield_jumpy_fflookup_u128(b: &mut Bencher) { - let column = get_u128_column_permutation(); - - b.iter(|| { - let mut a = 0u128; - for _ in 0..column.num_vals() { - a = column.get_val(a as u64); - } - a - }); - } - - #[bench] - fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) { - let column = get_u128_column_permutation(); - - b.iter(|| { - let n = column.num_vals(); - let mut a = 0u128; - for i in (0..n / 5).map(|val| val * 5) { - a += column.get_val(i as u64); - } - a - }); - } - - #[bench] - fn bench_intfastfield_stride7_vec(b: &mut Bencher) { - let permutation = generate_permutation(); - let n = permutation.len(); - b.iter(|| { - let mut a = 0u64; - for i in (0..n / 7).map(|val| val * 7) { - a += permutation[i as usize]; - } - a - }); - } - - #[bench] - fn bench_intfastfield_stride7_fflookup(b: &mut Bencher) { - let permutation = generate_permutation(); - let n = permutation.len(); - let column: Arc> = crate::serialize_and_load(&permutation); - b.iter(|| { - let mut a = 0u64; - for i in (0..n / 7).map(|val| val * 7) { - a += column.get_val(i as u64); - } - a - }); - } - - #[bench] - fn bench_intfastfield_scan_all_fflookup(b: &mut Bencher) { - let permutation = generate_permutation(); - let n = permutation.len(); - let column: Arc> = crate::serialize_and_load(&permutation); - b.iter(|| { - let mut a = 0u64; - for i in 0u64..n as u64 { - a += column.get_val(i); - } - a - }); - } - - #[bench] - fn bench_intfastfield_scan_all_fflookup_gcd(b: &mut Bencher) { - let permutation = generate_permutation_gcd(); - let n = permutation.len(); - let column: Arc> = crate::serialize_and_load(&permutation); - b.iter(|| { - let mut a = 0u64; - for i in 0..n as u64 { - a += column.get_val(i); - } - a - }); - } - - #[bench] - fn bench_intfastfield_scan_all_vec(b: &mut Bencher) { - let permutation = generate_permutation(); - b.iter(|| { - let mut a = 0u64; - for i in 0..permutation.len() { - a += permutation[i as usize] as u64; - } - a - }); - } -} diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index 28de82f50..d3d9c06f8 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -6,9 +6,7 @@ use std::io::BufRead; use std::net::{IpAddr, Ipv6Addr}; use std::str::FromStr; -use fastfield_codecs::{ - open_u128, serialize_u128, Column, FastFieldCodecType, FastFieldStats, VecColumn, -}; +use fastfield_codecs::{open_u128, serialize_u128, Column, FastFieldCodecType, VecColumn}; use itertools::Itertools; use measure_time::print_time; use ownedbytes::OwnedBytes;