From 245ed5fed13c8159f2f147c77711966f74510152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Massot?= Date: Tue, 30 Nov 2021 11:11:57 +0100 Subject: [PATCH] Add float dataset for comparing fast field codec. --- fastfield_codecs/Makefile | 2 +- fastfield_codecs/src/main.rs | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fastfield_codecs/Makefile b/fastfield_codecs/Makefile index 1caeaebcc..4a51fed63 100644 --- a/fastfield_codecs/Makefile +++ b/fastfield_codecs/Makefile @@ -1,4 +1,4 @@ -DATASETS ?= hdfs_logs_timestamps http_logs_timestamps amazon_reviews_product_ids +DATASETS ?= hdfs_logs_timestamps http_logs_timestamps amazon_reviews_product_ids nooc_temperatures.txt download: @echo "--- Downloading datasets ---" mkdir -p datasets diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index cc912f565..d84b0dd70 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -1,5 +1,6 @@ #[macro_use] extern crate prettytable; +use common::f64_to_u64; use fastfield_codecs::bitpacked::BitpackedFastFieldReader; use fastfield_codecs::frame_of_reference::{ FrameOfReferenceFastFieldReader, FramedOfReferenceFastFieldSerializer, @@ -146,6 +147,9 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> { data.sort_unstable(); data_and_names.push((data, "Amazon review product ids SORTED")); + let data = load_float_dataset("datasets/nooc_temperatures.txt"); + data_and_names.push((data.clone(), "Temperatures")); + data_and_names } @@ -161,6 +165,22 @@ pub fn load_dataset(file_path: &str) -> Vec<u64> { data } +pub fn load_float_dataset(file_path: &str) -> Vec<u64> { + println!("Load float dataset from `{}`", file_path); + let file = File::open(file_path).expect("Error when opening file."); + let lines = io::BufReader::new(file).lines(); + let mut data = Vec::new(); + for line in lines { + let line_string = line.unwrap(); + let value = line_string.parse::<f64>().unwrap(); + let bytes = 
value.to_le_bytes(); + let u64_value = u64::from_le_bytes(bytes); + data.push(u64_value); + } + println!("len {}, {} {}", data.len(), data[0], data[1]); + data +} + pub fn serialize_with_codec( data: &[u64], ) -> (bool, f32, f32, &'static str, Duration, Duration) {