Add a float dataset for comparing fast field codecs.

This commit is contained in:
François Massot
2021-11-30 11:11:57 +01:00
parent 33bed01168
commit 245ed5fed1
2 changed files with 21 additions and 1 deletions

View File

@@ -1,4 +1,4 @@
DATASETS ?= hdfs_logs_timestamps http_logs_timestamps amazon_reviews_product_ids
DATASETS ?= hdfs_logs_timestamps http_logs_timestamps amazon_reviews_product_ids nooc_temperatures.txt
download:
@echo "--- Downloading datasets ---"
mkdir -p datasets

View File

@@ -1,5 +1,6 @@
#[macro_use]
extern crate prettytable;
use common::f64_to_u64;
use fastfield_codecs::bitpacked::BitpackedFastFieldReader;
use fastfield_codecs::frame_of_reference::{
FrameOfReferenceFastFieldReader, FramedOfReferenceFastFieldSerializer,
@@ -146,6 +147,9 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
data.sort_unstable();
data_and_names.push((data, "Amazon review product ids SORTED"));
let data = load_float_dataset("datasets/nooc_temperatures.txt");
data_and_names.push((data.clone(), "Temperatures"));
data_and_names
}
@@ -161,6 +165,22 @@ pub fn load_dataset(file_path: &str) -> Vec<u64> {
data
}
/// Loads a text file containing one floating point number per line and
/// returns every value as the raw IEEE-754 bit pattern of its `f64`.
///
/// The bits are stored as-is (`f64::to_bits`); no order-preserving mapping
/// is applied. Surrounding whitespace is ignored and blank lines are skipped.
/// A short summary (length and, when available, the first two values) is
/// printed to stdout.
///
/// # Panics
///
/// Panics if the file cannot be opened, if a line cannot be read, or if a
/// non-blank line is not a valid `f64`.
pub fn load_float_dataset(file_path: &str) -> Vec<u64> {
    println!("Load float dataset from `{}`", file_path);
    let file = File::open(file_path).expect("Error when opening file.");
    let mut data = Vec::new();
    for (line_idx, line) in io::BufReader::new(file).lines().enumerate() {
        let line_string = line.unwrap_or_else(|err| {
            panic!(
                "Error when reading line {} of `{}`: {}",
                line_idx + 1,
                file_path,
                err
            )
        });
        let trimmed = line_string.trim();
        if trimmed.is_empty() {
            continue;
        }
        let value: f64 = trimmed.parse().unwrap_or_else(|err| {
            panic!(
                "Invalid float `{}` on line {} of `{}`: {}",
                trimmed,
                line_idx + 1,
                file_path,
                err
            )
        });
        // Same bits as `u64::from_le_bytes(value.to_le_bytes())`.
        data.push(value.to_bits());
    }
    // Only show sample values when they exist: indexing unconditionally
    // would panic on datasets with fewer than two entries.
    match data.as_slice() {
        [first, second, ..] => println!("len {}, {} {}", data.len(), first, second),
        _ => println!("len {}", data.len()),
    }
    data
}
pub fn serialize_with_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
) -> (bool, f32, f32, &'static str, Duration, Duration) {