mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-11 11:32:54 +00:00
Add float dataset for comparing fast field codec.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
DATASETS ?= hdfs_logs_timestamps http_logs_timestamps amazon_reviews_product_ids
|
||||
DATASETS ?= hdfs_logs_timestamps http_logs_timestamps amazon_reviews_product_ids nooc_temperatures.txt
|
||||
download:
|
||||
@echo "--- Downloading datasets ---"
|
||||
mkdir -p datasets
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#[macro_use]
|
||||
extern crate prettytable;
|
||||
use common::f64_to_u64;
|
||||
use fastfield_codecs::bitpacked::BitpackedFastFieldReader;
|
||||
use fastfield_codecs::frame_of_reference::{
|
||||
FrameOfReferenceFastFieldReader, FramedOfReferenceFastFieldSerializer,
|
||||
@@ -146,6 +147,9 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
|
||||
data.sort_unstable();
|
||||
data_and_names.push((data, "Amazon review product ids SORTED"));
|
||||
|
||||
let data = load_float_dataset("datasets/nooc_temperatures.txt");
|
||||
data_and_names.push((data.clone(), "Temperatures"));
|
||||
|
||||
data_and_names
|
||||
}
|
||||
|
||||
@@ -161,6 +165,22 @@ pub fn load_dataset(file_path: &str) -> Vec<u64> {
|
||||
data
|
||||
}
|
||||
|
||||
/// Loads a text file containing one floating-point value per line and returns
/// each value as the raw bit pattern of its `f64` representation.
///
/// Surrounding whitespace (including a trailing `\r` from CRLF line endings)
/// is ignored, and blank lines are skipped, so a file that ends with an empty
/// line loads cleanly.
///
/// The returned integers are the IEEE-754 bits as given by [`f64::to_bits`];
/// this is exactly what the previous `to_le_bytes` / `from_le_bytes` round
/// trip produced. Note that raw bit patterns do not sort in the same order as
/// the floats they encode once negative values are involved.
/// NOTE(review): the file also imports `common::f64_to_u64`; if an
/// order-preserving mapping was intended here, confirm and switch to it.
///
/// # Panics
///
/// Panics if the file cannot be opened, if a line cannot be read, or if a
/// non-blank line does not parse as an `f64`. The panic message names the
/// offending line.
pub fn load_float_dataset(file_path: &str) -> Vec<u64> {
    println!("Load float dataset from `{}`", file_path);
    let file = File::open(file_path).expect("Error when opening file.");

    let mut data = Vec::new();
    for (line_idx, line) in io::BufReader::new(file).lines().enumerate() {
        let line = line.unwrap_or_else(|err| {
            panic!(
                "Error when reading line {} of `{}`: {}",
                line_idx + 1,
                file_path,
                err
            )
        });
        // `lines()` strips `\n` but `str::parse::<f64>` rejects any leftover
        // whitespace, so trim before parsing and tolerate empty lines.
        let text = line.trim();
        if text.is_empty() {
            continue;
        }
        let value: f64 = text.parse().unwrap_or_else(|err| {
            panic!(
                "Invalid float `{}` on line {} of `{}`: {}",
                text,
                line_idx + 1,
                file_path,
                err
            )
        });
        data.push(value.to_bits());
    }

    // Preview the first two values only when they exist; indexing
    // unconditionally would panic on an empty or single-value dataset.
    match data.as_slice() {
        [first, second, ..] => println!("len {}, {} {}", data.len(), first, second),
        _ => println!("len {}", data.len()),
    }
    data
}
|
||||
|
||||
pub fn serialize_with_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
|
||||
data: &[u64],
|
||||
) -> (bool, f32, f32, &'static str, Duration, Duration) {
|
||||
|
||||
Reference in New Issue
Block a user