mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-23 11:40:40 +00:00
rename, add codec_tester
This commit is contained in:
@@ -1,4 +1,3 @@
|
||||
use crate::CodecId;
|
||||
use crate::FastFieldCodecReader;
|
||||
use crate::FastFieldCodecSerializer;
|
||||
use crate::FastFieldDataAccess;
|
||||
@@ -100,6 +99,8 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
|
||||
pub struct BitpackedFastFieldSerializer {}
|
||||
|
||||
impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
|
||||
const NAME: &'static str = "Bitpacked";
|
||||
const ID: u8 = 1;
|
||||
/// Creates a new fast field serializer.
|
||||
///
|
||||
/// The serializer in fact encodes the values by bitpacking
|
||||
@@ -132,10 +133,6 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
|
||||
num_bits as f32 / num_bits_uncompressed as f32
|
||||
}
|
||||
}
|
||||
impl CodecId for BitpackedFastFieldSerializer {
|
||||
const NAME: &'static str = "Bitpacked";
|
||||
const ID: u8 = 1;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -22,6 +22,11 @@ pub trait FastFieldCodecReader: Sized {
|
||||
/// The FastFieldCodecSerializer trait is required on all variants
|
||||
/// of fast field compressions, to decide which one to choose.
|
||||
pub trait FastFieldCodecSerializer {
|
||||
/// A codec needs to provide a unique name and id, which is
|
||||
/// used for debugging and de/serialization.
|
||||
const NAME: &'static str;
|
||||
const ID: u8;
|
||||
|
||||
/// Returns an estimate of the compression ratio. If the compressor is unable to handle the
|
||||
/// data it needs to return f32::MAX.
|
||||
/// The baseline is uncompressed 64bit data.
|
||||
@@ -39,15 +44,6 @@ pub trait FastFieldCodecSerializer {
|
||||
) -> io::Result<()>;
|
||||
}
|
||||
|
||||
/// `CodecId` is required by each Codec.
|
||||
///
|
||||
/// It needs to provide a unique name and id, which is
|
||||
/// used for debugging and de/serialization.
|
||||
pub trait CodecId {
|
||||
const NAME: &'static str;
|
||||
const ID: u8;
|
||||
}
|
||||
|
||||
/// FastFieldDataAccess is the trait to access fast field data during serialization and estimation.
|
||||
pub trait FastFieldDataAccess: Clone {
|
||||
/// Return the value associated with the given document.
|
||||
@@ -82,16 +78,20 @@ impl FastFieldDataAccess for Vec<u64> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{
|
||||
bitpacked::BitpackedFastFieldSerializer, linearinterpol::LinearInterpolFastFieldSerializer,
|
||||
multilinearinterpol::MultiLinearInterpolFastFieldSerializer,
|
||||
bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer},
|
||||
linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer},
|
||||
multilinearinterpol::{
|
||||
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
|
||||
},
|
||||
};
|
||||
|
||||
pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
|
||||
data: &[u64],
|
||||
name: &str,
|
||||
) {
|
||||
if S::estimate(&data, crate::tests::stats_from_vec(&data)) == f32::MAX {
|
||||
return;
|
||||
) -> (f32, f32) {
|
||||
let estimation = S::estimate(&data, crate::tests::stats_from_vec(&data));
|
||||
if estimation == f32::MAX {
|
||||
return (estimation, 0.0);
|
||||
}
|
||||
let mut out = vec![];
|
||||
S::create(
|
||||
@@ -113,6 +113,8 @@ mod tests {
|
||||
);
|
||||
}
|
||||
}
|
||||
let actual_compression = data.len() as f32 / out.len() as f32;
|
||||
return (estimation, actual_compression);
|
||||
}
|
||||
pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
|
||||
let mut data_and_names = vec![];
|
||||
@@ -130,6 +132,35 @@ mod tests {
|
||||
data_and_names
|
||||
}
|
||||
|
||||
fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
|
||||
let codec_name = S::NAME;
|
||||
for (data, data_set_name) in get_codec_test_data_sets() {
|
||||
let (estimate, actual) =
|
||||
crate::tests::create_and_validate::<S, R>(&data, data_set_name);
|
||||
let result = if estimate == f32::MAX {
|
||||
"Disabled".to_string()
|
||||
} else {
|
||||
format!("Estimate {:?} Actual {:?} ", estimate, actual)
|
||||
};
|
||||
println!(
|
||||
"Codec {}, DataSet {}, {}",
|
||||
codec_name, data_set_name, result
|
||||
);
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
fn test_codec_bitpacking() {
|
||||
test_codec::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>();
|
||||
}
|
||||
#[test]
|
||||
fn test_codec_interpolation() {
|
||||
test_codec::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>();
|
||||
}
|
||||
#[test]
|
||||
fn test_codec_multi_interpolation() {
|
||||
test_codec::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>();
|
||||
}
|
||||
|
||||
use super::*;
|
||||
pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
|
||||
let min_value = data.iter().cloned().min().unwrap_or(0);
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::CodecId;
|
||||
use crate::FastFieldCodecReader;
|
||||
use crate::FastFieldCodecSerializer;
|
||||
use crate::FastFieldDataAccess;
|
||||
@@ -112,6 +111,8 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
|
||||
}
|
||||
|
||||
impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "LinearInterpol";
|
||||
const ID: u8 = 2;
|
||||
/// Creates a new fast field serializer.
|
||||
fn create(
|
||||
write: &mut impl Write,
|
||||
@@ -228,11 +229,6 @@ fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
|
||||
}
|
||||
}
|
||||
|
||||
impl CodecId for LinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "LinearInterpol";
|
||||
const ID: u8 = 2;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::CodecId;
|
||||
use crate::FastFieldCodecReader;
|
||||
use crate::FastFieldCodecSerializer;
|
||||
use crate::FastFieldDataAccess;
|
||||
@@ -44,7 +43,7 @@ impl<W: io::Write> io::Write for TrackWriteSize<W> {
|
||||
/// Depending on the field type, a different
|
||||
/// fast field is required.
|
||||
#[derive(Clone)]
|
||||
pub struct MultiLinearinterpolFastFieldReader {
|
||||
pub struct MultiLinearInterpolFastFieldReader {
|
||||
pub footer: MultiLinearInterpolFooter,
|
||||
}
|
||||
|
||||
@@ -164,7 +163,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
|
||||
&interpolations[get_interpolation_position(doc)]
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader {
|
||||
impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
|
||||
/// Opens a fast field given a file.
|
||||
fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
|
||||
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
|
||||
@@ -172,7 +171,7 @@ impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader {
|
||||
let (_data, mut footer) = bytes.split_at(bytes.len() - (4 + footer_len) as usize);
|
||||
let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?;
|
||||
|
||||
Ok(MultiLinearinterpolFastFieldReader { footer })
|
||||
Ok(MultiLinearInterpolFastFieldReader { footer })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -211,6 +210,8 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
|
||||
pub struct MultiLinearInterpolFastFieldSerializer {}
|
||||
|
||||
impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "MultiLinearInterpol";
|
||||
const ID: u8 = 3;
|
||||
/// Creates a new fast field serializer.
|
||||
fn create(
|
||||
write: &mut impl Write,
|
||||
@@ -378,11 +379,6 @@ fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
|
||||
}
|
||||
}
|
||||
|
||||
impl CodecId for MultiLinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "MultiLinearInterpol";
|
||||
const ID: u8 = 3;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -391,7 +387,7 @@ mod tests {
|
||||
fn create_and_validate(data: &[u64], name: &str) {
|
||||
crate::tests::create_and_validate::<
|
||||
MultiLinearInterpolFastFieldSerializer,
|
||||
MultiLinearinterpolFastFieldReader,
|
||||
MultiLinearInterpolFastFieldReader,
|
||||
>(&data, name);
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,8 @@ use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader;
|
||||
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
use fastfield_codecs::multilinearinterpol::MultiLinearinterpolFastFieldReader;
|
||||
use fastfield_codecs::CodecId;
|
||||
use fastfield_codecs::FastFieldCodecReader;
|
||||
use fastfield_codecs::FastFieldCodecSerializer;
|
||||
use std::collections::HashMap;
|
||||
use std::marker::PhantomData;
|
||||
use std::path::Path;
|
||||
|
||||
Reference in New Issue
Block a user