streamline traits and tests

This commit is contained in:
Pascal Seitz
2021-06-11 13:00:40 +02:00
parent f4d271177c
commit 294cd5fd0b
8 changed files with 226 additions and 281 deletions

View File

@@ -6,7 +6,7 @@ extern crate test;
mod tests {
use fastfield_codecs::{
bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer},
linearinterpol::{LinearInterpolFastFieldSerializer, LinearinterpolFastFieldReader},
linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer},
multilinearinterpol::{
MultiLinearInterpolFastFieldSerializer, MultiLinearinterpolFastFieldReader,
},
@@ -26,112 +26,75 @@ mod tests {
data
}
fn value_iter() -> impl Iterator<Item = u64> {
0..20_000
}
/// Benchmarks random access reads for the codec pair `S` / `R`.
///
/// `data` is serialized once with `S` outside of the measured closure; the
/// measured closure then reads every position yielded by `value_iter()`
/// through a reader of type `R` opened on the serialized bytes.
fn bench_get<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
    b: &mut Bencher,
    data: &[u64],
) {
    let mut encoded = vec![];
    let stats = stats_from_vec(data);
    let serialized = S::create(
        &mut encoded,
        &data,
        stats,
        data.iter().cloned(),
        data.iter().cloned(),
    );
    serialized.unwrap();
    let reader = R::open_from_bytes(&encoded).unwrap();
    b.iter(|| {
        // `value_iter()` already yields `u64` positions.
        value_iter().for_each(|doc| {
            reader.get_u64(doc, &encoded);
        });
    });
}
/// Benchmarks serialization of `data` with the codec `S`.
///
/// The output buffer is allocated once and emptied at the start of every
/// iteration, so each measured run serializes into an empty buffer while
/// reusing the allocation of the previous run.
fn bench_create<S: FastFieldCodecSerializer>(b: &mut Bencher, data: &[u64]) {
    let mut bytes: Vec<u8> = Vec::new();
    b.iter(|| {
        // Writing into a `Vec<u8>` appends. Without clearing, the buffer would
        // keep growing across iterations and the timings would include ever
        // larger reallocations.
        bytes.clear();
        S::create(
            &mut bytes,
            &data,
            stats_from_vec(&data),
            data.iter().cloned(),
            data.iter().cloned(),
        )
        .unwrap();
    });
}
use test::Bencher;
#[bench]
fn bench_fastfield_bitpack_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
b.iter(|| {
let mut out = vec![];
BitpackedFastFieldSerializer::create(
&mut out,
&data,
stats_from_vec(&data),
data.iter().cloned(),
)
.unwrap();
out
});
bench_create::<BitpackedFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
b.iter(|| {
let mut out = vec![];
LinearInterpolFastFieldSerializer::create(
&mut out,
&data,
stats_from_vec(&data),
data.iter().cloned(),
data.iter().cloned(),
)
.unwrap();
out
});
bench_create::<LinearInterpolFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
b.iter(|| {
let mut out = vec![];
MultiLinearInterpolFastFieldSerializer::create(
&mut out,
&data,
stats_from_vec(&data),
data.iter().cloned(),
data.iter().cloned(),
)
.unwrap();
out
});
}
fn value_iter() -> impl Iterator<Item = u64> {
0..20_000
bench_create::<MultiLinearInterpolFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_bitpack_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
let mut bytes = vec![];
BitpackedFastFieldSerializer::create(
&mut bytes,
&data,
stats_from_vec(&data),
data.iter().cloned(),
)
.unwrap();
let reader = BitpackedFastFieldReader::open_from_bytes(&bytes).unwrap();
b.iter(|| {
for pos in value_iter() {
reader.get_u64(pos as u64, &bytes);
}
});
bench_get::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(b, &data);
}
#[bench]
fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
let mut bytes = vec![];
LinearInterpolFastFieldSerializer::create(
&mut bytes,
&data,
stats_from_vec(&data),
data.iter().cloned(),
data.iter().cloned(),
)
.unwrap();
let reader = LinearinterpolFastFieldReader::open_from_bytes(&bytes).unwrap();
b.iter(|| {
for pos in value_iter() {
reader.get_u64(pos as u64, &bytes);
}
});
bench_get::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
let mut bytes = vec![];
MultiLinearInterpolFastFieldSerializer::create(
&mut bytes,
&data,
stats_from_vec(&data),
data.iter().cloned(),
data.iter().cloned(),
)
.unwrap();
let reader = MultiLinearinterpolFastFieldReader::open_from_bytes(&bytes).unwrap();
b.iter(|| {
for pos in value_iter() {
reader.get_u64(pos as u64, &bytes);
}
});
bench_get::<MultiLinearInterpolFastFieldSerializer, MultiLinearinterpolFastFieldReader>(
b, &data,
);
}
pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
let min_value = data.iter().cloned().min().unwrap_or(0);

View File

@@ -1,7 +1,7 @@
use crate::CodecId;
use crate::CodecReader;
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
use crate::FastFieldSerializerEstimate;
use crate::FastFieldStats;
use common::BinarySerializable;
use std::io::{self, Write};
@@ -19,7 +19,7 @@ pub struct BitpackedFastFieldReader {
pub max_value_u64: u64,
}
impl<'data> CodecReader for BitpackedFastFieldReader {
impl<'data> FastFieldCodecReader for BitpackedFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
let (_data, mut footer) = bytes.split_at(bytes.len() - 16);
@@ -47,7 +47,7 @@ impl<'data> CodecReader for BitpackedFastFieldReader {
self.max_value_u64
}
}
pub struct BitpackedFastFieldSerializer<'a, W: 'a + Write> {
pub struct BitpackedFastFieldSerializerLegacy<'a, W: 'a + Write> {
bit_packer: BitPacker,
write: &'a mut W,
min_value: u64,
@@ -55,7 +55,7 @@ pub struct BitpackedFastFieldSerializer<'a, W: 'a + Write> {
num_bits: u8,
}
impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> {
impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
/// Creates a new fast field serializer.
///
/// The serializer in fact encode the values by bitpacking
@@ -68,12 +68,12 @@ impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> {
write: &'a mut W,
min_value: u64,
max_value: u64,
) -> io::Result<BitpackedFastFieldSerializer<'a, W>> {
) -> io::Result<BitpackedFastFieldSerializerLegacy<'a, W>> {
assert!(min_value <= max_value);
let amplitude = max_value - min_value;
let num_bits = compute_num_bits(amplitude);
let bit_packer = BitPacker::new();
Ok(BitpackedFastFieldSerializer {
Ok(BitpackedFastFieldSerializerLegacy {
bit_packer,
write,
min_value,
@@ -81,29 +81,6 @@ impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> {
num_bits,
})
}
/// Creates a new fast field serializer.
///
/// The serializer in fact encode the values by bitpacking
/// `(val - min_value)`.
///
/// It requires a `min_value` and a `max_value` to compute
/// compute the minimum number of bits required to encode
/// values.
pub fn create(
write: &'a mut W,
_fastfield_accessor: &impl FastFieldDataAccess,
stats: FastFieldStats,
data_iter: impl Iterator<Item = u64>,
) -> io::Result<()> {
let mut serializer = Self::open(write, stats.min_value, stats.max_value)?;
for val in data_iter {
serializer.add_val(val)?;
}
serializer.close_field()?;
Ok(())
}
/// Pushes a new value to the currently open u64 fast field.
#[inline]
pub fn add_val(&mut self, val: u64) -> io::Result<()> {
@@ -120,7 +97,34 @@ impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> {
}
}
impl<'a, W: 'a + Write> FastFieldSerializerEstimate for BitpackedFastFieldSerializer<'a, W> {
pub struct BitpackedFastFieldSerializer {}
impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
/// Serializes every value of `data_iter` as a bitpacked fast field.
///
/// Values are encoded by bitpacking `(val - min_value)`.
///
/// `stats.min_value` and `stats.max_value` are used to compute the
/// minimum number of bits required to encode the values.
/// `_fastfield_accessor` and `_data_iter1` are not used by this codec.
fn create(
    write: &mut impl Write,
    _fastfield_accessor: &impl FastFieldDataAccess,
    stats: FastFieldStats,
    data_iter: impl Iterator<Item = u64>,
    _data_iter1: impl Iterator<Item = u64>,
) -> io::Result<()> {
    let mut legacy_serializer =
        BitpackedFastFieldSerializerLegacy::open(write, stats.min_value, stats.max_value)?;
    let mut values = data_iter;
    // Stops at the first write error and propagates it.
    values.try_for_each(|val| legacy_serializer.add_val(val))?;
    legacy_serializer.close_field()?;
    Ok(())
}
fn estimate(_fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
let amplitude = stats.max_value - stats.min_value;
let num_bits = compute_num_bits(amplitude);
@@ -128,7 +132,7 @@ impl<'a, W: 'a + Write> FastFieldSerializerEstimate for BitpackedFastFieldSerial
num_bits as f32 / num_bits_uncompressed as f32
}
}
impl<'a, W: 'a + Write> CodecId for BitpackedFastFieldSerializer<'_, W> {
impl CodecId for BitpackedFastFieldSerializer {
const NAME: &'static str = "Bitpacked";
const ID: u8 = 1;
}
@@ -137,26 +141,11 @@ impl<'a, W: 'a + Write> CodecId for BitpackedFastFieldSerializer<'_, W> {
mod tests {
use super::*;
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) {
let mut out = vec![];
BitpackedFastFieldSerializer::create(
&mut out,
&data,
crate::tests::stats_from_vec(&data),
data.iter().cloned(),
)
.unwrap();
let reader = BitpackedFastFieldReader::open_from_bytes(&out).unwrap();
for (doc, orig_val) in data.iter().enumerate() {
let val = reader.get_u64(doc as u64, &out);
if val != *orig_val {
panic!(
"val {:?} does not match orig_val {:?}, in data set {}",
val, orig_val, name
);
}
}
fn create_and_validate(data: &[u64], name: &str) {
crate::tests::create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(
&data, name,
);
}
#[test]

View File

@@ -2,11 +2,14 @@
#[macro_use]
extern crate more_asserts;
use std::io;
use std::io::Write;
pub mod bitpacked;
pub mod linearinterpol;
pub mod multilinearinterpol;
pub trait CodecReader: Sized {
pub trait FastFieldCodecReader: Sized {
/// Reads the metadata and returns the codec reader.
fn open_from_bytes(bytes: &[u8]) -> std::io::Result<Self>;
@@ -16,6 +19,35 @@ pub trait CodecReader: Sized {
fn max_value(&self) -> u64;
}
/// The FastFieldSerializerEstimate trait is required on all variants
/// of fast field compressions, to decide which one to choose.
pub trait FastFieldCodecSerializer {
/// returns an estimate of the compression ratio. if the compressor is unable to handle the
/// data it needs to return f32::MAX.
/// The baseline is uncompressed 64bit data.
///
/// It could make sense to also return a value representing
/// computational complexity.
fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32;
fn create(
write: &mut impl Write,
fastfield_accessor: &impl FastFieldDataAccess,
stats: FastFieldStats,
data_iter: impl Iterator<Item = u64>,
data_iter1: impl Iterator<Item = u64>,
) -> io::Result<()>;
}
/// `CodecId` is required by each Codec.
///
/// It needs to provide a unique name and id, which is
/// used for debugging and de/serialization.
pub trait CodecId {
/// Unique name of the codec, e.g. `"Bitpacked"`.
const NAME: &'static str;
/// Unique id of the codec, e.g. `1` for the bitpacked codec.
/// It is serialized in front of the field data so that the matching
/// reader can be selected when the field is opened.
const ID: u8;
}
/// FastFieldDataAccess is the trait to access fast field data during serialization and estimation.
pub trait FastFieldDataAccess: Clone {
/// Return the value associated to the given document.
@@ -28,27 +60,6 @@ pub trait FastFieldDataAccess: Clone {
fn get(&self, doc: u32) -> u64;
}
/// The FastFieldSerializerEstimate trait is required on all variants
/// of fast field compressions, to decide which one to choose.
pub trait FastFieldSerializerEstimate {
/// returns an estimate of the compression ratio. if the compressor is unable to handle the
/// data it needs to return f32::MAX.
/// The baseline is uncompressed 64bit data.
///
/// It could make sense to also return a value representing
/// computational complexity.
fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32;
}
/// `CodecId` is required by each Codec.
///
/// It needs to provide a unique name and id, which is
/// used for debugging and de/serialization.
pub trait CodecId {
const NAME: &'static str;
const ID: u8;
}
#[derive(Debug, Clone)]
pub struct FastFieldStats {
pub min_value: u64,
@@ -72,8 +83,37 @@ impl FastFieldDataAccess for Vec<u64> {
mod tests {
use crate::{
bitpacked::BitpackedFastFieldSerializer, linearinterpol::LinearInterpolFastFieldSerializer,
multilinearinterpol::MultiLinearInterpolFastFieldSerializer,
};
/// Round-trips `data` through serializer `S` and reader `R`.
///
/// Returns early when `S::estimate` reports `f32::MAX`, i.e. the codec
/// cannot handle the data. Otherwise the data is serialized, read back
/// value by value, and the function panics on the first mismatch. `name`
/// identifies the data set in the panic message.
pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
    data: &[u64],
    name: &str,
) {
    let estimation = S::estimate(&data, crate::tests::stats_from_vec(&data));
    if estimation == f32::MAX {
        return;
    }

    let mut encoded = vec![];
    let stats = crate::tests::stats_from_vec(&data);
    S::create(
        &mut encoded,
        &data,
        stats,
        data.iter().cloned(),
        data.iter().cloned(),
    )
    .unwrap();

    let reader = R::open_from_bytes(&encoded).unwrap();
    for (doc, &expected) in data.iter().enumerate() {
        let actual = reader.get_u64(doc as u64, &encoded);
        assert!(
            actual == expected,
            "val {:?} does not match orig_val {:?}, in data set {}, data {:?}",
            actual,
            expected,
            name,
            data
        );
    }
}
pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
let mut data_and_names = vec![];
@@ -103,14 +143,19 @@ mod tests {
#[test]
fn estimation_good_interpolation_case() {
let data = (10..=200_u64).collect::<Vec<_>>();
let data = (10..=20000_u64).collect::<Vec<_>>();
let linear_interpol_estimation =
LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
assert_le!(linear_interpol_estimation, 0.1);
assert_le!(linear_interpol_estimation, 0.01);
let multi_linear_interpol_estimation =
MultiLinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);
let bitpacked_estimation =
BitpackedFastFieldSerializer::<Vec<u8>>::estimate(&data, stats_from_vec(&data));
BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
assert_le!(linear_interpol_estimation, bitpacked_estimation);
}
#[test]
@@ -122,7 +167,7 @@ mod tests {
assert_le!(linear_interpol_estimation, 0.32);
let bitpacked_estimation =
BitpackedFastFieldSerializer::<Vec<u8>>::estimate(&data, stats_from_vec(&data));
BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}
#[test]
@@ -137,7 +182,7 @@ mod tests {
assert_le!(linear_interpol_estimation, 0.35);
let bitpacked_estimation =
BitpackedFastFieldSerializer::<Vec<u8>>::estimate(&data, stats_from_vec(&data));
BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
assert_le!(bitpacked_estimation, 0.32);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}

View File

@@ -1,7 +1,7 @@
use crate::CodecId;
use crate::CodecReader;
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
use crate::FastFieldSerializerEstimate;
use crate::FastFieldStats;
use std::io::{self, Read, Write};
use std::ops::Sub;
@@ -15,7 +15,7 @@ use tantivy_bitpacker::BitUnpacker;
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct LinearinterpolFastFieldReader {
pub struct LinearInterpolFastFieldReader {
bit_unpacker: BitUnpacker,
pub footer: LinearInterpolFooter,
pub slope: f32,
@@ -61,7 +61,7 @@ impl FixedSize for LinearInterpolFooter {
const SIZE_IN_BYTES: usize = 56;
}
impl CodecReader for LinearinterpolFastFieldReader {
impl FastFieldCodecReader for LinearInterpolFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
let (_data, mut footer) = bytes.split_at(bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES);
@@ -70,7 +70,7 @@ impl CodecReader for LinearinterpolFastFieldReader {
let num_bits = compute_num_bits(footer.relative_max_value);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(LinearinterpolFastFieldReader {
Ok(LinearInterpolFastFieldReader {
bit_unpacker,
footer,
slope,
@@ -96,9 +96,24 @@ impl CodecReader for LinearinterpolFastFieldReader {
/// and stores the difference bitpacked.
pub struct LinearInterpolFastFieldSerializer {}
impl LinearInterpolFastFieldSerializer {
/// Returns the slope of the line going from `first_val` at position 0 to
/// `last_val` at position `num_vals - 1`.
///
/// With fewer than two values there is no line to fit, so the slope is `0.0`.
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
    match num_vals {
        0 | 1 => 0.0,
        count => {
            // The division is carried out in f64 and only the result is narrowed
            // to f32, in order to handle estimations for very large values like
            // i64::MAX.
            let rise = last_val as f64 - first_val as f64;
            let run = (count - 1) as f64;
            (rise / run) as f32
        }
    }
}
/// Returns the interpolated value at position `pos` on the line that starts
/// at `first_val` and has the given `slope`.
///
/// The offset `pos * slope` is computed in f32 and cast to `u64`; the cast
/// saturates, so a negative offset contributes `0`.
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let offset = (pos as f32 * slope) as u64;
    first_val + offset
}
impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
/// Creates a new fast field serializer.
pub fn create(
fn create(
write: &mut impl Write,
fastfield_accessor: &impl FastFieldDataAccess,
stats: FastFieldStats,
@@ -150,24 +165,6 @@ impl LinearInterpolFastFieldSerializer {
footer.serialize(write)?;
Ok(())
}
}
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
if num_vals <= 1 {
return 0.0;
}
// We calculate the slope with f64 high precision and use the result in lower precision f32
// This is done in order to handle estimations for very large values like i64::MAX
((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
}
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
first_val + (pos as f32 * slope) as u64
}
impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer {
/// estimation for linear interpolation is hard because, you don't know
/// where the local maxima for the deviation of the calculated value are and
/// the offset to shift all values to >=0 is also unknown.
@@ -241,33 +238,11 @@ mod tests {
use super::*;
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) -> (u64, u64) {
if LinearInterpolFastFieldSerializer::estimate(&data, crate::tests::stats_from_vec(&data))
== f32::MAX
{
return (0, 0);
}
let mut out = vec![];
LinearInterpolFastFieldSerializer::create(
&mut out,
&data,
crate::tests::stats_from_vec(&data),
data.iter().cloned(),
data.iter().cloned(),
)
.unwrap();
let reader = LinearinterpolFastFieldReader::open_from_bytes(&out).unwrap();
for (doc, orig_val) in data.iter().enumerate() {
let val = reader.get_u64(doc as u64, &out);
if val != *orig_val {
panic!(
"val {:?} does not match orig_val {:?}, in data set {}",
val, orig_val, name
);
}
}
(reader.footer.relative_max_value, reader.footer.offset)
/// Round-trips `data` through the linear interpolation codec using the shared
/// `crate::tests::create_and_validate` helper. `name` labels the data set.
fn create_and_validate(data: &[u64], name: &str) {
    type Serializer = LinearInterpolFastFieldSerializer;
    type Reader = LinearInterpolFastFieldReader;
    crate::tests::create_and_validate::<Serializer, Reader>(data, name);
}
#[test]
@@ -303,10 +278,7 @@ mod tests {
fn linear_interpol_fast_field_test_simple() {
let data = (10..=20_u64).collect::<Vec<_>>();
let (rel_max_value, offset) = create_and_validate(&data, "simple monotonically");
assert_eq!(offset, 0);
assert_eq!(rel_max_value, 0);
create_and_validate(&data, "simple monotonically");
}
#[test]

View File

@@ -1,7 +1,7 @@
use crate::CodecId;
use crate::CodecReader;
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
use crate::FastFieldSerializerEstimate;
use crate::FastFieldStats;
use std::io::{self, Read, Write};
use std::ops::Sub;
@@ -164,7 +164,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
&interpolations[get_interpolation_position(doc)]
}
impl CodecReader for MultiLinearinterpolFastFieldReader {
impl FastFieldCodecReader for MultiLinearinterpolFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
@@ -197,12 +197,22 @@ impl CodecReader for MultiLinearinterpolFastFieldReader {
}
}
/// Returns the slope of the line going from `first_val` at position 0 to
/// `last_val` at position `num_vals - 1`.
///
/// With fewer than two values there is no line to fit and the slope is `0.0`.
/// Without this guard `num_vals == 0` would overflow on `num_vals - 1` and
/// `num_vals == 1` would divide by zero, yielding a NaN or infinite slope.
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
    if num_vals <= 1 {
        return 0.0;
    }
    // The division is carried out in f64 and only the result is narrowed to
    // f32, to keep precision for very large values.
    ((last_val as f64 - first_val as f64) / (num_vals - 1) as f64) as f32
}
/// Returns the interpolated value at position `pos` on the line that starts
/// at `first_val` and has the given `slope`.
///
/// The arithmetic is done on signed integers so that a negative slope moves
/// the result below `first_val`; the sum is cast back to `u64`.
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
    let base = first_val as i64;
    let offset = (pos as f32 * slope) as i64;
    (base + offset) as u64
}
/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct MultiLinearInterpolFastFieldSerializer {}
impl MultiLinearInterpolFastFieldSerializer {
impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
/// Creates a new fast field serializer.
pub fn create(
fn create(
write: &mut impl Write,
fastfield_accessor: &impl FastFieldDataAccess,
stats: FastFieldStats,
@@ -298,17 +308,7 @@ impl MultiLinearInterpolFastFieldSerializer {
footer.serialize(write)?;
Ok(())
}
}
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
}
#[inline]
fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
(first_val as i64 + (pos as f32 * slope) as i64) as u64
}
impl FastFieldSerializerEstimate for MultiLinearInterpolFastFieldSerializer {
/// estimation for linear interpolation is hard because, you don't know
/// where the local maxima are for the deviation of the calculated value and
/// the offset is also unknown.
@@ -389,33 +389,10 @@ mod tests {
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) {
if MultiLinearInterpolFastFieldSerializer::estimate(
&data,
crate::tests::stats_from_vec(&data),
) == f32::MAX
{
return;
}
let mut out = vec![];
MultiLinearInterpolFastFieldSerializer::create(
&mut out,
&data,
crate::tests::stats_from_vec(&data),
data.iter().cloned(),
data.iter().cloned(),
)
.unwrap();
let reader = MultiLinearinterpolFastFieldReader::open_from_bytes(&out).unwrap();
for (doc, orig_val) in data.iter().enumerate() {
let val = reader.get_u64(doc as u64, &out);
if val != *orig_val {
panic!(
"val {:?} does not match orig_val {:?}, in data set {}, data {:?}",
val, orig_val, name, data
);
}
}
crate::tests::create_and_validate::<
MultiLinearInterpolFastFieldSerializer,
MultiLinearinterpolFastFieldReader,
>(&data, name);
}
#[test]

View File

@@ -1,4 +1,4 @@
use crate::fastfield::serializer::BitpackedFastFieldSerializer;
use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
use crate::fastfield::CompositeFastFieldSerializer;
use crate::postings::UnorderedTermId;
use crate::schema::{Document, Field};
@@ -154,7 +154,7 @@ impl MultiValuedFastFieldWriter {
}
{
// writing the values themselves.
let mut value_serializer: BitpackedFastFieldSerializer<'_, _>;
let mut value_serializer: BitpackedFastFieldSerializerLegacy<'_, _>;
match mapping_opt {
Some(mapping) => {
value_serializer = serializer.new_u64_fast_field_with_idx(

View File

@@ -10,12 +10,12 @@ use crate::schema::FAST;
use crate::DocId;
use fastfield_codecs::bitpacked::BitpackedFastFieldReader as BitpackedReader;
use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldReader;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::linearinterpol::LinearinterpolFastFieldReader;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearinterpolFastFieldReader;
use fastfield_codecs::CodecId;
use fastfield_codecs::CodecReader;
use fastfield_codecs::FastFieldCodecReader;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::path::Path;
@@ -69,7 +69,7 @@ pub enum DynamicFastFieldReader<Item: FastValue> {
/// Bitpacked compressed fastfield data.
Bitpacked(FastFieldReaderCodecWrapper<Item, BitpackedReader>),
/// Linear interpolated values + bitpacked
LinearInterpol(FastFieldReaderCodecWrapper<Item, LinearinterpolFastFieldReader>),
LinearInterpol(FastFieldReaderCodecWrapper<Item, LinearInterpolFastFieldReader>),
/// Blockwise linear interpolated values + bitpacked
MultiLinearInterpol(FastFieldReaderCodecWrapper<Item, MultiLinearinterpolFastFieldReader>),
}
@@ -81,7 +81,7 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
let id = bytes.read_u8();
let reader = match id {
BitpackedFastFieldSerializer::<Vec<u8>>::ID => {
BitpackedFastFieldSerializer::ID => {
DynamicFastFieldReader::Bitpacked(FastFieldReaderCodecWrapper::<
Item,
BitpackedReader,
@@ -90,7 +90,7 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
LinearInterpolFastFieldSerializer::ID => {
DynamicFastFieldReader::LinearInterpol(FastFieldReaderCodecWrapper::<
Item,
LinearinterpolFastFieldReader,
LinearInterpolFastFieldReader,
>::open_from_bytes(bytes)?)
}
MultiLinearInterpolFastFieldSerializer::ID => {
@@ -154,12 +154,12 @@ pub struct FastFieldReaderCodecWrapper<Item: FastValue, CodecReader> {
_phantom: PhantomData<Item>,
}
impl<Item: FastValue, C: CodecReader> FastFieldReaderCodecWrapper<Item, C> {
impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item, C> {
/// Opens a fast field given a file.
pub fn open(file: FileSlice) -> crate::Result<Self> {
let mut bytes = file.read_bytes()?;
let id = u8::deserialize(&mut bytes)?;
assert_eq!(BitpackedFastFieldSerializer::<Vec<u8>>::ID, id);
assert_eq!(BitpackedFastFieldSerializer::ID, id);
Self::open_from_bytes(bytes)
}
/// Opens a fast field given the bytes.
@@ -194,7 +194,7 @@ impl<Item: FastValue, C: CodecReader> FastFieldReaderCodecWrapper<Item, C> {
}
}
impl<Item: FastValue, C: CodecReader + Clone> FastFieldReader<Item>
impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
for FastFieldReaderCodecWrapper<Item, C>
{
/// Return the value associated to the given document.

View File

@@ -6,10 +6,11 @@ use crate::schema::Field;
use fastfield_codecs::CodecId;
//pub use bitpacked::BitpackedFastFieldSerializer;
pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer;
pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializerLegacy;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
pub use fastfield_codecs::FastFieldCodecSerializer;
pub use fastfield_codecs::FastFieldDataAccess;
pub use fastfield_codecs::FastFieldSerializerEstimate;
pub use fastfield_codecs::FastFieldStats;
use std::io::{self, Write};
@@ -60,12 +61,9 @@ impl CompositeFastFieldSerializer {
{
let (ratio, name, id) = (
BitpackedFastFieldSerializer::<Vec<u8>>::estimate(
&fastfield_accessor,
stats.clone(),
),
BitpackedFastFieldSerializer::<Vec<u8>>::NAME,
BitpackedFastFieldSerializer::<Vec<u8>>::ID,
BitpackedFastFieldSerializer::estimate(&fastfield_accessor, stats.clone()),
BitpackedFastFieldSerializer::NAME,
BitpackedFastFieldSerializer::ID,
);
estimations.push((ratio, name, id));
}
@@ -107,12 +105,13 @@ impl CompositeFastFieldSerializer {
); // todo print actual field name
id.serialize(field_write)?;
match name {
BitpackedFastFieldSerializer::<Vec<u8>>::NAME => {
BitpackedFastFieldSerializer::NAME => {
BitpackedFastFieldSerializer::create(
field_write,
&fastfield_accessor,
stats,
data_iter_1,
data_iter_2,
)?;
}
LinearInterpolFastFieldSerializer::NAME => {
@@ -147,7 +146,7 @@ impl CompositeFastFieldSerializer {
field: Field,
min_value: u64,
max_value: u64,
) -> io::Result<BitpackedFastFieldSerializer<'_, CountingWriter<WritePtr>>> {
) -> io::Result<BitpackedFastFieldSerializerLegacy<'_, CountingWriter<WritePtr>>> {
self.new_u64_fast_field_with_idx(field, min_value, max_value, 0)
}
@@ -158,12 +157,12 @@ impl CompositeFastFieldSerializer {
min_value: u64,
max_value: u64,
idx: usize,
) -> io::Result<BitpackedFastFieldSerializer<'_, CountingWriter<WritePtr>>> {
) -> io::Result<BitpackedFastFieldSerializerLegacy<'_, CountingWriter<WritePtr>>> {
let field_write = self.composite_write.for_field_with_idx(field, idx);
// Prepend codec id to field data for compatibility with DynamicFastFieldReader.
let id = BitpackedFastFieldSerializer::<Vec<u8>>::ID;
let id = BitpackedFastFieldSerializer::ID;
id.serialize(field_write)?;
BitpackedFastFieldSerializer::open(field_write, min_value, max_value)
BitpackedFastFieldSerializerLegacy::open(field_write, min_value, max_value)
}
/// Start serializing a new [u8] fast field