Split open_from_bytes into its own trait

This commit is contained in:
Pascal Seitz
2022-08-26 09:23:43 +02:00
parent 4a6f36937c
commit 02c3252d1e
9 changed files with 73 additions and 28 deletions

View File

@@ -25,7 +25,10 @@ mod tests {
/// Yields every integer from `0` up to and including `19_999`, in ascending order.
fn value_iter() -> impl Iterator<Item = u64> {
    let upper_bound: u64 = 20_000;
    0..upper_bound
}
fn bench_get<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
fn bench_get<
S: FastFieldCodecSerializer,
R: FastFieldCodecDeserializer + FastFieldCodecReader,
>(
b: &mut Bencher,
data: &[u64],
) {

View File

@@ -5,7 +5,8 @@ use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType,
FastFieldDataAccess,
};
/// Depending on the field type, a different
@@ -19,7 +20,7 @@ pub struct BitpackedReader {
pub num_vals: u64,
}
impl FastFieldCodecReader for BitpackedReader {
impl FastFieldCodecDeserializer for BitpackedReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - 24;
@@ -38,6 +39,8 @@ impl FastFieldCodecReader for BitpackedReader {
num_vals,
})
}
}
impl FastFieldCodecReader for BitpackedReader {
#[inline]
fn get_u64(&self, doc: u64) -> u64 {
self.min_value_u64 + self.bit_unpacker.get(doc, &self.data)

View File

@@ -19,7 +19,8 @@ use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::linear::{get_calculated_value, get_slope};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType,
FastFieldDataAccess,
};
const CHUNK_SIZE: u64 = 512;
@@ -148,7 +149,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
&interpolations[get_interpolation_position(doc)]
}
impl FastFieldCodecReader for BlockwiseLinearReader {
impl FastFieldCodecDeserializer for BlockwiseLinearReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
@@ -157,7 +158,9 @@ impl FastFieldCodecReader for BlockwiseLinearReader {
let footer = BlockwiseLinearFooter::deserialize(&mut footer)?;
Ok(BlockwiseLinearReader { data, footer })
}
}
impl FastFieldCodecReader for BlockwiseLinearReader {
#[inline]
fn get_u64(&self, idx: u64) -> u64 {
let interpolation = get_interpolation_function(idx, &self.footer.interpolations);

View File

@@ -12,13 +12,22 @@ pub mod bitpacked;
pub mod blockwise_linear;
pub mod linear;
/// Constructs a codec reader from its serialized bytes.
///
/// The constructor is kept in its own trait, separate from the read accessors
/// declared on [`FastFieldCodecReader`].
pub trait FastFieldCodecDeserializer: Sized {
/// Reads the metadata from `bytes` and returns the codec reader.
///
/// Only callable on implementors that also implement [`FastFieldCodecReader`]
/// (enforced by the `where` clause below).
///
/// # Errors
/// Returns a `std::io::Error` if the implementor fails to decode `bytes`.
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self>
where
Self: FastFieldCodecReader;
}
pub trait FastFieldCodecReader: Sized {
/// reads the metadata and returns the CodecReader
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self>;
fn get_u64(&self, doc: u64) -> u64;
fn min_value(&self) -> u64;
fn max_value(&self) -> u64;
fn num_vals(&self) -> u64;
/// Returns an iterator over the data.
///
/// The default implementation calls `get_u64` for each index in `0..num_vals()`,
/// in ascending order, and hands the result back as a boxed iterator that
/// borrows `self`.
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = u64> + 'a> {
Box::new((0..self.num_vals()).map(|idx| self.get_u64(idx)))
}
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
@@ -170,7 +179,10 @@ mod tests {
use crate::blockwise_linear::{BlockwiseLinearReader, BlockwiseLinearSerializer};
use crate::linear::{LinearReader, LinearSerializer};
pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
pub fn create_and_validate<
S: FastFieldCodecSerializer,
R: FastFieldCodecDeserializer + FastFieldCodecReader,
>(
data: &[u64],
name: &str,
) -> (f32, f32) {
@@ -230,7 +242,10 @@ mod tests {
data_and_names
}
fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
fn test_codec<
S: FastFieldCodecSerializer,
R: FastFieldCodecReader + FastFieldCodecDeserializer,
>() {
let codec_name = format!("{:?}", S::CODEC_TYPE);
for (data, dataset_name) in get_codec_test_data_sets() {
let (estimate, actual) = crate::tests::create_and_validate::<S, R>(&data, dataset_name);

View File

@@ -6,7 +6,8 @@ use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType,
FastFieldDataAccess,
};
/// Depending on the field type, a different
@@ -59,7 +60,7 @@ impl FixedSize for LinearFooter {
const SIZE_IN_BYTES: usize = 56;
}
impl FastFieldCodecReader for LinearReader {
impl FastFieldCodecDeserializer for LinearReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - LinearFooter::SIZE_IN_BYTES;
@@ -75,6 +76,9 @@ impl FastFieldCodecReader for LinearReader {
slope,
})
}
}
impl FastFieldCodecReader for LinearReader {
#[inline]
fn get_u64(&self, doc: u64) -> u64 {
let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope);

View File

@@ -3,7 +3,7 @@ use std::num::NonZeroU64;
use common::BinarySerializable;
use fastdivide::DividerU64;
use fastfield_codecs::FastFieldCodecReader;
use fastfield_codecs::{FastFieldCodecDeserializer, FastFieldCodecReader};
use ownedbytes::OwnedBytes;
pub const GCD_DEFAULT: u64 = 1;
@@ -15,24 +15,30 @@ pub const GCD_DEFAULT: u64 = 1;
pub struct GCDFastFieldCodec<CodecReader> {
/// Factor the wrapped reader's values are multiplied by when reconstructing
/// values (see `max_value`); read from the footer in `open_from_bytes`.
gcd: u64,
/// Offset added after the multiplication by `gcd` (see `max_value`);
/// read from the footer in `open_from_bytes`.
min_value: u64,
/// Value count read from the footer; returned as-is by `num_vals()`.
num_vals: u64,
/// Wrapped codec reader, opened from the bytes that precede the footer.
reader: CodecReader,
}
impl<C: FastFieldCodecReader + Clone> FastFieldCodecReader for GCDFastFieldCodec<C> {
/// Opens a fast field given the bytes.
impl<C: FastFieldCodecReader + FastFieldCodecDeserializer + Clone> FastFieldCodecDeserializer
for GCDFastFieldCodec<C>
{
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self> {
let footer_offset = bytes.len() - 16;
let footer_offset = bytes.len() - 24;
let (body, mut footer) = bytes.split(footer_offset);
let gcd = u64::deserialize(&mut footer)?;
let min_value = u64::deserialize(&mut footer)?;
let num_vals = u64::deserialize(&mut footer)?;
let reader = C::open_from_bytes(body)?;
Ok(GCDFastFieldCodec {
gcd,
min_value,
num_vals,
reader,
})
}
}
impl<C: FastFieldCodecReader + Clone> FastFieldCodecReader for GCDFastFieldCodec<C> {
#[inline]
fn get_u64(&self, doc: u64) -> u64 {
let mut data = self.reader.get_u64(doc);
@@ -48,11 +54,20 @@ impl<C: FastFieldCodecReader + Clone> FastFieldCodecReader for GCDFastFieldCodec
fn max_value(&self) -> u64 {
self.min_value + self.reader.max_value() * self.gcd
}
fn num_vals(&self) -> u64 {
self.num_vals
}
}
pub fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: u64) -> io::Result<()> {
/// Serializes the GCD metadata to `field_write`: `gcd`, then `min_value`, then
/// `num_vals`.
///
/// `GCDFastFieldCodec::open_from_bytes` reads the same three `u64` values back
/// in this order from the trailing 24 bytes of the field data, so despite the
/// "header" in the name the reader treats this data as a footer.
///
/// # Errors
/// Propagates the first error returned by any of the `serialize` calls.
pub fn write_gcd_header<W: Write>(
field_write: &mut W,
min_value: u64,
gcd: u64,
num_vals: u64,
) -> io::Result<()> {
// Write order must stay in sync with the read order in `open_from_bytes`.
gcd.serialize(field_write)?;
min_value.serialize(field_write)?;
num_vals.serialize(field_write)?;
Ok(())
}

View File

@@ -326,7 +326,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 37);
assert_eq!(file.len(), 45);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(*FIELD).unwrap();
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
@@ -357,7 +357,7 @@ mod tests {
serializer.close()?;
}
let file = directory.open_read(path)?;
assert_eq!(file.len(), 62);
assert_eq!(file.len(), 70);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(*FIELD).unwrap();
@@ -393,7 +393,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 35);
assert_eq!(file.len(), 43);
{
let fast_fields_composite = CompositeFile::open(&file).unwrap();
let data = fast_fields_composite.open_read(*FIELD).unwrap();
@@ -425,7 +425,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 80043);
assert_eq!(file.len(), 80051);
{
let fast_fields_composite = CompositeFile::open(&file)?;
let data = fast_fields_composite.open_read(*FIELD).unwrap();
@@ -896,7 +896,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 36);
assert_eq!(file.len(), 44);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(field).unwrap();
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
@@ -932,7 +932,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 48);
assert_eq!(file.len(), 56);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(field).unwrap();
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
@@ -966,7 +966,7 @@ mod tests {
serializer.close().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 35);
assert_eq!(file.len(), 43);
let composite_file = CompositeFile::open(&file)?;
let file = composite_file.open_read(field).unwrap();
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;

View File

@@ -6,7 +6,7 @@ use common::BinarySerializable;
use fastfield_codecs::bitpacked::BitpackedReader;
use fastfield_codecs::blockwise_linear::BlockwiseLinearReader;
use fastfield_codecs::linear::LinearReader;
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecType};
use fastfield_codecs::{FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecType};
use super::{FastValue, GCDFastFieldCodec};
use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr};
@@ -199,7 +199,9 @@ pub struct FastFieldReaderCodecWrapper<Item: FastValue, CodecReader> {
_phantom: PhantomData<Item>,
}
impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item, C> {
impl<Item: FastValue, C: FastFieldCodecReader + FastFieldCodecDeserializer>
FastFieldReaderCodecWrapper<Item, C>
{
/// Opens a fast field given a file.
pub fn open(file: FileSlice) -> crate::Result<Self> {
let mut bytes = file.read_bytes()?;
@@ -249,8 +251,8 @@ impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item,
}
}
impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
for FastFieldReaderCodecWrapper<Item, C>
impl<Item: FastValue, C: FastFieldCodecReader + FastFieldCodecDeserializer + Clone>
FastFieldReader<Item> for FastFieldReaderCodecWrapper<Item, C>
{
/// Return the value associated to the given document.
///

View File

@@ -189,7 +189,7 @@ impl CompositeFastFieldSerializer {
field_write,
fastfield_accessor,
)?;
write_gcd_header(field_write, base_value, gcd)?;
write_gcd_header(field_write, base_value, gcd, num_vals)?;
Ok(())
}