diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index af6a58dff..6c81a5f75 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -25,7 +25,10 @@ mod tests { fn value_iter() -> impl Iterator { 0..20_000 } - fn bench_get( + fn bench_get< + S: FastFieldCodecSerializer, + R: FastFieldCodecDeserializer + FastFieldCodecReader, + >( b: &mut Bencher, data: &[u64], ) { diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index c9c7010b6..c3f378e39 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -5,7 +5,8 @@ use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::{ - FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess, + FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, + FastFieldDataAccess, }; /// Depending on the field type, a different @@ -19,7 +20,7 @@ pub struct BitpackedReader { pub num_vals: u64, } -impl FastFieldCodecReader for BitpackedReader { +impl FastFieldCodecDeserializer for BitpackedReader { /// Opens a fast field given a file. fn open_from_bytes(bytes: OwnedBytes) -> io::Result { let footer_offset = bytes.len() - 24; @@ -38,6 +39,8 @@ impl FastFieldCodecReader for BitpackedReader { num_vals, }) } +} +impl FastFieldCodecReader for BitpackedReader { #[inline] fn get_u64(&self, doc: u64) -> u64 { self.min_value_u64 + self.bit_unpacker.get(doc, &self.data) diff --git a/fastfield_codecs/src/blockwise_linear.rs b/fastfield_codecs/src/blockwise_linear.rs index cfdf752d2..d8352adcd 100644 --- a/fastfield_codecs/src/blockwise_linear.rs +++ b/fastfield_codecs/src/blockwise_linear.rs @@ -19,7 +19,8 @@ use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::linear::{get_calculated_value, get_slope}; use crate::{ - FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess, + FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, + FastFieldDataAccess, }; const CHUNK_SIZE: u64 = 512; @@ -148,7 +149,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio &interpolations[get_interpolation_position(doc)] } -impl FastFieldCodecReader for BlockwiseLinearReader { +impl FastFieldCodecDeserializer for BlockwiseLinearReader { /// Opens a fast field given a file. fn open_from_bytes(bytes: OwnedBytes) -> io::Result { let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?; @@ -157,7 +158,9 @@ impl FastFieldCodecReader for BlockwiseLinearReader { let footer = BlockwiseLinearFooter::deserialize(&mut footer)?; Ok(BlockwiseLinearReader { data, footer }) } +} +impl FastFieldCodecReader for BlockwiseLinearReader { #[inline] fn get_u64(&self, idx: u64) -> u64 { let interpolation = get_interpolation_function(idx, &self.footer.interpolations); diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 1da8d09f0..923a334d9 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -12,13 +12,22 @@ pub mod bitpacked; pub mod blockwise_linear; pub mod linear; +pub trait FastFieldCodecDeserializer: Sized { + /// Reads the metadata and returns the CodecReader + fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result + where + Self: FastFieldCodecReader; +} + pub trait FastFieldCodecReader: Sized { - /// reads the metadata and returns the CodecReader - fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result; fn get_u64(&self, doc: u64) -> u64; fn min_value(&self) -> u64; fn max_value(&self) -> u64; fn num_vals(&self) -> u64; + /// Returns a iterator over the data + fn iter<'a>(&'a self) -> Box + 'a> { + Box::new((0..self.num_vals()).map(|idx| self.get_u64(idx))) + } } #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)] @@ -170,7 +179,10 @@ mod tests { use crate::blockwise_linear::{BlockwiseLinearReader, BlockwiseLinearSerializer}; use crate::linear::{LinearReader, LinearSerializer}; - pub fn create_and_validate( + pub fn create_and_validate< + S: FastFieldCodecSerializer, + R: FastFieldCodecDeserializer + FastFieldCodecReader, + >( data: &[u64], name: &str, ) -> (f32, f32) { @@ -230,7 +242,10 @@ mod tests { data_and_names } - fn test_codec() { + fn test_codec< + S: FastFieldCodecSerializer, + R: FastFieldCodecReader + FastFieldCodecDeserializer, + >() { let codec_name = format!("{:?}", S::CODEC_TYPE); for (data, dataset_name) in get_codec_test_data_sets() { let (estimate, actual) = crate::tests::create_and_validate::(&data, dataset_name); diff --git a/fastfield_codecs/src/linear.rs b/fastfield_codecs/src/linear.rs index 919cf8d60..a63440114 100644 --- a/fastfield_codecs/src/linear.rs +++ b/fastfield_codecs/src/linear.rs @@ -6,7 +6,8 @@ use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::{ - FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess, + FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, + FastFieldDataAccess, }; /// Depending on the field type, a different @@ -59,7 +60,7 @@ impl FixedSize for LinearFooter { const SIZE_IN_BYTES: usize = 56; } -impl FastFieldCodecReader for LinearReader { +impl FastFieldCodecDeserializer for LinearReader { /// Opens a fast field given a file. fn open_from_bytes(bytes: OwnedBytes) -> io::Result { let footer_offset = bytes.len() - LinearFooter::SIZE_IN_BYTES; @@ -75,6 +76,9 @@ impl FastFieldCodecReader for LinearReader { slope, }) } +} + +impl FastFieldCodecReader for LinearReader { #[inline] fn get_u64(&self, doc: u64) -> u64 { let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope); diff --git a/src/fastfield/gcd.rs b/src/fastfield/gcd.rs index 50cb2594a..f786f67f3 100644 --- a/src/fastfield/gcd.rs +++ b/src/fastfield/gcd.rs @@ -3,7 +3,7 @@ use std::num::NonZeroU64; use common::BinarySerializable; use fastdivide::DividerU64; -use fastfield_codecs::FastFieldCodecReader; +use fastfield_codecs::{FastFieldCodecDeserializer, FastFieldCodecReader}; use ownedbytes::OwnedBytes; pub const GCD_DEFAULT: u64 = 1; @@ -15,24 +15,30 @@ pub const GCD_DEFAULT: u64 = 1; pub struct GCDFastFieldCodec { gcd: u64, min_value: u64, + num_vals: u64, reader: CodecReader, } -impl FastFieldCodecReader for GCDFastFieldCodec { - /// Opens a fast field given the bytes. +impl FastFieldCodecDeserializer + for GCDFastFieldCodec +{ fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result { - let footer_offset = bytes.len() - 16; + let footer_offset = bytes.len() - 24; let (body, mut footer) = bytes.split(footer_offset); let gcd = u64::deserialize(&mut footer)?; let min_value = u64::deserialize(&mut footer)?; + let num_vals = u64::deserialize(&mut footer)?; let reader = C::open_from_bytes(body)?; Ok(GCDFastFieldCodec { gcd, min_value, + num_vals, reader, }) } +} +impl FastFieldCodecReader for GCDFastFieldCodec { #[inline] fn get_u64(&self, doc: u64) -> u64 { let mut data = self.reader.get_u64(doc); @@ -48,11 +54,20 @@ impl FastFieldCodecReader for GCDFastFieldCodec fn max_value(&self) -> u64 { self.min_value + self.reader.max_value() * self.gcd } + fn num_vals(&self) -> u64 { + self.num_vals + } } -pub fn write_gcd_header(field_write: &mut W, min_value: u64, gcd: u64) -> io::Result<()> { +pub fn write_gcd_header( + field_write: &mut W, + min_value: u64, + gcd: u64, + num_vals: u64, +) -> io::Result<()> { gcd.serialize(field_write)?; min_value.serialize(field_write)?; + num_vals.serialize(field_write)?; Ok(()) } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index d5cf0e203..851d5df6a 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -326,7 +326,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 37); + assert_eq!(file.len(), 45); let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(*FIELD).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; @@ -357,7 +357,7 @@ mod tests { serializer.close()?; } let file = directory.open_read(path)?; - assert_eq!(file.len(), 62); + assert_eq!(file.len(), 70); { let fast_fields_composite = CompositeFile::open(&file)?; let data = fast_fields_composite.open_read(*FIELD).unwrap(); @@ -393,7 +393,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 35); + assert_eq!(file.len(), 43); { let fast_fields_composite = CompositeFile::open(&file).unwrap(); let data = fast_fields_composite.open_read(*FIELD).unwrap(); @@ -425,7 +425,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 80043); + assert_eq!(file.len(), 80051); { let fast_fields_composite = CompositeFile::open(&file)?; let data = fast_fields_composite.open_read(*FIELD).unwrap(); @@ -896,7 +896,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 36); + assert_eq!(file.len(), 44); let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(field).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; @@ -932,7 +932,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 48); + assert_eq!(file.len(), 56); let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(field).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; @@ -966,7 +966,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(path).unwrap(); - assert_eq!(file.len(), 35); + assert_eq!(file.len(), 43); let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(field).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index f3c8d2820..11726ba5d 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -6,7 +6,7 @@ use common::BinarySerializable; use fastfield_codecs::bitpacked::BitpackedReader; use fastfield_codecs::blockwise_linear::BlockwiseLinearReader; use fastfield_codecs::linear::LinearReader; -use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecType}; +use fastfield_codecs::{FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecType}; use super::{FastValue, GCDFastFieldCodec}; use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr}; @@ -199,7 +199,9 @@ pub struct FastFieldReaderCodecWrapper { _phantom: PhantomData, } -impl FastFieldReaderCodecWrapper { +impl + FastFieldReaderCodecWrapper +{ /// Opens a fast field given a file. pub fn open(file: FileSlice) -> crate::Result { let mut bytes = file.read_bytes()?; @@ -249,8 +251,8 @@ impl FastFieldReaderCodecWrapper FastFieldReader - for FastFieldReaderCodecWrapper +impl + FastFieldReader for FastFieldReaderCodecWrapper { /// Return the value associated to the given document. /// diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index 1655cd7da..871a04978 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -189,7 +189,7 @@ impl CompositeFastFieldSerializer { field_write, fastfield_accessor, )?; - write_gcd_header(field_write, base_value, gcd)?; + write_gcd_header(field_write, base_value, gcd, num_vals)?; Ok(()) }