diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index 768037d00..7389bfc6c 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -29,7 +29,7 @@ mod tests { fn value_iter() -> impl Iterator { 0..20_000 } - fn bench_get( + fn bench_get( b: &mut Bencher, data: &[u64], ) { @@ -49,7 +49,7 @@ mod tests { } }); } - fn bench_create(b: &mut Bencher, data: &[u64]) { + fn bench_create(b: &mut Bencher, data: &[u64]) { let mut bytes = vec![]; b.iter(|| { S::serialize( diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 02f813d62..057b4485c 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -4,7 +4,7 @@ use common::BinarySerializable; use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; -use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats}; +use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats}; /// Depending on the field type, a different /// fast field is required. @@ -83,7 +83,7 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> { pub struct BitpackedFastFieldSerializer; -impl FastFieldCodecSerializer for BitpackedFastFieldSerializer { +impl FastFieldCodec for BitpackedFastFieldSerializer { const NAME: &'static str = "Bitpacked"; type Reader = BitpackedFastFieldReader; @@ -114,6 +114,7 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer { /// compute the minimum number of bits required to encode /// values. fn serialize( + &self, write: &mut impl Write, _fastfield_accessor: &dyn FastFieldDataAccess, stats: FastFieldStats, @@ -150,7 +151,8 @@ mod tests { use crate::tests::get_codec_test_data_sets; fn create_and_validate(data: &[u64], name: &str) { - crate::tests::create_and_validate::( + crate::tests::create_and_validate( + &BitpackedFastFieldSerializer, data, name, ); } diff --git a/fastfield_codecs/src/dynamic.rs b/fastfield_codecs/src/dynamic.rs index fcdbc3642..f1840c2b7 100644 --- a/fastfield_codecs/src/dynamic.rs +++ b/fastfield_codecs/src/dynamic.rs @@ -23,16 +23,16 @@ use std::sync::Arc; use ownedbytes::OwnedBytes; -use crate::FastFieldCodecSerializer; +use crate::FastFieldCodec; use crate::bitpacked::BitpackedFastFieldSerializer; use crate::linearinterpol::LinearInterpolFastFieldSerializer; use crate::FastFieldCodecReader; use crate::gcd::GCDFastFieldCodecSerializer; use crate::multilinearinterpol::MultiLinearInterpolFastFieldSerializer; -struct DynamicFastFieldSerializer; +pub struct DynamicFastFieldSerializer; -impl FastFieldCodecSerializer for DynamicFastFieldSerializer { +impl FastFieldCodec for DynamicFastFieldSerializer { const NAME: &'static str = "dynamic"; type Reader = DynamicFastFieldReader; @@ -46,6 +46,7 @@ impl FastFieldCodecSerializer for DynamicFastFieldSerializer { } fn serialize( + &self, write: &mut impl io::Write, fastfield_accessor: &dyn crate::FastFieldDataAccess, stats: crate::FastFieldStats, diff --git a/fastfield_codecs/src/gcd.rs b/fastfield_codecs/src/gcd.rs index d09b49dbe..d07e43c1a 100644 --- a/fastfield_codecs/src/gcd.rs +++ b/fastfield_codecs/src/gcd.rs @@ -4,7 +4,7 @@ use common::BinarySerializable; use fastdivide::DividerU64; use ownedbytes::OwnedBytes; -use crate::{FastFieldCodecReader, FastFieldCodecSerializer}; +use crate::{FastFieldCodecReader, FastFieldCodec}; /// Wrapper for accessing a fastfield. /// @@ -16,13 +16,13 @@ pub struct GCDFastFieldCodecReader { reader: CodecReader, } -pub struct GCDFastFieldCodecSerializer { - _wrapped_type: PhantomData, +pub struct GCDFastFieldCodecSerializer { + pub gcd: NonZeroU64, + pub min_value: u64, + pub wrapped: WrappedCodecSerializer, } -impl GCDFastFieldCodecSerializer {} - -impl FastFieldCodecSerializer for GCDFastFieldCodecSerializer { +impl FastFieldCodec for GCDFastFieldCodecSerializer { // TODO Fixme. We could like the underlying codec name as well. const NAME: &'static str = "GCD"; @@ -37,13 +37,16 @@ impl FastFieldCodecSerializer } fn serialize( + &self, write: &mut impl Write, fastfield_accessor: &dyn crate::FastFieldDataAccess, stats: crate::FastFieldStats, data_iter: impl Iterator, data_iter1: impl Iterator, ) -> io::Result<()> { - todo!() + write_gcd_header(write, self.min_value, self.gcd)?; + self.wrapped.serialize(write, fastfield_accessor, stats, data_iter, data_iter1)?; + Ok(()) } fn open_from_bytes(bytes: OwnedBytes) -> io::Result { @@ -77,8 +80,8 @@ impl FastFieldCodecReader for GCDFastFieldCodecReader(field_write: &mut W, min_value: u64, gcd: u64) -> io::Result<()> { - gcd.serialize(field_write)?; +fn write_gcd_header(field_write: &mut W, min_value: u64, gcd: NonZeroU64) -> io::Result<()> { + gcd.get().serialize(field_write)?; min_value.serialize(field_write)?; Ok(()) } diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 40c58d7fa..ac08c671c 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -13,7 +13,7 @@ pub mod gcd; pub mod linearinterpol; pub mod multilinearinterpol; -pub trait FastFieldCodecReader{ +pub trait FastFieldCodecReader { /// reads the metadata and returns the CodecReader fn get_u64(&self, doc: u64) -> u64; fn min_value(&self) -> u64; @@ -22,7 +22,7 @@ pub trait FastFieldCodecReader{ /// The FastFieldSerializerEstimate trait is required on all variants /// of fast field compressions, to decide which one to choose. -pub trait FastFieldCodecSerializer { +pub trait FastFieldCodec { /// A codex needs to provide a unique name used for debugging and de/serialization. const NAME: &'static str; @@ -42,6 +42,7 @@ pub trait FastFieldCodecSerializer { /// There are multiple iterators, in case the codec needs to read the data multiple times. /// The iterators should be preferred over using fastfield_accessor for performance reasons. fn serialize( + &self, write: &mut impl Write, fastfield_accessor: &dyn FastFieldDataAccess, stats: FastFieldStats, @@ -93,7 +94,8 @@ mod tests { MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer, }; - pub fn create_and_validate( + pub fn create_and_validate( + codec: &S, data: &[u64], name: &str, ) -> (f32, f32) { @@ -102,7 +104,7 @@ mod tests { } let estimation = S::estimate(&data, crate::tests::stats_from_vec(data)); let mut out: Vec = Vec::new(); - S::serialize( + codec.serialize( &mut out, &data, crate::tests::stats_from_vec(data), @@ -141,11 +143,11 @@ mod tests { data_and_names } - fn test_codec() { - let codec_name = S::NAME; + fn test_codec(codec: &C) { + let codec_name = C::NAME; for (data, data_set_name) in get_codec_test_data_sets() { let (estimate, actual) = - crate::tests::create_and_validate::(&data, data_set_name); + crate::tests::create_and_validate(codec, &data, data_set_name); let result = if estimate == f32::MAX { "Disabled".to_string() } else { @@ -159,15 +161,15 @@ mod tests { } #[test] fn test_codec_bitpacking() { - test_codec::(); + test_codec(&BitpackedFastFieldSerializer); } #[test] fn test_codec_interpolation() { - test_codec::(); + test_codec(&LinearInterpolFastFieldSerializer); } #[test] fn test_codec_multi_interpolation() { - test_codec::(); + test_codec(&MultiLinearInterpolFastFieldSerializer); } use super::*; diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs index 0911ec4df..704c1c156 100644 --- a/fastfield_codecs/src/linearinterpol.rs +++ b/fastfield_codecs/src/linearinterpol.rs @@ -5,7 +5,7 @@ use common::{BinarySerializable, FixedSize}; use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; -use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats}; +use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats}; /// Depending on the field type, a different /// fast field is required. @@ -77,7 +77,7 @@ impl FastFieldCodecReader for LinearInterpolFastFieldReader { /// Fastfield serializer, which tries to guess values by linear interpolation /// and stores the difference bitpacked. -pub struct LinearInterpolFastFieldSerializer {} +pub struct LinearInterpolFastFieldSerializer; #[inline] fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { @@ -94,7 +94,7 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { first_val + (pos as f32 * slope) as u64 } -impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer { +impl FastFieldCodec for LinearInterpolFastFieldSerializer { const NAME: &'static str = "LinearInterpol"; type Reader = LinearInterpolFastFieldReader; @@ -117,6 +117,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer { /// Creates a new fast field serializer. fn serialize( + &self, write: &mut impl Write, fastfield_accessor: &dyn FastFieldDataAccess, stats: FastFieldStats, @@ -242,9 +243,7 @@ mod tests { use crate::tests::get_codec_test_data_sets; fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) { - crate::tests::create_and_validate::< - LinearInterpolFastFieldSerializer, - >(data, name) + crate::tests::create_and_validate(&LinearInterpolFastFieldSerializer, data, name) } #[test] diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index 361a81379..e8761378a 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -2,7 +2,7 @@ extern crate prettytable; // use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer; // use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer; -use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats}; +use fastfield_codecs::{FastFieldCodec, FastFieldStats, bitpacked::BitpackedFastFieldSerializer}; use prettytable::{Cell, Row, Table}; fn main() { @@ -17,9 +17,7 @@ fn main() { // results.push(res); // let res = serialize_with_codec::(&data); // results.push(res); - let res = serialize_with_codec::( - &data, - ); + let res = serialize_with_codec(&BitpackedFastFieldSerializer, &data); results.push(res); // let best_estimation_codec = results @@ -91,7 +89,8 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec, &'static str)> { data_and_names } -pub fn serialize_with_codec( +pub fn serialize_with_codec( + codec: &S, data: &[u64], ) -> (bool, f32, f32, &'static str) { let is_applicable = S::is_applicable(&data, stats_from_vec(data)); @@ -100,7 +99,7 @@ pub fn serialize_with_codec( } let estimation = S::estimate(&data, stats_from_vec(data)); let mut out = vec![]; - S::serialize( + codec.serialize( &mut out, &data, stats_from_vec(data), diff --git a/fastfield_codecs/src/multilinearinterpol.rs b/fastfield_codecs/src/multilinearinterpol.rs index 31b08040d..2244360f2 100644 --- a/fastfield_codecs/src/multilinearinterpol.rs +++ b/fastfield_codecs/src/multilinearinterpol.rs @@ -17,7 +17,7 @@ use common::{BinarySerializable, CountingWriter, DeserializeFrom}; use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; -use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats}; +use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats}; const CHUNK_SIZE: u64 = 512; @@ -179,9 +179,9 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 { } /// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements. -pub struct MultiLinearInterpolFastFieldSerializer {} +pub struct MultiLinearInterpolFastFieldSerializer; -impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer { +impl FastFieldCodec for MultiLinearInterpolFastFieldSerializer { const NAME: &'static str = "MultiLinearInterpol"; type Reader = MultiLinearInterpolFastFieldReader; @@ -197,6 +197,7 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer { /// Creates a new fast field serializer. fn serialize( + &self, write: &mut impl Write, fastfield_accessor: &dyn FastFieldDataAccess, stats: FastFieldStats, @@ -374,9 +375,7 @@ mod tests { use crate::tests::get_codec_test_data_sets; fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) { - crate::tests::create_and_validate::< - MultiLinearInterpolFastFieldSerializer, - >(data, name) + crate::tests::create_and_validate(&MultiLinearInterpolFastFieldSerializer, data, name) } #[test] diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index a2fdf4ee6..9d65fbca0 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -25,13 +25,14 @@ pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter}; pub use self::error::{FastFieldNotAvailableError, Result}; pub use self::facet_reader::FacetReader; pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter}; -pub use self::reader::{DynamicFastFieldReader, FastFieldReader}; +pub use self::reader::FastFieldReader; pub use self::readers::FastFieldReaders; pub(crate) use self::readers::{type_and_cardinality, FastType}; pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats}; pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; use crate::schema::{Cardinality, FieldType, Type, Value}; use crate::{DateTime, DocId}; +pub use self::wrapper::FastFieldReaderCodecWrapper; mod alive_bitset; mod bytes; @@ -41,6 +42,7 @@ mod multivalued; mod reader; mod readers; mod serializer; +mod wrapper; mod writer; #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)] diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 3addc19ca..2308d2486 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -2,17 +2,9 @@ use std::collections::HashMap; use std::marker::PhantomData; use std::path::Path; -use fastfield_codecs::bitpacked::{ - BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer, -}; -use fastfield_codecs::gcd::{GCDFastFieldCodecReader, GCD_CODEC_ID}; -use fastfield_codecs::linearinterpol::{ - LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer, -}; -use fastfield_codecs::multilinearinterpol::{ - MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer, -}; -use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecSerializer}; + +use fastfield_codecs::{FastFieldCodecReader, FastFieldCodec}; +use fastfield_codecs::dynamic::{DynamicFastFieldReader, DynamicFastFieldSerializer}; use super::FastValue; use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr}; @@ -61,165 +53,6 @@ pub trait FastFieldReader: Clone { fn max_value(&self) -> Item; } -#[derive(Clone)] -/// DynamicFastFieldReader wraps different readers to access -/// the various encoded fastfield data -pub enum DynamicFastFieldReader { - /// Bitpacked compressed fastfield data. - Bitpacked(FastFieldReaderCodecWrapper), - /// Linear interpolated values + bitpacked - LinearInterpol(FastFieldReaderCodecWrapper), - /// Blockwise linear interpolated values + bitpacked - MultiLinearInterpol(FastFieldReaderCodecWrapper), - - /// GCD and Bitpacked compressed fastfield data. - BitpackedGCD(FastFieldReaderCodecWrapper>), - /// GCD and Linear interpolated values + bitpacked - LinearInterpolGCD( - FastFieldReaderCodecWrapper>, - ), - /// GCD and Blockwise linear interpolated values + bitpacked - MultiLinearInterpolGCD( - FastFieldReaderCodecWrapper>, - ), -} - -impl DynamicFastFieldReader { - /// Returns correct the reader wrapped in the `DynamicFastFieldReader` enum for the data. - pub fn open_from_id( - mut bytes: OwnedBytes, - codec_id: u8, - ) -> crate::Result> { - let reader = match codec_id { - BitpackedFastFieldSerializer::ID => { - DynamicFastFieldReader::Bitpacked(FastFieldReaderCodecWrapper::< - Item, - BitpackedReader, - >::open_from_bytes(bytes)?) - } - LinearInterpolFastFieldSerializer::ID => { - DynamicFastFieldReader::LinearInterpol(FastFieldReaderCodecWrapper::< - Item, - LinearInterpolFastFieldReader, - >::open_from_bytes(bytes)?) - } - MultiLinearInterpolFastFieldSerializer::ID => { - DynamicFastFieldReader::MultiLinearInterpol(FastFieldReaderCodecWrapper::< - Item, - MultiLinearInterpolFastFieldReader, - >::open_from_bytes( - bytes - )?) - } - _ if codec_id == GCD_CODEC_ID => { - let codec_id = bytes.read_u8(); - - match codec_id { - BitpackedFastFieldSerializer::ID => { - DynamicFastFieldReader::BitpackedGCD(FastFieldReaderCodecWrapper::< - Item, - GCDFastFieldCodecReader, - >::open_from_bytes( - bytes - )?) - } - LinearInterpolFastFieldSerializer::ID => { - DynamicFastFieldReader::LinearInterpolGCD(FastFieldReaderCodecWrapper::< - Item, - GCDFastFieldCodecReader, - >::open_from_bytes( - bytes - )?) - } - MultiLinearInterpolFastFieldSerializer::ID => { - DynamicFastFieldReader::MultiLinearInterpolGCD( - FastFieldReaderCodecWrapper::< - Item, - GCDFastFieldCodecReader, - >::open_from_bytes(bytes)?, - ) - } - _ => { - panic!( - "unknown fastfield codec id {:?}. Data corrupted or using old tantivy \ - version.", - codec_id - ) - } - } - } - _ => { - panic!( - "unknown fastfield codec id {:?}. Data corrupted or using old tantivy version.", - codec_id - ) - } - }; - Ok(reader) - } - /// Returns correct the reader wrapped in the `DynamicFastFieldReader` enum for the data. - pub fn open(file: FileSlice) -> crate::Result> { - let mut bytes = file.read_bytes()?; - let codec_id = bytes.read_u8(); - - Self::open_from_id(bytes, codec_id) - } -} - -impl FastFieldReader for DynamicFastFieldReader { - #[inline] - fn get(&self, doc: DocId) -> Item { - match self { - Self::Bitpacked(reader) => reader.get(doc), - Self::LinearInterpol(reader) => reader.get(doc), - Self::MultiLinearInterpol(reader) => reader.get(doc), - Self::BitpackedGCD(reader) => reader.get(doc), - Self::LinearInterpolGCD(reader) => reader.get(doc), - Self::MultiLinearInterpolGCD(reader) => reader.get(doc), - } - } - #[inline] - fn get_range(&self, start: u64, output: &mut [Item]) { - match self { - Self::Bitpacked(reader) => reader.get_range(start, output), - Self::LinearInterpol(reader) => reader.get_range(start, output), - Self::MultiLinearInterpol(reader) => reader.get_range(start, output), - Self::BitpackedGCD(reader) => reader.get_range(start, output), - Self::LinearInterpolGCD(reader) => reader.get_range(start, output), - Self::MultiLinearInterpolGCD(reader) => reader.get_range(start, output), - } - } - fn min_value(&self) -> Item { - match self { - Self::Bitpacked(reader) => reader.min_value(), - Self::LinearInterpol(reader) => reader.min_value(), - Self::MultiLinearInterpol(reader) => reader.min_value(), - Self::BitpackedGCD(reader) => reader.min_value(), - Self::LinearInterpolGCD(reader) => reader.min_value(), - Self::MultiLinearInterpolGCD(reader) => reader.min_value(), - } - } - fn max_value(&self) -> Item { - match self { - Self::Bitpacked(reader) => reader.max_value(), - Self::LinearInterpol(reader) => reader.max_value(), - Self::MultiLinearInterpol(reader) => reader.max_value(), - Self::BitpackedGCD(reader) => reader.max_value(), - Self::LinearInterpolGCD(reader) => reader.max_value(), - Self::MultiLinearInterpolGCD(reader) => reader.max_value(), - } - } -} - -/// Wrapper for accessing a fastfield. -/// -/// Holds the data and the codec to the read the data. -#[derive(Clone)] -pub struct FastFieldReaderCodecWrapper { - reader: CodecReader, - _phantom: PhantomData, -} - impl FastFieldReaderCodecWrapper { /// Opens a fast field given a file. pub fn open(file: FileSlice) -> crate::Result { diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index ec5181e50..b85e6e2c1 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -5,10 +5,10 @@ use common::{BinarySerializable, CountingWriter}; pub use fastfield_codecs::bitpacked::{ BitpackedFastFieldSerializer, BitpackedFastFieldSerializerLegacy, }; -use fastfield_codecs::gcd::{find_gcd, write_gcd_header, GCD_CODEC_ID, GCD_DEFAULT}; +use fastfield_codecs::gcd::{find_gcd, write_gcd_header}; use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer; use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer; -pub use fastfield_codecs::{FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats}; +pub use fastfield_codecs::{FastFieldCodec, FastFieldDataAccess, FastFieldStats}; use super::{FastFieldCodecName, ALL_CODECS}; use crate::directory::{CompositeWrite, WritePtr}; @@ -40,7 +40,7 @@ pub struct CompositeFastFieldSerializer { } #[derive(Debug, Clone)] -pub struct FastFieldCodecEnableCheck { +struct FastFieldCodecEnableCheck { enabled_codecs: Vec, } impl FastFieldCodecEnableCheck { @@ -54,17 +54,9 @@ impl FastFieldCodecEnableCheck { } } -impl From for FastFieldCodecEnableCheck { - fn from(codec_name: FastFieldCodecName) -> Self { - FastFieldCodecEnableCheck { - enabled_codecs: vec![codec_name], - } - } -} - // use this, when this is merged and stabilized explicit_generic_args_with_impl_trait // https://github.com/rust-lang/rust/pull/86176 -fn codec_estimation( +fn codec_estimation( stats: FastFieldStats, fastfield_accessor: &A, estimations: &mut Vec<(f32, &str, u8)>, @@ -83,7 +75,7 @@ impl CompositeFastFieldSerializer { } /// Constructor - pub fn from_write_with_codec( + fn from_write_with_codec( write: WritePtr, codec_enable_checker: FastFieldCodecEnableCheck, ) -> io::Result { @@ -119,7 +111,7 @@ impl CompositeFastFieldSerializer { /// Serialize data into a new u64 fast field. The best compression codec will be chosen /// automatically. - pub fn write_header(field_write: &mut W, codec_id: u8) -> io::Result<()> { + fn write_header(field_write: &mut W, codec_id: u8) -> io::Result<()> { codec_id.serialize(field_write)?; Ok(()) @@ -140,7 +132,9 @@ impl CompositeFastFieldSerializer { I: Iterator, { let field_write = self.composite_write.for_field_with_idx(field, idx); - let gcd = find_gcd(iter_gen().map(|val| val - stats.min_value)).unwrap_or(GCD_DEFAULT); + let gcd: u64 = find_gcd(iter_gen().map(|val| val - stats.min_value)) + .map(NonZeroU64::get) + .unwrap_or(1); if gcd == 1 { // No GCD opportunity here. @@ -154,7 +148,6 @@ impl CompositeFastFieldSerializer { iter_gen(), ); } - Self::write_header(field_write, GCD_CODEC_ID)?; struct GCDWrappedFFAccess { fastfield_accessor: T, @@ -196,7 +189,7 @@ impl CompositeFastFieldSerializer { /// Serialize data into a new u64 fast field. The best compression codec will be chosen /// automatically. - pub fn create_auto_detect_u64_fast_field_with_idx_gcd( + fn create_auto_detect_u64_fast_field_with_idx_gcd( codec_enable_checker: FastFieldCodecEnableCheck, field: Field, field_write: &mut CountingWriter, diff --git a/src/fastfield/wrapper.rs b/src/fastfield/wrapper.rs index 125dd6618..0f272f4a6 100644 --- a/src/fastfield/wrapper.rs +++ b/src/fastfield/wrapper.rs @@ -18,6 +18,18 @@ // along with this program. If not, see . // +use std::path::Path; + +use fastfield_codecs::FastFieldCodecReader; +use fastfield_codecs::FastFieldCodec; +use fastfield_codecs::dynamic::DynamicFastFieldReader; + +use crate::directory::CompositeFile; +use crate::directory::RamDirectory; +use crate::directory::WritePtr; +use crate::fastfield::FastValue; +use crate::schema::Schema; + /// Wrapper for accessing a fastfield. /// /// Holds the data and the codec to the read the data. @@ -78,40 +90,81 @@ impl FastFieldReader } } -impl From> for DynamicFastFieldReader { - fn from(vals: Vec) -> DynamicFastFieldReader { - let mut schema_builder = Schema::builder(); - let field = schema_builder.add_u64_field("field", FAST); - let schema = schema_builder.build(); - let path = Path::new("__dummy__"); - let directory: RamDirectory = RamDirectory::create(); - { - let write: WritePtr = directory - .open_write(path) - .expect("With a RamDirectory, this should never fail."); - let mut serializer = CompositeFastFieldSerializer::from_write(write) - .expect("With a RamDirectory, this should never fail."); - let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); - { - let fast_field_writer = fast_field_writers - .get_field_writer_mut(field) - .expect("With a RamDirectory, this should never fail."); - for val in vals { - fast_field_writer.add_val(val.to_u64()); - } - } - fast_field_writers - .serialize(&mut serializer, &HashMap::new(), None) - .unwrap(); - serializer.close().unwrap(); - } +impl FastFieldReaderCodecWrapper { + // /// Opens a fast field given a file. + // pub fn open(file: FileSlice) -> crate::Result { + // let mut bytes = file.read_bytes()?; + // Self::open_from_bytes(bytes) + // } - let file = directory.open_read(path).expect("Failed to open the file"); - let composite_file = CompositeFile::open(&file).expect("Failed to read the composite file"); - let field_file = composite_file - .open_read(field) - .expect("File component not found"); - DynamicFastFieldReader::open(field_file).unwrap() + /// Opens a fast field given the bytes. + pub fn open_from_bytes(bytes: OwnedBytes) -> crate::Result { + let reader = C::open_from_bytes(bytes)?; + Ok(FastFieldReaderCodecWrapper { + reader, + _phantom: PhantomData, + }) + } + + #[inline] + pub(crate) fn get_u64(&self, doc: u64) -> Item { + let data = self.reader.get_u64(doc); + Item::from_u64(data) + } + + /// Internally `multivalued` also use SingleValue Fast fields. + /// It works as follows... A first column contains the list of start index + /// for each document, a second column contains the actual values. + /// + /// The values associated to a given doc, are then + /// `second_column[first_column.get(doc)..first_column.get(doc+1)]`. + /// + /// Which means single value fast field reader can be indexed internally with + /// something different from a `DocId`. For this use case, we want to use `u64` + /// values. + /// + /// See `get_range` for an actual documentation about this method. + pub(crate) fn get_range_u64(&self, start: u64, output: &mut [Item]) { + for (i, out) in output.iter_mut().enumerate() { + *out = self.get_u64(start + (i as u64)); + } } } +// impl From> for DynamicFastFieldReader { +// fn from(vals: Vec) -> DynamicFastFieldReader { +// let mut schema_builder = Schema::builder(); +// let field = schema_builder.add_u64_field("field", FAST); +// let schema = schema_builder.build(); +// let path = Path::new("__dummy__"); +// let directory: RamDirectory = RamDirectory::create(); +// { +// let write: WritePtr = directory +// .open_write(path) +// .expect("With a RamDirectory, this should never fail."); +// let mut serializer = CompositeFastFieldSerializer::from_write(write) +// .expect("With a RamDirectory, this should never fail."); +// let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); +// { +// let fast_field_writer = fast_field_writers +// .get_field_writer_mut(field) +// .expect("With a RamDirectory, this should never fail."); +// for val in vals { +// fast_field_writer.add_val(val.to_u64()); +// } +// } +// fast_field_writers +// .serialize(&mut serializer, &HashMap::new(), None) +// .unwrap(); +// serializer.close().unwrap(); +// } + +// let file = directory.open_read(path).expect("Failed to open the file"); +// let composite_file = CompositeFile::open(&file).expect("Failed to read the composite file"); +// let field_file = composite_file +// .open_read(field) +// .expect("File component not found"); +// DynamicFastFieldReader::open(field_file).unwrap() +// } +// } +