diff --git a/Cargo.toml b/Cargo.toml index cabdc43b0..811b0e668 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,8 @@ crossbeam = "0.8" futures = { version = "0.3.15", features = ["thread-pool"] } tantivy-query-grammar = { version="0.15.0", path="./query-grammar" } tantivy-bitpacker = { version="0.1", path="./bitpacker" } +common = { version="0.1", path="./common" } +fastfield_codecs = { version="0.1", path="./fastfield_codecs" } stable_deref_trait = "1.2" rust-stemmers = "1.2" downcast-rs = "1.2" @@ -88,7 +90,7 @@ unstable = [] # useful for benches. wasm-bindgen = ["uuid/wasm-bindgen"] [workspace] -members = ["query-grammar", "bitpacker"] +members = ["query-grammar", "bitpacker", "common", "fastfield_codecs"] [badges] travis-ci = { repository = "tantivy-search/tantivy" } diff --git a/common/Cargo.toml b/common/Cargo.toml new file mode 100644 index 000000000..47a7290cd --- /dev/null +++ b/common/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "common" +version = "0.1.0" +authors = ["Pascal Seitz "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +byteorder = "1.4.3" diff --git a/common/src/lib.rs b/common/src/lib.rs new file mode 100644 index 000000000..e0e7d5106 --- /dev/null +++ b/common/src/lib.rs @@ -0,0 +1,7 @@ +pub use byteorder::LittleEndian as Endianness; + +mod serialize; +mod vint; + +pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize}; +pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt}; diff --git a/src/common/serialize.rs b/common/src/serialize.rs similarity index 99% rename from src/common/serialize.rs rename to common/src/serialize.rs index 779b839af..cd483fe3f 100644 --- a/src/common/serialize.rs +++ b/common/src/serialize.rs @@ -1,5 +1,5 @@ -use crate::common::Endianness; -use crate::common::VInt; +use crate::Endianness; +use crate::VInt; use byteorder::{ReadBytesExt, WriteBytesExt}; use std::fmt; use std::io; diff --git a/src/common/vint.rs b/common/src/vint.rs similarity index 100% rename from src/common/vint.rs rename to common/src/vint.rs diff --git a/fastfield_codecs/Cargo.toml b/fastfield_codecs/Cargo.toml new file mode 100644 index 000000000..f6d250e29 --- /dev/null +++ b/fastfield_codecs/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "fastfield_codecs" +version = "0.1.0" +authors = ["Pascal Seitz "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +common = { path = "../common/" } +tantivy-bitpacker = { path = "../bitpacker/" } + +[dev-dependencies] +rand = "0.8.3" diff --git a/fastfield_codecs/src/bitpacked/mod.rs b/fastfield_codecs/src/bitpacked/mod.rs new file mode 100644 index 000000000..9dba9f32e --- /dev/null +++ b/fastfield_codecs/src/bitpacked/mod.rs @@ -0,0 +1,5 @@ +mod reader; +mod serialize; + +pub use reader::BitpackedFastFieldReader; +pub use serialize::BitpackedFastFieldSerializer; diff --git a/fastfield_codecs/src/bitpacked/reader.rs b/fastfield_codecs/src/bitpacked/reader.rs new file mode 100644 index 000000000..966c9dc44 --- /dev/null +++ b/fastfield_codecs/src/bitpacked/reader.rs @@ -0,0 +1,33 @@ +use common::BinarySerializable; +use std::io; +use tantivy_bitpacker::compute_num_bits; +use tantivy_bitpacker::BitUnpacker; +/// Depending on the field type, a different +/// fast field is required. +#[derive(Clone)] +pub struct BitpackedFastFieldReader<'data> { + bytes: &'data [u8], + bit_unpacker: BitUnpacker, + pub min_value_u64: u64, + pub max_value_u64: u64, +} + +impl<'data> BitpackedFastFieldReader<'data> { + /// Opens a fast field given a file. + pub fn open_from_bytes(mut bytes: &'data [u8]) -> io::Result { + let min_value = u64::deserialize(&mut bytes)?; + let amplitude = u64::deserialize(&mut bytes)?; + let max_value = min_value + amplitude; + let num_bits = compute_num_bits(amplitude); + let bit_unpacker = BitUnpacker::new(num_bits); + Ok(BitpackedFastFieldReader { + bytes, + min_value_u64: min_value, + max_value_u64: max_value, + bit_unpacker, + }) + } + pub fn get_u64(&self, doc: u64) -> u64 { + self.min_value_u64 + self.bit_unpacker.get(doc, &self.bytes) + } +} diff --git a/fastfield_codecs/src/bitpacked/serialize.rs b/fastfield_codecs/src/bitpacked/serialize.rs new file mode 100644 index 000000000..40b74ea1d --- /dev/null +++ b/fastfield_codecs/src/bitpacked/serialize.rs @@ -0,0 +1,93 @@ +use crate::FastFieldDataAccess; +use crate::FastFieldSerializerEstimate; +use crate::FastFieldStats; +use common::BinarySerializable; +use std::io::{self, Write}; +use tantivy_bitpacker::compute_num_bits; +use tantivy_bitpacker::BitPacker; + +pub struct BitpackedFastFieldSerializer<'a, W: 'a + Write> { + bit_packer: BitPacker, + write: &'a mut W, + min_value: u64, + num_bits: u8, +} + +impl<'a, W: Write> BitpackedFastFieldSerializer<'a, W> { + /// Creates a new fast field serializer. + /// + /// The serializer in fact encode the values by bitpacking + /// `(val - min_value)`. + /// + /// It requires a `min_value` and a `max_value` to compute + /// compute the minimum number of bits required to encode + /// values. + pub fn open( + write: &'a mut W, + min_value: u64, + max_value: u64, + ) -> io::Result> { + assert!(min_value <= max_value); + min_value.serialize(write)?; + let amplitude = max_value - min_value; + amplitude.serialize(write)?; + let num_bits = compute_num_bits(amplitude); + let bit_packer = BitPacker::new(); + Ok(BitpackedFastFieldSerializer { + bit_packer, + write, + min_value, + num_bits, + }) + } + /// Creates a new fast field serializer. + /// + /// The serializer in fact encode the values by bitpacking + /// `(val - min_value)`. + /// + /// It requires a `min_value` and a `max_value` to compute + /// compute the minimum number of bits required to encode + /// values. + pub fn create( + write: &'a mut W, + _fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + data_iter: impl Iterator, + ) -> io::Result<()> { + let mut serializer = Self::open(write, stats.min_value, stats.max_value)?; + + for val in data_iter { + serializer.add_val(val)?; + } + serializer.close_field()?; + + Ok(()) + } + /// Pushes a new value to the currently open u64 fast field. + pub fn add_val(&mut self, val: u64) -> io::Result<()> { + let val_to_write: u64 = val - self.min_value; + self.bit_packer + .write(val_to_write, self.num_bits, &mut self.write)?; + Ok(()) + } + pub fn close_field(mut self) -> io::Result<()> { + self.bit_packer.close(&mut self.write) + } +} + +impl<'a, W: 'a + Write> FastFieldSerializerEstimate for BitpackedFastFieldSerializer<'a, W> { + fn estimate( + _fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + ) -> (f32, &'static str) { + let amplitude = stats.max_value - stats.min_value; + let num_bits = compute_num_bits(amplitude); + let num_bits_uncompressed = 64; + let ratio = num_bits as f32 / num_bits_uncompressed as f32; + let name = Self::codec_id().0; + (ratio, name) + } + fn codec_id() -> (&'static str, u8) { + ("Bitpacked", 1) + } +} diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs new file mode 100644 index 000000000..83a310488 --- /dev/null +++ b/fastfield_codecs/src/lib.rs @@ -0,0 +1,47 @@ +pub mod bitpacked; +pub mod linearinterpol; + +#[cfg(test)] +mod tests { + use super::*; + pub fn stats_from_vec(data: &[u64]) -> FastFieldStats { + let min_value = data.iter().cloned().min().unwrap_or(0); + let max_value = data.iter().cloned().max().unwrap_or(0); + FastFieldStats { + min_value, + max_value, + num_vals: data.len() as u64, + } + } +} + +/// FastFieldDataAccess is the trait to access fast field data during serialization and estimation. +pub trait FastFieldDataAccess: Clone { + /// Return the value associated to the given document. + /// + /// Whenever possible use the Iterator passed to the fastfield creation instead, for performance reasons. + /// + /// # Panics + /// + /// May panic if `doc` is greater than the segment + fn get(&self, doc: u32) -> u64; +} + +/// The FastFieldSerializerEstimate trait is required on all variants +/// of fast field compressions, to decide which one to choose. +pub trait FastFieldSerializerEstimate { + /// returns an estimate of the compression ratio. + fn estimate( + fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + ) -> (f32, &'static str); + /// the unique (name, id) of the compressor. Used to distinguish when de/serializing. + fn codec_id() -> (&'static str, u8); +} + +#[derive(Debug, Clone)] +pub struct FastFieldStats { + pub min_value: u64, + pub max_value: u64, + pub num_vals: u64, +} diff --git a/fastfield_codecs/src/linearinterpol/mod.rs b/fastfield_codecs/src/linearinterpol/mod.rs new file mode 100644 index 000000000..60d9f1e6e --- /dev/null +++ b/fastfield_codecs/src/linearinterpol/mod.rs @@ -0,0 +1,123 @@ +mod serialize; + +use common::BinarySerializable; +use std::io; +use tantivy_bitpacker::compute_num_bits; +use tantivy_bitpacker::BitUnpacker; + +use crate::FastFieldDataAccess; +/// Depending on the field type, a different +/// fast field is required. +#[derive(Clone)] +pub struct LinearinterpolFastFieldReader<'data> { + bytes: &'data [u8], + bit_unpacker: BitUnpacker, + pub first_value: u64, + pub rel_max_value: u64, + pub offset: u64, + pub slope: f64, +} + +impl<'data> LinearinterpolFastFieldReader<'data> { + /// Opens a fast field given a file. + pub fn open_from_bytes(mut bytes: &'data [u8]) -> io::Result { + let rel_max_value = u64::deserialize(&mut bytes)?; + let offset = u64::deserialize(&mut bytes)?; + let first_value = u64::deserialize(&mut bytes)?; + let last_value = u64::deserialize(&mut bytes)?; + let num_vals = u64::deserialize(&mut bytes)?; + let slope = (last_value as f64 - first_value as f64) / (num_vals as u64 - 1) as f64; + + let num_bits = compute_num_bits(rel_max_value); + let bit_unpacker = BitUnpacker::new(num_bits); + Ok(LinearinterpolFastFieldReader { + bytes, + first_value, + rel_max_value, + offset, + bit_unpacker, + slope, + }) + } + pub fn get_u64(&self, doc: u64) -> u64 { + let calculated_value = self.first_value + (doc as f64 * self.slope) as u64; + calculated_value + self.bit_unpacker.get(doc, &self.bytes) - self.offset + + //self.offset + self.min_value + self.bit_unpacker.get(doc, &self.bytes) + } +} + +impl<'a> FastFieldDataAccess for &'a [u64] { + fn get(&self, doc: u32) -> u64 { + self[doc as usize] + } +} + +impl FastFieldDataAccess for Vec { + fn get(&self, doc: u32) -> u64 { + self[doc as usize] + } +} +#[cfg(test)] +mod tests { + use super::*; + + fn create_and_validate(data: &[u64]) -> (u64, u64) { + let mut out = vec![]; + serialize::LinearInterpolFastFieldSerializer::create( + &mut out, + &data, + crate::tests::stats_from_vec(&data), + data.iter().cloned(), + data.iter().cloned(), + data.iter().cloned(), + ) + .unwrap(); + + let reader = LinearinterpolFastFieldReader::open_from_bytes(&out).unwrap(); + for (doc, val) in data.iter().enumerate() { + assert_eq!(reader.get_u64(doc as u64), *val); + } + (reader.rel_max_value, reader.offset) + } + + #[test] + fn linear_interpol_fast_field_test_simple() { + let data = (10..=20_u64).collect::>(); + + let (rel_max_value, offset) = create_and_validate(&data); + + assert_eq!(offset, 0); + assert_eq!(rel_max_value, 0); + } + + #[test] + fn linear_interpol_fast_field_test_with_offset() { + //let data = vec![5, 50, 95, 96, 97, 98, 99, 100]; + let mut data = vec![5, 6, 7, 8, 9, 10, 99, 100]; + create_and_validate(&data); + + data.reverse(); + create_and_validate(&data); + } + #[test] + fn linear_interpol_fast_field_test_no_structure() { + let mut data = vec![5, 50, 3, 13, 1, 1000, 35]; + create_and_validate(&data); + + data.reverse(); + create_and_validate(&data); + } + #[test] + fn linear_interpol_fast_field_rand() { + for i in 0..50000 { + let mut data = (0..1 + rand::random::() as usize) + .map(|num| rand::random::() as u64 / 2 as u64) + .collect::>(); + create_and_validate(&data); + + data.reverse(); + create_and_validate(&data); + } + } +} diff --git a/fastfield_codecs/src/linearinterpol/serialize.rs b/fastfield_codecs/src/linearinterpol/serialize.rs new file mode 100644 index 000000000..0c99bfde0 --- /dev/null +++ b/fastfield_codecs/src/linearinterpol/serialize.rs @@ -0,0 +1,87 @@ +use crate::FastFieldDataAccess; +use crate::FastFieldSerializerEstimate; +use crate::FastFieldStats; +use common::BinarySerializable; +use std::io::{self, Write}; +use tantivy_bitpacker::compute_num_bits; +use tantivy_bitpacker::BitPacker; + +/// Fastfield serializer, which tries to guess values by linear interpolation +/// and stores the difference. +pub struct LinearInterpolFastFieldSerializer {} + +// TODO not suitable if max is larger than i64::MAX / 2 + +impl LinearInterpolFastFieldSerializer { + /// Creates a new fast field serializer. + pub fn create( + write: &mut impl Write, + fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + data_iter: impl Iterator, + data_iter1: impl Iterator, + data_iter2: impl Iterator, + ) -> io::Result<()> { + assert!(stats.min_value <= stats.max_value); + + //let first_val = stats.min_value; + //let last_val = stats.max_value; + let first_val = fastfield_accessor.get(0); + let last_val = fastfield_accessor.get(stats.num_vals as u32 - 1); + let slope = (last_val as f64 - first_val as f64) / (stats.num_vals as u64 - 1) as f64; + // offset to ensure all values are positive + let offset = data_iter1 + .enumerate() + .map(|(pos, val)| { + let calculated_value = first_val + (pos as f64 * slope) as u64; + val as i64 - calculated_value as i64 + }) + .min() + .unwrap() + .abs() as u64; + + //calc new max + let rel_max = data_iter2 + .enumerate() + .map(|(pos, val)| { + let calculated_value = first_val + (pos as f64 * slope) as u64; + (val + offset) - calculated_value + }) + .max() + .unwrap(); + + let amplitude = rel_max; + amplitude.serialize(write)?; + offset.serialize(write)?; + first_val.serialize(write)?; + last_val.serialize(write)?; + stats.num_vals.serialize(write)?; + let num_bits = compute_num_bits(amplitude); + let mut bit_packer = BitPacker::new(); + for (pos, val) in data_iter.enumerate() { + let calculated_value = first_val + (pos as f64 * slope) as u64; + let diff = (val + offset) - calculated_value; + bit_packer.write(diff, num_bits, write)?; + } + bit_packer.close(write)?; + + Ok(()) + } +} + +impl FastFieldSerializerEstimate for LinearInterpolFastFieldSerializer { + fn estimate( + _fastfield_accessor: &impl FastFieldDataAccess, + stats: FastFieldStats, + ) -> (f32, &'static str) { + let amplitude = stats.max_value - stats.min_value; + let num_bits = compute_num_bits(amplitude); + let num_bits_uncompressed = 64; + let ratio = num_bits as f32 / num_bits_uncompressed as f32; + let name = Self::codec_id().0; + (ratio, name) + } + fn codec_id() -> (&'static str, u8) { + ("LinearInterpol", 2) + } +} diff --git a/src/common/mod.rs b/src/common/mod.rs index ea7507b04..4e574f1a0 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,18 +1,19 @@ mod bitset; mod composite_file; mod counting_writer; -mod serialize; -mod vint; +//mod serialize; +//mod vint; pub use self::bitset::BitSet; pub(crate) use self::bitset::TinySet; pub(crate) use self::composite_file::{CompositeFile, CompositeWrite}; pub use self::counting_writer::CountingWriter; -pub use self::serialize::{BinarySerializable, DeserializeFrom, FixedSize}; -pub use self::vint::{ +//pub use self::serialize::{BinarySerializable, DeserializeFrom, FixedSize}; +pub use byteorder::LittleEndian as Endianness; +pub use common::{ read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, }; -pub use byteorder::LittleEndian as Endianness; +pub use common::{BinarySerializable, DeserializeFrom, FixedSize}; /// Segment's max doc must be `< MAX_DOC_LIMIT`. /// @@ -103,8 +104,9 @@ pub fn u64_to_f64(val: u64) -> f64 { #[cfg(test)] pub(crate) mod test { - pub use super::serialize::test::fixed_size_test; + //pub use super::serialize::test::fixed_size_test; use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64}; + use common::{BinarySerializable, FixedSize}; use proptest::prelude::*; use std::f64; use tantivy_bitpacker::compute_num_bits; @@ -118,6 +120,12 @@ pub(crate) mod test { assert_eq!(u64_to_f64(f64_to_u64(val)), val); } + pub fn fixed_size_test() { + let mut buffer = Vec::new(); + O::default().serialize(&mut buffer).unwrap(); + assert_eq!(buffer.len(), O::SIZE_IN_BYTES); + } + proptest! { #[test] fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) { diff --git a/src/directory/owned_bytes.rs b/src/directory/owned_bytes.rs index d8f493416..1983d41c8 100644 --- a/src/directory/owned_bytes.rs +++ b/src/directory/owned_bytes.rs @@ -56,6 +56,12 @@ impl OwnedBytes { self.data } + /// Returns the underlying slice of data. + /// `Deref` and `AsRef` are also available. + #[inline] + pub fn into_slice(self) -> &'static [u8] { + self.data + } /// Returns the len of the slice. #[inline] pub fn len(&self) -> usize { diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 8ba8225bb..3f4398f3c 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -215,6 +215,7 @@ mod tests { use super::*; use crate::common::CompositeFile; + use crate::common::HasLen; use crate::directory::{Directory, RamDirectory, WritePtr}; use crate::fastfield::BitpackedFastFieldReader; use crate::merge_policy::NoMergePolicy; @@ -223,7 +224,6 @@ mod tests { use crate::schema::FAST; use crate::schema::{Document, IntOptions}; use crate::{Index, SegmentId, SegmentReader}; - use common::HasLen; use once_cell::sync::Lazy; use rand::prelude::SliceRandom; use rand::rngs::StdRng; diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 1effdec54..5f36a1d4d 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -8,6 +8,7 @@ use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter}; use crate::schema::Schema; use crate::schema::FAST; use crate::DocId; +use fastfield_codecs::bitpacked::BitpackedFastFieldReader as BitpackedReader; use std::collections::HashMap; use std::marker::PhantomData; use std::path::Path; @@ -67,9 +68,9 @@ pub enum DynamicFastFieldReader { impl DynamicFastFieldReader { /// Returns correct the reader wrapped in the `DynamicFastFieldReader` enum for the data. pub fn open(file: FileSlice) -> crate::Result> { - let mut bytes = file.read_bytes()?; + let bytes = file.read_bytes()?; let (mut id_bytes, data_bytes) = bytes.split(1); - let id = u8::deserialize(&mut id_bytes)?; + let _id = u8::deserialize(&mut id_bytes)?; Ok(DynamicFastFieldReader::Bitpacked( BitpackedFastFieldReader::open_from_bytes(data_bytes)?, @@ -106,10 +107,7 @@ impl FastFieldReader for DynamicFastFieldReader { /// fast field is required. #[derive(Clone)] pub struct BitpackedFastFieldReader { - bytes: OwnedBytes, - bit_unpacker: BitUnpacker, - min_value_u64: u64, - max_value_u64: u64, + reader: BitpackedReader<'static>, _phantom: PhantomData, } @@ -121,22 +119,15 @@ impl BitpackedFastFieldReader { Self::open_from_bytes(bytes) } /// Opens a fast field given a file. - pub fn open_from_bytes(mut bytes: OwnedBytes) -> crate::Result { - let min_value = u64::deserialize(&mut bytes)?; - let amplitude = u64::deserialize(&mut bytes)?; - let max_value = min_value + amplitude; - let num_bits = compute_num_bits(amplitude); - let bit_unpacker = BitUnpacker::new(num_bits); + pub fn open_from_bytes(bytes: OwnedBytes) -> crate::Result { + let reader = BitpackedReader::open_from_bytes(bytes.into_slice())?; Ok(BitpackedFastFieldReader { - bytes, - min_value_u64: min_value, - max_value_u64: max_value, - bit_unpacker, + reader, _phantom: PhantomData, }) } pub(crate) fn get_u64(&self, doc: u64) -> Item { - Item::from_u64(self.min_value_u64 + self.bit_unpacker.get(doc, &self.bytes)) + Item::from_u64(self.reader.get_u64(doc)) } /// Internally `multivalued` also use SingleValue Fast fields. @@ -194,7 +185,7 @@ impl FastFieldReader for BitpackedFastFieldReader { /// deleted document, and should be considered as an upper bound /// of the actual maximum value. fn min_value(&self) -> Item { - Item::from_u64(self.min_value_u64) + Item::from_u64(self.reader.min_value_u64) } /// Returns the maximum value for this fast field. @@ -203,7 +194,7 @@ impl FastFieldReader for BitpackedFastFieldReader { /// deleted document, and should be considered as an upper bound /// of the actual maximum value. fn max_value(&self) -> Item { - Item::from_u64(self.max_value_u64) + Item::from_u64(self.reader.max_value_u64) } } diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index b8074fc4a..14050a217 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -7,22 +7,13 @@ use crate::common::CountingWriter; use crate::directory::WritePtr; use crate::schema::Field; use crate::DocId; -pub use bitpacked::BitpackedFastFieldSerializer; +//pub use bitpacked::BitpackedFastFieldSerializer; +pub use fastfield_codecs::bitpacked::BitpackedFastFieldSerializer; +pub use fastfield_codecs::FastFieldDataAccess; +pub use fastfield_codecs::FastFieldSerializerEstimate; +pub use fastfield_codecs::FastFieldStats; use std::io::{self, Write}; -/// FastFieldReader is the trait to access fast field data. -pub trait FastFieldDataAccess: Clone { - //type IteratorType: Iterator; - /// Return the value associated to the given document. - /// - /// Whenever possible use the Iterator passed to the fastfield creation instead, for performance reasons. - /// - /// # Panics - /// - /// May panic if `doc` is greater than the segment - fn get(&self, doc: DocId) -> u64; -} - /// `CompositeFastFieldSerializer` is in charge of serializing /// fastfields on disk. /// @@ -129,13 +120,6 @@ impl CompositeFastFieldSerializer { } } -#[derive(Debug, Clone)] -pub struct FastFieldStats { - pub min_value: u64, - pub max_value: u64, - pub num_vals: u64, -} - /// The FastFieldSerializer trait is the common interface /// implemented by every fastfield serializer variant. /// @@ -148,18 +132,6 @@ pub trait FastFieldSerializer { fn close_field(self) -> io::Result<()>; } -/// The FastFieldSerializerEstimate trait is required on all variants -/// of fast field compressions, to decide which one to choose. -pub trait FastFieldSerializerEstimate { - /// returns an estimate of the compression ratio. - fn estimate( - fastfield_accessor: &impl FastFieldDataAccess, - stats: FastFieldStats, - ) -> (f32, &'static str); - /// the unique (name, id) of the compressor. Used to distinguish when de/serializing. - fn codec_id() -> (&'static str, u8); -} - pub struct FastBytesFieldSerializer<'a, W: Write> { write: &'a mut W, }