Refactor Further

This commit is contained in:
Paul Masurel
2022-08-21 12:51:21 +02:00
parent 0ec2ebd791
commit b9a87d6dc6
12 changed files with 152 additions and 266 deletions

View File

@@ -29,7 +29,7 @@ mod tests {
/// Produces the benchmark values: the integers `0..20_000` in increasing order.
fn value_iter() -> impl Iterator<Item = u64> {
    const NUM_VALUES: u64 = 20_000;
    0..NUM_VALUES
}
fn bench_get<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
fn bench_get<S: FastFieldCodec, R: FastFieldCodecReader>(
b: &mut Bencher,
data: &[u64],
) {
@@ -49,7 +49,7 @@ mod tests {
}
});
}
fn bench_create<S: FastFieldCodecSerializer>(b: &mut Bencher, data: &[u64]) {
fn bench_create<S: FastFieldCodec>(b: &mut Bencher, data: &[u64]) {
let mut bytes = vec![];
b.iter(|| {
S::serialize(

View File

@@ -4,7 +4,7 @@ use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats};
/// Depending on the field type, a different
/// fast field is required.
@@ -83,7 +83,7 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
pub struct BitpackedFastFieldSerializer;
impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
impl FastFieldCodec for BitpackedFastFieldSerializer {
const NAME: &'static str = "Bitpacked";
type Reader = BitpackedFastFieldReader;
@@ -114,6 +114,7 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
/// compute the minimum number of bits required to encode
/// values.
fn serialize(
&self,
write: &mut impl Write,
_fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
@@ -150,7 +151,8 @@ mod tests {
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) {
crate::tests::create_and_validate::<BitpackedFastFieldSerializer>(
crate::tests::create_and_validate(
&BitpackedFastFieldSerializer,
data, name,
);
}

View File

@@ -23,16 +23,16 @@ use std::sync::Arc;
use ownedbytes::OwnedBytes;
use crate::FastFieldCodecSerializer;
use crate::FastFieldCodec;
use crate::bitpacked::BitpackedFastFieldSerializer;
use crate::linearinterpol::LinearInterpolFastFieldSerializer;
use crate::FastFieldCodecReader;
use crate::gcd::GCDFastFieldCodecSerializer;
use crate::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
struct DynamicFastFieldSerializer;
pub struct DynamicFastFieldSerializer;
impl FastFieldCodecSerializer for DynamicFastFieldSerializer {
impl FastFieldCodec for DynamicFastFieldSerializer {
const NAME: &'static str = "dynamic";
type Reader = DynamicFastFieldReader;
@@ -46,6 +46,7 @@ impl FastFieldCodecSerializer for DynamicFastFieldSerializer {
}
fn serialize(
&self,
write: &mut impl io::Write,
fastfield_accessor: &dyn crate::FastFieldDataAccess,
stats: crate::FastFieldStats,

View File

@@ -4,7 +4,7 @@ use common::BinarySerializable;
use fastdivide::DividerU64;
use ownedbytes::OwnedBytes;
use crate::{FastFieldCodecReader, FastFieldCodecSerializer};
use crate::{FastFieldCodecReader, FastFieldCodec};
/// Wrapper for accessing a fastfield.
///
@@ -16,13 +16,13 @@ pub struct GCDFastFieldCodecReader<CodecReader> {
reader: CodecReader,
}
pub struct GCDFastFieldCodecSerializer<WrappedCodecSerializer: FastFieldCodecSerializer> {
_wrapped_type: PhantomData<WrappedCodecSerializer>,
pub struct GCDFastFieldCodecSerializer<WrappedCodecSerializer: FastFieldCodec> {
pub gcd: NonZeroU64,
pub min_value: u64,
pub wrapped: WrappedCodecSerializer,
}
impl<WrappedCodecSerializer: FastFieldCodecSerializer> GCDFastFieldCodecSerializer<WrappedCodecSerializer> {}
impl<WrappedCodecSerializer: FastFieldCodecSerializer> FastFieldCodecSerializer for GCDFastFieldCodecSerializer<WrappedCodecSerializer> {
impl<WrappedCodecSerializer: FastFieldCodec> FastFieldCodec for GCDFastFieldCodecSerializer<WrappedCodecSerializer> {
// TODO: we would like to include the underlying codec name as well.
const NAME: &'static str = "GCD";
@@ -37,13 +37,16 @@ impl<WrappedCodecSerializer: FastFieldCodecSerializer> FastFieldCodecSerializer
}
fn serialize(
&self,
write: &mut impl Write,
fastfield_accessor: &dyn crate::FastFieldDataAccess,
stats: crate::FastFieldStats,
data_iter: impl Iterator<Item = u64>,
data_iter1: impl Iterator<Item = u64>,
) -> io::Result<()> {
todo!()
write_gcd_header(write, self.min_value, self.gcd)?;
self.wrapped.serialize(write, fastfield_accessor, stats, data_iter, data_iter1)?;
Ok(())
}
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader> {
@@ -77,8 +80,8 @@ impl<C: FastFieldCodecReader> FastFieldCodecReader for GCDFastFieldCodecReader<C
}
}
pub fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: u64) -> io::Result<()> {
gcd.serialize(field_write)?;
fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: NonZeroU64) -> io::Result<()> {
gcd.get().serialize(field_write)?;
min_value.serialize(field_write)?;
Ok(())
}

View File

@@ -13,7 +13,7 @@ pub mod gcd;
pub mod linearinterpol;
pub mod multilinearinterpol;
pub trait FastFieldCodecReader{
pub trait FastFieldCodecReader {
/// reads the metadata and returns the CodecReader
fn get_u64(&self, doc: u64) -> u64;
fn min_value(&self) -> u64;
@@ -22,7 +22,7 @@ pub trait FastFieldCodecReader{
/// The FastFieldSerializerEstimate trait is required on all variants
/// of fast field compressions, to decide which one to choose.
pub trait FastFieldCodecSerializer {
pub trait FastFieldCodec {
/// A codec needs to provide a unique name used for debugging and de/serialization.
const NAME: &'static str;
@@ -42,6 +42,7 @@ pub trait FastFieldCodecSerializer {
/// There are multiple iterators, in case the codec needs to read the data multiple times.
/// The iterators should be preferred over using fastfield_accessor for performance reasons.
fn serialize(
&self,
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
@@ -93,7 +94,8 @@ mod tests {
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
};
pub fn create_and_validate<S: FastFieldCodecSerializer>(
pub fn create_and_validate<S: FastFieldCodec>(
codec: &S,
data: &[u64],
name: &str,
) -> (f32, f32) {
@@ -102,7 +104,7 @@ mod tests {
}
let estimation = S::estimate(&data, crate::tests::stats_from_vec(data));
let mut out: Vec<u8> = Vec::new();
S::serialize(
codec.serialize(
&mut out,
&data,
crate::tests::stats_from_vec(data),
@@ -141,11 +143,11 @@ mod tests {
data_and_names
}
fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
let codec_name = S::NAME;
fn test_codec<C: FastFieldCodec>(codec: &C) {
let codec_name = C::NAME;
for (data, data_set_name) in get_codec_test_data_sets() {
let (estimate, actual) =
crate::tests::create_and_validate::<S>(&data, data_set_name);
crate::tests::create_and_validate(codec, &data, data_set_name);
let result = if estimate == f32::MAX {
"Disabled".to_string()
} else {
@@ -159,15 +161,15 @@ mod tests {
}
#[test]
fn test_codec_bitpacking() {
test_codec::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>();
test_codec(&BitpackedFastFieldSerializer);
}
#[test]
fn test_codec_interpolation() {
test_codec::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>();
test_codec(&LinearInterpolFastFieldSerializer);
}
#[test]
fn test_codec_multi_interpolation() {
test_codec::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>();
test_codec(&MultiLinearInterpolFastFieldSerializer);
}
use super::*;

View File

@@ -5,7 +5,7 @@ use common::{BinarySerializable, FixedSize};
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats};
/// Depending on the field type, a different
/// fast field is required.
@@ -77,7 +77,7 @@ impl FastFieldCodecReader for LinearInterpolFastFieldReader {
/// Fastfield serializer, which tries to guess values by linear interpolation
/// and stores the difference bitpacked.
pub struct LinearInterpolFastFieldSerializer {}
pub struct LinearInterpolFastFieldSerializer;
#[inline]
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
@@ -94,7 +94,7 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
first_val + (pos as f32 * slope) as u64
}
impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
impl FastFieldCodec for LinearInterpolFastFieldSerializer {
const NAME: &'static str = "LinearInterpol";
type Reader = LinearInterpolFastFieldReader;
@@ -117,6 +117,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
/// Creates a new fast field serializer.
fn serialize(
&self,
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
@@ -242,9 +243,7 @@ mod tests {
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
LinearInterpolFastFieldSerializer,
>(data, name)
crate::tests::create_and_validate(&LinearInterpolFastFieldSerializer, data, name)
}
#[test]

View File

@@ -2,7 +2,7 @@
extern crate prettytable;
// use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
// use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats};
use fastfield_codecs::{FastFieldCodec, FastFieldStats, bitpacked::BitpackedFastFieldSerializer};
use prettytable::{Cell, Row, Table};
fn main() {
@@ -17,9 +17,7 @@ fn main() {
// results.push(res);
// let res = serialize_with_codec::<MultiLinearInterpolFastFieldSerializer>(&data);
// results.push(res);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
&data,
);
let res = serialize_with_codec(&BitpackedFastFieldSerializer, &data);
results.push(res);
// let best_estimation_codec = results
@@ -91,7 +89,8 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
data_and_names
}
pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
pub fn serialize_with_codec<S: FastFieldCodec>(
codec: &S,
data: &[u64],
) -> (bool, f32, f32, &'static str) {
let is_applicable = S::is_applicable(&data, stats_from_vec(data));
@@ -100,7 +99,7 @@ pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
}
let estimation = S::estimate(&data, stats_from_vec(data));
let mut out = vec![];
S::serialize(
codec.serialize(
&mut out,
&data,
stats_from_vec(data),

View File

@@ -17,7 +17,7 @@ use common::{BinarySerializable, CountingWriter, DeserializeFrom};
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats};
const CHUNK_SIZE: u64 = 512;
@@ -179,9 +179,9 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}
/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct MultiLinearInterpolFastFieldSerializer {}
pub struct MultiLinearInterpolFastFieldSerializer;
impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
impl FastFieldCodec for MultiLinearInterpolFastFieldSerializer {
const NAME: &'static str = "MultiLinearInterpol";
type Reader = MultiLinearInterpolFastFieldReader;
@@ -197,6 +197,7 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
/// Creates a new fast field serializer.
fn serialize(
&self,
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
@@ -374,9 +375,7 @@ mod tests {
use crate::tests::get_codec_test_data_sets;
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
MultiLinearInterpolFastFieldSerializer,
>(data, name)
crate::tests::create_and_validate(&MultiLinearInterpolFastFieldSerializer, data, name)
}
#[test]

View File

@@ -25,13 +25,14 @@ pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
pub use self::reader::{DynamicFastFieldReader, FastFieldReader};
pub use self::reader::FastFieldReader;
pub use self::readers::FastFieldReaders;
pub(crate) use self::readers::{type_and_cardinality, FastType};
pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use crate::schema::{Cardinality, FieldType, Type, Value};
use crate::{DateTime, DocId};
pub use self::wrapper::FastFieldReaderCodecWrapper;
mod alive_bitset;
mod bytes;
@@ -41,6 +42,7 @@ mod multivalued;
mod reader;
mod readers;
mod serializer;
mod wrapper;
mod writer;
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)]

View File

@@ -2,17 +2,9 @@ use std::collections::HashMap;
use std::marker::PhantomData;
use std::path::Path;
use fastfield_codecs::bitpacked::{
BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer,
};
use fastfield_codecs::gcd::{GCDFastFieldCodecReader, GCD_CODEC_ID};
use fastfield_codecs::linearinterpol::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
};
use fastfield_codecs::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
};
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecSerializer};
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodec};
use fastfield_codecs::dynamic::{DynamicFastFieldReader, DynamicFastFieldSerializer};
use super::FastValue;
use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr};
@@ -61,165 +53,6 @@ pub trait FastFieldReader<Item: FastValue>: Clone {
fn max_value(&self) -> Item;
}
#[derive(Clone)]
/// DynamicFastFieldReader wraps different readers to access
/// the various encoded fastfield data.
///
/// Each variant holds a `FastFieldReaderCodecWrapper` around the reader of one codec;
/// the `*GCD` variants additionally wrap that codec reader in a `GCDFastFieldCodecReader`.
pub enum DynamicFastFieldReader<Item: FastValue> {
/// Bitpacked compressed fastfield data.
Bitpacked(FastFieldReaderCodecWrapper<Item, BitpackedReader>),
/// Linear interpolated values + bitpacked
LinearInterpol(FastFieldReaderCodecWrapper<Item, LinearInterpolFastFieldReader>),
/// Blockwise linear interpolated values + bitpacked
MultiLinearInterpol(FastFieldReaderCodecWrapper<Item, MultiLinearInterpolFastFieldReader>),
/// GCD and Bitpacked compressed fastfield data.
BitpackedGCD(FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<BitpackedReader>>),
/// GCD and Linear interpolated values + bitpacked
LinearInterpolGCD(
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<LinearInterpolFastFieldReader>>,
),
/// GCD and Blockwise linear interpolated values + bitpacked
MultiLinearInterpolGCD(
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<MultiLinearInterpolFastFieldReader>>,
),
}
impl<Item: FastValue> DynamicFastFieldReader<Item> {
/// Returns the reader matching `codec_id`, wrapped in the corresponding
/// `DynamicFastFieldReader` variant.
///
/// When `codec_id` equals `GCD_CODEC_ID`, one more byte is read from `bytes`
/// and used as the id of the codec wrapped by the GCD reader.
///
/// # Errors
///
/// Propagates any error returned while opening the selected reader.
///
/// # Panics
///
/// Panics if `codec_id` (or, in the GCD case, the nested codec id) matches none
/// of the codec ids handled below.
pub fn open_from_id(
mut bytes: OwnedBytes,
codec_id: u8,
) -> crate::Result<DynamicFastFieldReader<Item>> {
let reader = match codec_id {
BitpackedFastFieldSerializer::ID => {
DynamicFastFieldReader::Bitpacked(FastFieldReaderCodecWrapper::<
Item,
BitpackedReader,
>::open_from_bytes(bytes)?)
}
LinearInterpolFastFieldSerializer::ID => {
DynamicFastFieldReader::LinearInterpol(FastFieldReaderCodecWrapper::<
Item,
LinearInterpolFastFieldReader,
>::open_from_bytes(bytes)?)
}
MultiLinearInterpolFastFieldSerializer::ID => {
DynamicFastFieldReader::MultiLinearInterpol(FastFieldReaderCodecWrapper::<
Item,
MultiLinearInterpolFastFieldReader,
>::open_from_bytes(
bytes
)?)
}
_ if codec_id == GCD_CODEC_ID => {
// The byte following the GCD marker identifies the wrapped codec.
let codec_id = bytes.read_u8();
match codec_id {
BitpackedFastFieldSerializer::ID => {
DynamicFastFieldReader::BitpackedGCD(FastFieldReaderCodecWrapper::<
Item,
GCDFastFieldCodecReader<BitpackedReader>,
>::open_from_bytes(
bytes
)?)
}
LinearInterpolFastFieldSerializer::ID => {
DynamicFastFieldReader::LinearInterpolGCD(FastFieldReaderCodecWrapper::<
Item,
GCDFastFieldCodecReader<LinearInterpolFastFieldReader>,
>::open_from_bytes(
bytes
)?)
}
MultiLinearInterpolFastFieldSerializer::ID => {
DynamicFastFieldReader::MultiLinearInterpolGCD(
FastFieldReaderCodecWrapper::<
Item,
GCDFastFieldCodecReader<MultiLinearInterpolFastFieldReader>,
>::open_from_bytes(bytes)?,
)
}
// Unknown nested codec id after the GCD marker.
_ => {
panic!(
"unknown fastfield codec id {:?}. Data corrupted or using old tantivy \
version.",
codec_id
)
}
}
}
// Unknown top-level codec id.
_ => {
panic!(
"unknown fastfield codec id {:?}. Data corrupted or using old tantivy version.",
codec_id
)
}
};
Ok(reader)
}
/// Reads all bytes of `file`, takes the first byte as the codec id and returns the
/// matching reader wrapped in the `DynamicFastFieldReader` enum.
///
/// # Panics
///
/// Panics under the same conditions as `open_from_id`.
pub fn open(file: FileSlice) -> crate::Result<DynamicFastFieldReader<Item>> {
let mut bytes = file.read_bytes()?;
let codec_id = bytes.read_u8();
Self::open_from_id(bytes, codec_id)
}
}
impl<Item: FastValue> FastFieldReader<Item> for DynamicFastFieldReader<Item> {
#[inline]
fn get(&self, doc: DocId) -> Item {
match self {
Self::Bitpacked(reader) => reader.get(doc),
Self::LinearInterpol(reader) => reader.get(doc),
Self::MultiLinearInterpol(reader) => reader.get(doc),
Self::BitpackedGCD(reader) => reader.get(doc),
Self::LinearInterpolGCD(reader) => reader.get(doc),
Self::MultiLinearInterpolGCD(reader) => reader.get(doc),
}
}
#[inline]
fn get_range(&self, start: u64, output: &mut [Item]) {
match self {
Self::Bitpacked(reader) => reader.get_range(start, output),
Self::LinearInterpol(reader) => reader.get_range(start, output),
Self::MultiLinearInterpol(reader) => reader.get_range(start, output),
Self::BitpackedGCD(reader) => reader.get_range(start, output),
Self::LinearInterpolGCD(reader) => reader.get_range(start, output),
Self::MultiLinearInterpolGCD(reader) => reader.get_range(start, output),
}
}
fn min_value(&self) -> Item {
match self {
Self::Bitpacked(reader) => reader.min_value(),
Self::LinearInterpol(reader) => reader.min_value(),
Self::MultiLinearInterpol(reader) => reader.min_value(),
Self::BitpackedGCD(reader) => reader.min_value(),
Self::LinearInterpolGCD(reader) => reader.min_value(),
Self::MultiLinearInterpolGCD(reader) => reader.min_value(),
}
}
fn max_value(&self) -> Item {
match self {
Self::Bitpacked(reader) => reader.max_value(),
Self::LinearInterpol(reader) => reader.max_value(),
Self::MultiLinearInterpol(reader) => reader.max_value(),
Self::BitpackedGCD(reader) => reader.max_value(),
Self::LinearInterpolGCD(reader) => reader.max_value(),
Self::MultiLinearInterpolGCD(reader) => reader.max_value(),
}
}
}
/// Wrapper for accessing a fastfield.
///
/// Holds the data and the codec to read the data.
#[derive(Clone)]
pub struct FastFieldReaderCodecWrapper<Item: FastValue, CodecReader> {
// Codec-specific reader that the wrapper delegates to.
reader: CodecReader,
// Zero-sized marker recording the `Item` type; no `Item` value is stored.
_phantom: PhantomData<Item>,
}
impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item, C> {
/// Opens a fast field given a file.
pub fn open(file: FileSlice) -> crate::Result<Self> {

View File

@@ -5,10 +5,10 @@ use common::{BinarySerializable, CountingWriter};
pub use fastfield_codecs::bitpacked::{
BitpackedFastFieldSerializer, BitpackedFastFieldSerializerLegacy,
};
use fastfield_codecs::gcd::{find_gcd, write_gcd_header, GCD_CODEC_ID, GCD_DEFAULT};
use fastfield_codecs::gcd::{find_gcd, write_gcd_header};
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
pub use fastfield_codecs::{FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
pub use fastfield_codecs::{FastFieldCodec, FastFieldDataAccess, FastFieldStats};
use super::{FastFieldCodecName, ALL_CODECS};
use crate::directory::{CompositeWrite, WritePtr};
@@ -40,7 +40,7 @@ pub struct CompositeFastFieldSerializer {
}
#[derive(Debug, Clone)]
pub struct FastFieldCodecEnableCheck {
struct FastFieldCodecEnableCheck {
enabled_codecs: Vec<FastFieldCodecName>,
}
impl FastFieldCodecEnableCheck {
@@ -54,17 +54,9 @@ impl FastFieldCodecEnableCheck {
}
}
impl From<FastFieldCodecName> for FastFieldCodecEnableCheck {
fn from(codec_name: FastFieldCodecName) -> Self {
FastFieldCodecEnableCheck {
enabled_codecs: vec![codec_name],
}
}
}
// use this, when this is merged and stabilized explicit_generic_args_with_impl_trait
// https://github.com/rust-lang/rust/pull/86176
fn codec_estimation<T: FastFieldCodecSerializer, A: FastFieldDataAccess>(
fn codec_estimation<T: FastFieldCodec, A: FastFieldDataAccess>(
stats: FastFieldStats,
fastfield_accessor: &A,
estimations: &mut Vec<(f32, &str, u8)>,
@@ -83,7 +75,7 @@ impl CompositeFastFieldSerializer {
}
/// Constructor
pub fn from_write_with_codec(
fn from_write_with_codec(
write: WritePtr,
codec_enable_checker: FastFieldCodecEnableCheck,
) -> io::Result<CompositeFastFieldSerializer> {
@@ -119,7 +111,7 @@ impl CompositeFastFieldSerializer {
/// Serialize data into a new u64 fast field. The best compression codec will be chosen
/// automatically.
pub fn write_header<W: Write>(field_write: &mut W, codec_id: u8) -> io::Result<()> {
fn write_header<W: Write>(field_write: &mut W, codec_id: u8) -> io::Result<()> {
codec_id.serialize(field_write)?;
Ok(())
@@ -140,7 +132,9 @@ impl CompositeFastFieldSerializer {
I: Iterator<Item = u64>,
{
let field_write = self.composite_write.for_field_with_idx(field, idx);
let gcd = find_gcd(iter_gen().map(|val| val - stats.min_value)).unwrap_or(GCD_DEFAULT);
let gcd: u64 = find_gcd(iter_gen().map(|val| val - stats.min_value))
.map(NonZeroU64::get)
.unwrap_or(1);
if gcd == 1 {
// No GCD opportunity here.
@@ -154,7 +148,6 @@ impl CompositeFastFieldSerializer {
iter_gen(),
);
}
Self::write_header(field_write, GCD_CODEC_ID)?;
struct GCDWrappedFFAccess<T: FastFieldDataAccess> {
fastfield_accessor: T,
@@ -196,7 +189,7 @@ impl CompositeFastFieldSerializer {
/// Serialize data into a new u64 fast field. The best compression codec will be chosen
/// automatically.
pub fn create_auto_detect_u64_fast_field_with_idx_gcd<W: Write>(
fn create_auto_detect_u64_fast_field_with_idx_gcd<W: Write>(
codec_enable_checker: FastFieldCodecEnableCheck,
field: Field,
field_write: &mut CountingWriter<W>,

View File

@@ -18,6 +18,18 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
use std::path::Path;
use fastfield_codecs::FastFieldCodecReader;
use fastfield_codecs::FastFieldCodec;
use fastfield_codecs::dynamic::DynamicFastFieldReader;
use crate::directory::CompositeFile;
use crate::directory::RamDirectory;
use crate::directory::WritePtr;
use crate::fastfield::FastValue;
use crate::schema::Schema;
/// Wrapper for accessing a fastfield.
///
/// Holds the data and the codec to read the data.
@@ -78,40 +90,81 @@ impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
}
}
impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
fn from(vals: Vec<Item>) -> DynamicFastFieldReader<Item> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field("field", FAST);
let schema = schema_builder.build();
let path = Path::new("__dummy__");
let directory: RamDirectory = RamDirectory::create();
{
let write: WritePtr = directory
.open_write(path)
.expect("With a RamDirectory, this should never fail.");
let mut serializer = CompositeFastFieldSerializer::from_write(write)
.expect("With a RamDirectory, this should never fail.");
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
{
let fast_field_writer = fast_field_writers
.get_field_writer_mut(field)
.expect("With a RamDirectory, this should never fail.");
for val in vals {
fast_field_writer.add_val(val.to_u64());
}
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
.unwrap();
serializer.close().unwrap();
}
impl<Item: FastValue, Codec: FastFieldCodec> FastFieldReaderCodecWrapper<Item, Codec> {
// /// Opens a fast field given a file.
// pub fn open(file: FileSlice) -> crate::Result<Self> {
// let mut bytes = file.read_bytes()?;
// Self::open_from_bytes(bytes)
// }
let file = directory.open_read(path).expect("Failed to open the file");
let composite_file = CompositeFile::open(&file).expect("Failed to read the composite file");
let field_file = composite_file
.open_read(field)
.expect("File component not found");
DynamicFastFieldReader::open(field_file).unwrap()
/// Builds the wrapper by opening the codec reader over `bytes`.
///
/// Any error returned while opening the reader is propagated to the caller.
// NOTE(review): this relies on a type parameter named `C` being in scope in the
// enclosing impl — confirm against the impl header.
pub fn open_from_bytes(bytes: OwnedBytes) -> crate::Result<Self> {
    Ok(FastFieldReaderCodecWrapper {
        reader: C::open_from_bytes(bytes)?,
        _phantom: PhantomData,
    })
}
/// Reads the raw `u64` stored for `doc` from the wrapped reader and converts it to `Item`.
#[inline]
pub(crate) fn get_u64(&self, doc: u64) -> Item {
    Item::from_u64(self.reader.get_u64(doc))
}
/// Fills `output` with the consecutive values starting at index `start`.
///
/// Multivalued fast fields are built on top of single-value fast fields:
/// a first column stores, for each document, the start index of its values,
/// and a second column stores the values themselves. The values of a document
/// are then `second_column[first_column.get(doc)..first_column.get(doc+1)]`.
///
/// As a consequence, a single value fast field reader may be indexed internally
/// with something other than a `DocId`, which is why this method takes a `u64`.
///
/// See `get_range` for the actual documentation of this method.
pub(crate) fn get_range_u64(&self, start: u64, output: &mut [Item]) {
    output.iter_mut().enumerate().for_each(|(offset, slot)| {
        *slot = self.get_u64(start + (offset as u64));
    });
}
}
// impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
// fn from(vals: Vec<Item>) -> DynamicFastFieldReader<Item> {
// let mut schema_builder = Schema::builder();
// let field = schema_builder.add_u64_field("field", FAST);
// let schema = schema_builder.build();
// let path = Path::new("__dummy__");
// let directory: RamDirectory = RamDirectory::create();
// {
// let write: WritePtr = directory
// .open_write(path)
// .expect("With a RamDirectory, this should never fail.");
// let mut serializer = CompositeFastFieldSerializer::from_write(write)
// .expect("With a RamDirectory, this should never fail.");
// let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
// {
// let fast_field_writer = fast_field_writers
// .get_field_writer_mut(field)
// .expect("With a RamDirectory, this should never fail.");
// for val in vals {
// fast_field_writer.add_val(val.to_u64());
// }
// }
// fast_field_writers
// .serialize(&mut serializer, &HashMap::new(), None)
// .unwrap();
// serializer.close().unwrap();
// }
// let file = directory.open_read(path).expect("Failed to open the file");
// let composite_file = CompositeFile::open(&file).expect("Failed to read the composite file");
// let field_file = composite_file
// .open_read(field)
// .expect("File component not found");
// DynamicFastFieldReader::open(field_file).unwrap()
// }
// }