mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-01 16:10:42 +00:00
Refactor Further
This commit is contained in:
@@ -29,7 +29,7 @@ mod tests {
|
||||
fn value_iter() -> impl Iterator<Item = u64> {
|
||||
0..20_000
|
||||
}
|
||||
fn bench_get<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
|
||||
fn bench_get<S: FastFieldCodec, R: FastFieldCodecReader>(
|
||||
b: &mut Bencher,
|
||||
data: &[u64],
|
||||
) {
|
||||
@@ -49,7 +49,7 @@ mod tests {
|
||||
}
|
||||
});
|
||||
}
|
||||
fn bench_create<S: FastFieldCodecSerializer>(b: &mut Bencher, data: &[u64]) {
|
||||
fn bench_create<S: FastFieldCodec>(b: &mut Bencher, data: &[u64]) {
|
||||
let mut bytes = vec![];
|
||||
b.iter(|| {
|
||||
S::serialize(
|
||||
|
||||
@@ -4,7 +4,7 @@ use common::BinarySerializable;
|
||||
use ownedbytes::OwnedBytes;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
|
||||
use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats};
|
||||
|
||||
/// Depending on the field type, a different
|
||||
/// fast field is required.
|
||||
@@ -83,7 +83,7 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
|
||||
|
||||
pub struct BitpackedFastFieldSerializer;
|
||||
|
||||
impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
|
||||
impl FastFieldCodec for BitpackedFastFieldSerializer {
|
||||
const NAME: &'static str = "Bitpacked";
|
||||
|
||||
type Reader = BitpackedFastFieldReader;
|
||||
@@ -114,6 +114,7 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
|
||||
/// compute the minimum number of bits required to encode
|
||||
/// values.
|
||||
fn serialize(
|
||||
&self,
|
||||
write: &mut impl Write,
|
||||
_fastfield_accessor: &dyn FastFieldDataAccess,
|
||||
stats: FastFieldStats,
|
||||
@@ -150,7 +151,8 @@ mod tests {
|
||||
use crate::tests::get_codec_test_data_sets;
|
||||
|
||||
fn create_and_validate(data: &[u64], name: &str) {
|
||||
crate::tests::create_and_validate::<BitpackedFastFieldSerializer>(
|
||||
crate::tests::create_and_validate(
|
||||
&BitpackedFastFieldSerializer,
|
||||
data, name,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -23,16 +23,16 @@ use std::sync::Arc;
|
||||
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use crate::FastFieldCodecSerializer;
|
||||
use crate::FastFieldCodec;
|
||||
use crate::bitpacked::BitpackedFastFieldSerializer;
|
||||
use crate::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
use crate::FastFieldCodecReader;
|
||||
use crate::gcd::GCDFastFieldCodecSerializer;
|
||||
use crate::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
|
||||
struct DynamicFastFieldSerializer;
|
||||
pub struct DynamicFastFieldSerializer;
|
||||
|
||||
impl FastFieldCodecSerializer for DynamicFastFieldSerializer {
|
||||
impl FastFieldCodec for DynamicFastFieldSerializer {
|
||||
const NAME: &'static str = "dynamic";
|
||||
|
||||
type Reader = DynamicFastFieldReader;
|
||||
@@ -46,6 +46,7 @@ impl FastFieldCodecSerializer for DynamicFastFieldSerializer {
|
||||
}
|
||||
|
||||
fn serialize(
|
||||
&self,
|
||||
write: &mut impl io::Write,
|
||||
fastfield_accessor: &dyn crate::FastFieldDataAccess,
|
||||
stats: crate::FastFieldStats,
|
||||
|
||||
@@ -4,7 +4,7 @@ use common::BinarySerializable;
|
||||
use fastdivide::DividerU64;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use crate::{FastFieldCodecReader, FastFieldCodecSerializer};
|
||||
use crate::{FastFieldCodecReader, FastFieldCodec};
|
||||
|
||||
/// Wrapper for accessing a fastfield.
|
||||
///
|
||||
@@ -16,13 +16,13 @@ pub struct GCDFastFieldCodecReader<CodecReader> {
|
||||
reader: CodecReader,
|
||||
}
|
||||
|
||||
pub struct GCDFastFieldCodecSerializer<WrappedCodecSerializer: FastFieldCodecSerializer> {
|
||||
_wrapped_type: PhantomData<WrappedCodecSerializer>,
|
||||
pub struct GCDFastFieldCodecSerializer<WrappedCodecSerializer: FastFieldCodec> {
|
||||
pub gcd: NonZeroU64,
|
||||
pub min_value: u64,
|
||||
pub wrapped: WrappedCodecSerializer,
|
||||
}
|
||||
|
||||
impl<WrappedCodecSerializer: FastFieldCodecSerializer> GCDFastFieldCodecSerializer<WrappedCodecSerializer> {}
|
||||
|
||||
impl<WrappedCodecSerializer: FastFieldCodecSerializer> FastFieldCodecSerializer for GCDFastFieldCodecSerializer<WrappedCodecSerializer> {
|
||||
impl<WrappedCodecSerializer: FastFieldCodec> FastFieldCodec for GCDFastFieldCodecSerializer<WrappedCodecSerializer> {
|
||||
// TODO Fixme. We would like the underlying codec name as well.
|
||||
const NAME: &'static str = "GCD";
|
||||
|
||||
@@ -37,13 +37,16 @@ impl<WrappedCodecSerializer: FastFieldCodecSerializer> FastFieldCodecSerializer
|
||||
}
|
||||
|
||||
fn serialize(
|
||||
&self,
|
||||
write: &mut impl Write,
|
||||
fastfield_accessor: &dyn crate::FastFieldDataAccess,
|
||||
stats: crate::FastFieldStats,
|
||||
data_iter: impl Iterator<Item = u64>,
|
||||
data_iter1: impl Iterator<Item = u64>,
|
||||
) -> io::Result<()> {
|
||||
todo!()
|
||||
write_gcd_header(write, self.min_value, self.gcd)?;
|
||||
self.wrapped.serialize(write, fastfield_accessor, stats, data_iter, data_iter1)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader> {
|
||||
@@ -77,8 +80,8 @@ impl<C: FastFieldCodecReader> FastFieldCodecReader for GCDFastFieldCodecReader<C
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: u64) -> io::Result<()> {
|
||||
gcd.serialize(field_write)?;
|
||||
fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: NonZeroU64) -> io::Result<()> {
|
||||
gcd.get().serialize(field_write)?;
|
||||
min_value.serialize(field_write)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ pub mod gcd;
|
||||
pub mod linearinterpol;
|
||||
pub mod multilinearinterpol;
|
||||
|
||||
pub trait FastFieldCodecReader{
|
||||
pub trait FastFieldCodecReader {
|
||||
/// reads the metadata and returns the CodecReader
|
||||
fn get_u64(&self, doc: u64) -> u64;
|
||||
fn min_value(&self) -> u64;
|
||||
@@ -22,7 +22,7 @@ pub trait FastFieldCodecReader{
|
||||
|
||||
/// The `FastFieldCodec` trait is required on all variants
|
||||
/// of fast field compressions, to decide which one to choose.
|
||||
pub trait FastFieldCodecSerializer {
|
||||
pub trait FastFieldCodec {
|
||||
/// A codec needs to provide a unique name used for debugging and de/serialization.
|
||||
const NAME: &'static str;
|
||||
|
||||
@@ -42,6 +42,7 @@ pub trait FastFieldCodecSerializer {
|
||||
/// There are multiple iterators, in case the codec needs to read the data multiple times.
|
||||
/// The iterators should be preferred over using fastfield_accessor for performance reasons.
|
||||
fn serialize(
|
||||
&self,
|
||||
write: &mut impl Write,
|
||||
fastfield_accessor: &dyn FastFieldDataAccess,
|
||||
stats: FastFieldStats,
|
||||
@@ -93,7 +94,8 @@ mod tests {
|
||||
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
|
||||
};
|
||||
|
||||
pub fn create_and_validate<S: FastFieldCodecSerializer>(
|
||||
pub fn create_and_validate<S: FastFieldCodec>(
|
||||
codec: &S,
|
||||
data: &[u64],
|
||||
name: &str,
|
||||
) -> (f32, f32) {
|
||||
@@ -102,7 +104,7 @@ mod tests {
|
||||
}
|
||||
let estimation = S::estimate(&data, crate::tests::stats_from_vec(data));
|
||||
let mut out: Vec<u8> = Vec::new();
|
||||
S::serialize(
|
||||
codec.serialize(
|
||||
&mut out,
|
||||
&data,
|
||||
crate::tests::stats_from_vec(data),
|
||||
@@ -141,11 +143,11 @@ mod tests {
|
||||
data_and_names
|
||||
}
|
||||
|
||||
fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
|
||||
let codec_name = S::NAME;
|
||||
fn test_codec<C: FastFieldCodec>(codec: &C) {
|
||||
let codec_name = C::NAME;
|
||||
for (data, data_set_name) in get_codec_test_data_sets() {
|
||||
let (estimate, actual) =
|
||||
crate::tests::create_and_validate::<S>(&data, data_set_name);
|
||||
crate::tests::create_and_validate(codec, &data, data_set_name);
|
||||
let result = if estimate == f32::MAX {
|
||||
"Disabled".to_string()
|
||||
} else {
|
||||
@@ -159,15 +161,15 @@ mod tests {
|
||||
}
|
||||
#[test]
|
||||
fn test_codec_bitpacking() {
|
||||
test_codec::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>();
|
||||
test_codec(&BitpackedFastFieldSerializer);
|
||||
}
|
||||
#[test]
|
||||
fn test_codec_interpolation() {
|
||||
test_codec::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>();
|
||||
test_codec(&LinearInterpolFastFieldSerializer);
|
||||
}
|
||||
#[test]
|
||||
fn test_codec_multi_interpolation() {
|
||||
test_codec::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>();
|
||||
test_codec(&MultiLinearInterpolFastFieldSerializer);
|
||||
}
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -5,7 +5,7 @@ use common::{BinarySerializable, FixedSize};
|
||||
use ownedbytes::OwnedBytes;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
|
||||
use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats};
|
||||
|
||||
/// Depending on the field type, a different
|
||||
/// fast field is required.
|
||||
@@ -77,7 +77,7 @@ impl FastFieldCodecReader for LinearInterpolFastFieldReader {
|
||||
|
||||
/// Fastfield serializer, which tries to guess values by linear interpolation
|
||||
/// and stores the difference bitpacked.
|
||||
pub struct LinearInterpolFastFieldSerializer {}
|
||||
pub struct LinearInterpolFastFieldSerializer;
|
||||
|
||||
#[inline]
|
||||
fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
|
||||
@@ -94,7 +94,7 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
|
||||
first_val + (pos as f32 * slope) as u64
|
||||
}
|
||||
|
||||
impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
|
||||
impl FastFieldCodec for LinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "LinearInterpol";
|
||||
|
||||
type Reader = LinearInterpolFastFieldReader;
|
||||
@@ -117,6 +117,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
|
||||
|
||||
/// Creates a new fast field serializer.
|
||||
fn serialize(
|
||||
&self,
|
||||
write: &mut impl Write,
|
||||
fastfield_accessor: &dyn FastFieldDataAccess,
|
||||
stats: FastFieldStats,
|
||||
@@ -242,9 +243,7 @@ mod tests {
|
||||
use crate::tests::get_codec_test_data_sets;
|
||||
|
||||
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
|
||||
crate::tests::create_and_validate::<
|
||||
LinearInterpolFastFieldSerializer,
|
||||
>(data, name)
|
||||
crate::tests::create_and_validate(&LinearInterpolFastFieldSerializer, data, name)
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
extern crate prettytable;
|
||||
// use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
// use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats};
|
||||
use fastfield_codecs::{FastFieldCodec, FastFieldStats, bitpacked::BitpackedFastFieldSerializer};
|
||||
use prettytable::{Cell, Row, Table};
|
||||
|
||||
fn main() {
|
||||
@@ -17,9 +17,7 @@ fn main() {
|
||||
// results.push(res);
|
||||
// let res = serialize_with_codec::<MultiLinearInterpolFastFieldSerializer>(&data);
|
||||
// results.push(res);
|
||||
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
|
||||
&data,
|
||||
);
|
||||
let res = serialize_with_codec(&BitpackedFastFieldSerializer, &data);
|
||||
results.push(res);
|
||||
|
||||
// let best_estimation_codec = results
|
||||
@@ -91,7 +89,8 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
|
||||
data_and_names
|
||||
}
|
||||
|
||||
pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
|
||||
pub fn serialize_with_codec<S: FastFieldCodec>(
|
||||
codec: &S,
|
||||
data: &[u64],
|
||||
) -> (bool, f32, f32, &'static str) {
|
||||
let is_applicable = S::is_applicable(&data, stats_from_vec(data));
|
||||
@@ -100,7 +99,7 @@ pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
|
||||
}
|
||||
let estimation = S::estimate(&data, stats_from_vec(data));
|
||||
let mut out = vec![];
|
||||
S::serialize(
|
||||
codec.serialize(
|
||||
&mut out,
|
||||
&data,
|
||||
stats_from_vec(data),
|
||||
|
||||
@@ -17,7 +17,7 @@ use common::{BinarySerializable, CountingWriter, DeserializeFrom};
|
||||
use ownedbytes::OwnedBytes;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
|
||||
use crate::{FastFieldCodecReader, FastFieldCodec, FastFieldDataAccess, FastFieldStats};
|
||||
|
||||
const CHUNK_SIZE: u64 = 512;
|
||||
|
||||
@@ -179,9 +179,9 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
|
||||
}
|
||||
|
||||
/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
|
||||
pub struct MultiLinearInterpolFastFieldSerializer {}
|
||||
pub struct MultiLinearInterpolFastFieldSerializer;
|
||||
|
||||
impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
|
||||
impl FastFieldCodec for MultiLinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "MultiLinearInterpol";
|
||||
|
||||
type Reader = MultiLinearInterpolFastFieldReader;
|
||||
@@ -197,6 +197,7 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
|
||||
|
||||
/// Creates a new fast field serializer.
|
||||
fn serialize(
|
||||
&self,
|
||||
write: &mut impl Write,
|
||||
fastfield_accessor: &dyn FastFieldDataAccess,
|
||||
stats: FastFieldStats,
|
||||
@@ -374,9 +375,7 @@ mod tests {
|
||||
use crate::tests::get_codec_test_data_sets;
|
||||
|
||||
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
|
||||
crate::tests::create_and_validate::<
|
||||
MultiLinearInterpolFastFieldSerializer,
|
||||
>(data, name)
|
||||
crate::tests::create_and_validate(&MultiLinearInterpolFastFieldSerializer, data, name)
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -25,13 +25,14 @@ pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
|
||||
pub use self::error::{FastFieldNotAvailableError, Result};
|
||||
pub use self::facet_reader::FacetReader;
|
||||
pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter};
|
||||
pub use self::reader::{DynamicFastFieldReader, FastFieldReader};
|
||||
pub use self::reader::FastFieldReader;
|
||||
pub use self::readers::FastFieldReaders;
|
||||
pub(crate) use self::readers::{type_and_cardinality, FastType};
|
||||
pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
|
||||
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
||||
use crate::schema::{Cardinality, FieldType, Type, Value};
|
||||
use crate::{DateTime, DocId};
|
||||
pub use self::wrapper::FastFieldReaderCodecWrapper;
|
||||
|
||||
mod alive_bitset;
|
||||
mod bytes;
|
||||
@@ -41,6 +42,7 @@ mod multivalued;
|
||||
mod reader;
|
||||
mod readers;
|
||||
mod serializer;
|
||||
mod wrapper;
|
||||
mod writer;
|
||||
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)]
|
||||
|
||||
@@ -2,17 +2,9 @@ use std::collections::HashMap;
|
||||
use std::marker::PhantomData;
|
||||
use std::path::Path;
|
||||
|
||||
use fastfield_codecs::bitpacked::{
|
||||
BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer,
|
||||
};
|
||||
use fastfield_codecs::gcd::{GCDFastFieldCodecReader, GCD_CODEC_ID};
|
||||
use fastfield_codecs::linearinterpol::{
|
||||
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
|
||||
};
|
||||
use fastfield_codecs::multilinearinterpol::{
|
||||
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
|
||||
};
|
||||
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecSerializer};
|
||||
|
||||
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodec};
|
||||
use fastfield_codecs::dynamic::{DynamicFastFieldReader, DynamicFastFieldSerializer};
|
||||
|
||||
use super::FastValue;
|
||||
use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr};
|
||||
@@ -61,165 +53,6 @@ pub trait FastFieldReader<Item: FastValue>: Clone {
|
||||
fn max_value(&self) -> Item;
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
/// DynamicFastFieldReader wraps different readers to access
|
||||
/// the various encoded fastfield data
|
||||
pub enum DynamicFastFieldReader<Item: FastValue> {
|
||||
/// Bitpacked compressed fastfield data.
|
||||
Bitpacked(FastFieldReaderCodecWrapper<Item, BitpackedReader>),
|
||||
/// Linear interpolated values + bitpacked
|
||||
LinearInterpol(FastFieldReaderCodecWrapper<Item, LinearInterpolFastFieldReader>),
|
||||
/// Blockwise linear interpolated values + bitpacked
|
||||
MultiLinearInterpol(FastFieldReaderCodecWrapper<Item, MultiLinearInterpolFastFieldReader>),
|
||||
|
||||
/// GCD and Bitpacked compressed fastfield data.
|
||||
BitpackedGCD(FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<BitpackedReader>>),
|
||||
/// GCD and Linear interpolated values + bitpacked
|
||||
LinearInterpolGCD(
|
||||
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<LinearInterpolFastFieldReader>>,
|
||||
),
|
||||
/// GCD and Blockwise linear interpolated values + bitpacked
|
||||
MultiLinearInterpolGCD(
|
||||
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<MultiLinearInterpolFastFieldReader>>,
|
||||
),
|
||||
}
|
||||
|
||||
impl<Item: FastValue> DynamicFastFieldReader<Item> {
|
||||
/// Returns the correct reader wrapped in the `DynamicFastFieldReader` enum for the data.
|
||||
pub fn open_from_id(
|
||||
mut bytes: OwnedBytes,
|
||||
codec_id: u8,
|
||||
) -> crate::Result<DynamicFastFieldReader<Item>> {
|
||||
let reader = match codec_id {
|
||||
BitpackedFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::Bitpacked(FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
BitpackedReader,
|
||||
>::open_from_bytes(bytes)?)
|
||||
}
|
||||
LinearInterpolFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::LinearInterpol(FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
LinearInterpolFastFieldReader,
|
||||
>::open_from_bytes(bytes)?)
|
||||
}
|
||||
MultiLinearInterpolFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::MultiLinearInterpol(FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
MultiLinearInterpolFastFieldReader,
|
||||
>::open_from_bytes(
|
||||
bytes
|
||||
)?)
|
||||
}
|
||||
_ if codec_id == GCD_CODEC_ID => {
|
||||
let codec_id = bytes.read_u8();
|
||||
|
||||
match codec_id {
|
||||
BitpackedFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::BitpackedGCD(FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
GCDFastFieldCodecReader<BitpackedReader>,
|
||||
>::open_from_bytes(
|
||||
bytes
|
||||
)?)
|
||||
}
|
||||
LinearInterpolFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::LinearInterpolGCD(FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
GCDFastFieldCodecReader<LinearInterpolFastFieldReader>,
|
||||
>::open_from_bytes(
|
||||
bytes
|
||||
)?)
|
||||
}
|
||||
MultiLinearInterpolFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::MultiLinearInterpolGCD(
|
||||
FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
GCDFastFieldCodecReader<MultiLinearInterpolFastFieldReader>,
|
||||
>::open_from_bytes(bytes)?,
|
||||
)
|
||||
}
|
||||
_ => {
|
||||
panic!(
|
||||
"unknown fastfield codec id {:?}. Data corrupted or using old tantivy \
|
||||
version.",
|
||||
codec_id
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!(
|
||||
"unknown fastfield codec id {:?}. Data corrupted or using old tantivy version.",
|
||||
codec_id
|
||||
)
|
||||
}
|
||||
};
|
||||
Ok(reader)
|
||||
}
|
||||
/// Returns the correct reader wrapped in the `DynamicFastFieldReader` enum for the data.
|
||||
pub fn open(file: FileSlice) -> crate::Result<DynamicFastFieldReader<Item>> {
|
||||
let mut bytes = file.read_bytes()?;
|
||||
let codec_id = bytes.read_u8();
|
||||
|
||||
Self::open_from_id(bytes, codec_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Item: FastValue> FastFieldReader<Item> for DynamicFastFieldReader<Item> {
|
||||
#[inline]
|
||||
fn get(&self, doc: DocId) -> Item {
|
||||
match self {
|
||||
Self::Bitpacked(reader) => reader.get(doc),
|
||||
Self::LinearInterpol(reader) => reader.get(doc),
|
||||
Self::MultiLinearInterpol(reader) => reader.get(doc),
|
||||
Self::BitpackedGCD(reader) => reader.get(doc),
|
||||
Self::LinearInterpolGCD(reader) => reader.get(doc),
|
||||
Self::MultiLinearInterpolGCD(reader) => reader.get(doc),
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
fn get_range(&self, start: u64, output: &mut [Item]) {
|
||||
match self {
|
||||
Self::Bitpacked(reader) => reader.get_range(start, output),
|
||||
Self::LinearInterpol(reader) => reader.get_range(start, output),
|
||||
Self::MultiLinearInterpol(reader) => reader.get_range(start, output),
|
||||
Self::BitpackedGCD(reader) => reader.get_range(start, output),
|
||||
Self::LinearInterpolGCD(reader) => reader.get_range(start, output),
|
||||
Self::MultiLinearInterpolGCD(reader) => reader.get_range(start, output),
|
||||
}
|
||||
}
|
||||
fn min_value(&self) -> Item {
|
||||
match self {
|
||||
Self::Bitpacked(reader) => reader.min_value(),
|
||||
Self::LinearInterpol(reader) => reader.min_value(),
|
||||
Self::MultiLinearInterpol(reader) => reader.min_value(),
|
||||
Self::BitpackedGCD(reader) => reader.min_value(),
|
||||
Self::LinearInterpolGCD(reader) => reader.min_value(),
|
||||
Self::MultiLinearInterpolGCD(reader) => reader.min_value(),
|
||||
}
|
||||
}
|
||||
fn max_value(&self) -> Item {
|
||||
match self {
|
||||
Self::Bitpacked(reader) => reader.max_value(),
|
||||
Self::LinearInterpol(reader) => reader.max_value(),
|
||||
Self::MultiLinearInterpol(reader) => reader.max_value(),
|
||||
Self::BitpackedGCD(reader) => reader.max_value(),
|
||||
Self::LinearInterpolGCD(reader) => reader.max_value(),
|
||||
Self::MultiLinearInterpolGCD(reader) => reader.max_value(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper for accessing a fastfield.
|
||||
///
|
||||
/// Holds the data and the codec to read the data.
|
||||
#[derive(Clone)]
|
||||
pub struct FastFieldReaderCodecWrapper<Item: FastValue, CodecReader> {
|
||||
reader: CodecReader,
|
||||
_phantom: PhantomData<Item>,
|
||||
}
|
||||
|
||||
impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item, C> {
|
||||
/// Opens a fast field given a file.
|
||||
pub fn open(file: FileSlice) -> crate::Result<Self> {
|
||||
|
||||
@@ -5,10 +5,10 @@ use common::{BinarySerializable, CountingWriter};
|
||||
pub use fastfield_codecs::bitpacked::{
|
||||
BitpackedFastFieldSerializer, BitpackedFastFieldSerializerLegacy,
|
||||
};
|
||||
use fastfield_codecs::gcd::{find_gcd, write_gcd_header, GCD_CODEC_ID, GCD_DEFAULT};
|
||||
use fastfield_codecs::gcd::{find_gcd, write_gcd_header};
|
||||
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
pub use fastfield_codecs::{FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
|
||||
pub use fastfield_codecs::{FastFieldCodec, FastFieldDataAccess, FastFieldStats};
|
||||
|
||||
use super::{FastFieldCodecName, ALL_CODECS};
|
||||
use crate::directory::{CompositeWrite, WritePtr};
|
||||
@@ -40,7 +40,7 @@ pub struct CompositeFastFieldSerializer {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FastFieldCodecEnableCheck {
|
||||
struct FastFieldCodecEnableCheck {
|
||||
enabled_codecs: Vec<FastFieldCodecName>,
|
||||
}
|
||||
impl FastFieldCodecEnableCheck {
|
||||
@@ -54,17 +54,9 @@ impl FastFieldCodecEnableCheck {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FastFieldCodecName> for FastFieldCodecEnableCheck {
|
||||
fn from(codec_name: FastFieldCodecName) -> Self {
|
||||
FastFieldCodecEnableCheck {
|
||||
enabled_codecs: vec![codec_name],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// use this, when this is merged and stabilized explicit_generic_args_with_impl_trait
|
||||
// https://github.com/rust-lang/rust/pull/86176
|
||||
fn codec_estimation<T: FastFieldCodecSerializer, A: FastFieldDataAccess>(
|
||||
fn codec_estimation<T: FastFieldCodec, A: FastFieldDataAccess>(
|
||||
stats: FastFieldStats,
|
||||
fastfield_accessor: &A,
|
||||
estimations: &mut Vec<(f32, &str, u8)>,
|
||||
@@ -83,7 +75,7 @@ impl CompositeFastFieldSerializer {
|
||||
}
|
||||
|
||||
/// Constructor
|
||||
pub fn from_write_with_codec(
|
||||
fn from_write_with_codec(
|
||||
write: WritePtr,
|
||||
codec_enable_checker: FastFieldCodecEnableCheck,
|
||||
) -> io::Result<CompositeFastFieldSerializer> {
|
||||
@@ -119,7 +111,7 @@ impl CompositeFastFieldSerializer {
|
||||
|
||||
/// Serialize data into a new u64 fast field. The best compression codec will be chosen
|
||||
/// automatically.
|
||||
pub fn write_header<W: Write>(field_write: &mut W, codec_id: u8) -> io::Result<()> {
|
||||
fn write_header<W: Write>(field_write: &mut W, codec_id: u8) -> io::Result<()> {
|
||||
codec_id.serialize(field_write)?;
|
||||
|
||||
Ok(())
|
||||
@@ -140,7 +132,9 @@ impl CompositeFastFieldSerializer {
|
||||
I: Iterator<Item = u64>,
|
||||
{
|
||||
let field_write = self.composite_write.for_field_with_idx(field, idx);
|
||||
let gcd = find_gcd(iter_gen().map(|val| val - stats.min_value)).unwrap_or(GCD_DEFAULT);
|
||||
let gcd: u64 = find_gcd(iter_gen().map(|val| val - stats.min_value))
|
||||
.map(NonZeroU64::get)
|
||||
.unwrap_or(1);
|
||||
|
||||
if gcd == 1 {
|
||||
// No GCD opportunity here.
|
||||
@@ -154,7 +148,6 @@ impl CompositeFastFieldSerializer {
|
||||
iter_gen(),
|
||||
);
|
||||
}
|
||||
|
||||
Self::write_header(field_write, GCD_CODEC_ID)?;
|
||||
struct GCDWrappedFFAccess<T: FastFieldDataAccess> {
|
||||
fastfield_accessor: T,
|
||||
@@ -196,7 +189,7 @@ impl CompositeFastFieldSerializer {
|
||||
|
||||
/// Serialize data into a new u64 fast field. The best compression codec will be chosen
|
||||
/// automatically.
|
||||
pub fn create_auto_detect_u64_fast_field_with_idx_gcd<W: Write>(
|
||||
fn create_auto_detect_u64_fast_field_with_idx_gcd<W: Write>(
|
||||
codec_enable_checker: FastFieldCodecEnableCheck,
|
||||
field: Field,
|
||||
field_write: &mut CountingWriter<W>,
|
||||
|
||||
@@ -18,6 +18,18 @@
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use fastfield_codecs::FastFieldCodecReader;
|
||||
use fastfield_codecs::FastFieldCodec;
|
||||
use fastfield_codecs::dynamic::DynamicFastFieldReader;
|
||||
|
||||
use crate::directory::CompositeFile;
|
||||
use crate::directory::RamDirectory;
|
||||
use crate::directory::WritePtr;
|
||||
use crate::fastfield::FastValue;
|
||||
use crate::schema::Schema;
|
||||
|
||||
/// Wrapper for accessing a fastfield.
|
||||
///
|
||||
/// Holds the data and the codec to read the data.
|
||||
@@ -78,40 +90,81 @@ impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
|
||||
}
|
||||
}
|
||||
|
||||
impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
|
||||
fn from(vals: Vec<Item>) -> DynamicFastFieldReader<Item> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let path = Path::new("__dummy__");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory
|
||||
.open_write(path)
|
||||
.expect("With a RamDirectory, this should never fail.");
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)
|
||||
.expect("With a RamDirectory, this should never fail.");
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
{
|
||||
let fast_field_writer = fast_field_writers
|
||||
.get_field_writer_mut(field)
|
||||
.expect("With a RamDirectory, this should never fail.");
|
||||
for val in vals {
|
||||
fast_field_writer.add_val(val.to_u64());
|
||||
}
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
impl<Item: FastValue, Codec: FastFieldCodec> FastFieldReaderCodecWrapper<Item, Codec> {
|
||||
// /// Opens a fast field given a file.
|
||||
// pub fn open(file: FileSlice) -> crate::Result<Self> {
|
||||
// let mut bytes = file.read_bytes()?;
|
||||
// Self::open_from_bytes(bytes)
|
||||
// }
|
||||
|
||||
let file = directory.open_read(path).expect("Failed to open the file");
|
||||
let composite_file = CompositeFile::open(&file).expect("Failed to read the composite file");
|
||||
let field_file = composite_file
|
||||
.open_read(field)
|
||||
.expect("File component not found");
|
||||
DynamicFastFieldReader::open(field_file).unwrap()
|
||||
/// Opens a fast field given the bytes.
|
||||
pub fn open_from_bytes(bytes: OwnedBytes) -> crate::Result<Self> {
|
||||
let reader = C::open_from_bytes(bytes)?;
|
||||
Ok(FastFieldReaderCodecWrapper {
|
||||
reader,
|
||||
_phantom: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn get_u64(&self, doc: u64) -> Item {
|
||||
let data = self.reader.get_u64(doc);
|
||||
Item::from_u64(data)
|
||||
}
|
||||
|
||||
/// Internally `multivalued` also uses SingleValue Fast fields.
|
||||
/// It works as follows... A first column contains the list of start index
|
||||
/// for each document, a second column contains the actual values.
|
||||
///
|
||||
/// The values associated to a given doc, are then
|
||||
/// `second_column[first_column.get(doc)..first_column.get(doc+1)]`.
|
||||
///
|
||||
/// Which means single value fast field reader can be indexed internally with
|
||||
/// something different from a `DocId`. For this use case, we want to use `u64`
|
||||
/// values.
|
||||
///
|
||||
/// See `get_range` for an actual documentation about this method.
|
||||
pub(crate) fn get_range_u64(&self, start: u64, output: &mut [Item]) {
|
||||
for (i, out) in output.iter_mut().enumerate() {
|
||||
*out = self.get_u64(start + (i as u64));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
|
||||
// fn from(vals: Vec<Item>) -> DynamicFastFieldReader<Item> {
|
||||
// let mut schema_builder = Schema::builder();
|
||||
// let field = schema_builder.add_u64_field("field", FAST);
|
||||
// let schema = schema_builder.build();
|
||||
// let path = Path::new("__dummy__");
|
||||
// let directory: RamDirectory = RamDirectory::create();
|
||||
// {
|
||||
// let write: WritePtr = directory
|
||||
// .open_write(path)
|
||||
// .expect("With a RamDirectory, this should never fail.");
|
||||
// let mut serializer = CompositeFastFieldSerializer::from_write(write)
|
||||
// .expect("With a RamDirectory, this should never fail.");
|
||||
// let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
// {
|
||||
// let fast_field_writer = fast_field_writers
|
||||
// .get_field_writer_mut(field)
|
||||
// .expect("With a RamDirectory, this should never fail.");
|
||||
// for val in vals {
|
||||
// fast_field_writer.add_val(val.to_u64());
|
||||
// }
|
||||
// }
|
||||
// fast_field_writers
|
||||
// .serialize(&mut serializer, &HashMap::new(), None)
|
||||
// .unwrap();
|
||||
// serializer.close().unwrap();
|
||||
// }
|
||||
|
||||
// let file = directory.open_read(path).expect("Failed to open the file");
|
||||
// let composite_file = CompositeFile::open(&file).expect("Failed to read the composite file");
|
||||
// let field_file = composite_file
|
||||
// .open_read(field)
|
||||
// .expect("File component not found");
|
||||
// DynamicFastFieldReader::open(field_file).unwrap()
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user