mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
split open_from_bytes to own trait
This commit is contained in:
@@ -25,7 +25,10 @@ mod tests {
|
||||
fn value_iter() -> impl Iterator<Item = u64> {
|
||||
0..20_000
|
||||
}
|
||||
fn bench_get<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
|
||||
fn bench_get<
|
||||
S: FastFieldCodecSerializer,
|
||||
R: FastFieldCodecDeserializer + FastFieldCodecReader,
|
||||
>(
|
||||
b: &mut Bencher,
|
||||
data: &[u64],
|
||||
) {
|
||||
|
||||
@@ -5,7 +5,8 @@ use ownedbytes::OwnedBytes;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::{
|
||||
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
|
||||
FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType,
|
||||
FastFieldDataAccess,
|
||||
};
|
||||
|
||||
/// Depending on the field type, a different
|
||||
@@ -19,7 +20,7 @@ pub struct BitpackedReader {
|
||||
pub num_vals: u64,
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for BitpackedReader {
|
||||
impl FastFieldCodecDeserializer for BitpackedReader {
|
||||
/// Opens a fast field given the bytes.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
|
||||
let footer_offset = bytes.len() - 24;
|
||||
@@ -38,6 +39,8 @@ impl FastFieldCodecReader for BitpackedReader {
|
||||
num_vals,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl FastFieldCodecReader for BitpackedReader {
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64) -> u64 {
|
||||
self.min_value_u64 + self.bit_unpacker.get(doc, &self.data)
|
||||
|
||||
@@ -19,7 +19,8 @@ use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::linear::{get_calculated_value, get_slope};
|
||||
use crate::{
|
||||
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
|
||||
FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType,
|
||||
FastFieldDataAccess,
|
||||
};
|
||||
|
||||
const CHUNK_SIZE: u64 = 512;
|
||||
@@ -148,7 +149,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
|
||||
&interpolations[get_interpolation_position(doc)]
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for BlockwiseLinearReader {
|
||||
impl FastFieldCodecDeserializer for BlockwiseLinearReader {
|
||||
/// Opens a fast field given the bytes.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
|
||||
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
|
||||
@@ -157,7 +158,9 @@ impl FastFieldCodecReader for BlockwiseLinearReader {
|
||||
let footer = BlockwiseLinearFooter::deserialize(&mut footer)?;
|
||||
Ok(BlockwiseLinearReader { data, footer })
|
||||
}
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for BlockwiseLinearReader {
|
||||
#[inline]
|
||||
fn get_u64(&self, idx: u64) -> u64 {
|
||||
let interpolation = get_interpolation_function(idx, &self.footer.interpolations);
|
||||
|
||||
@@ -12,13 +12,22 @@ pub mod bitpacked;
|
||||
pub mod blockwise_linear;
|
||||
pub mod linear;
|
||||
|
||||
pub trait FastFieldCodecDeserializer: Sized {
|
||||
/// Reads the metadata and returns the CodecReader
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self>
|
||||
where
|
||||
Self: FastFieldCodecReader;
|
||||
}
|
||||
|
||||
pub trait FastFieldCodecReader: Sized {
|
||||
/// reads the metadata and returns the CodecReader
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self>;
|
||||
fn get_u64(&self, doc: u64) -> u64;
|
||||
fn min_value(&self) -> u64;
|
||||
fn max_value(&self) -> u64;
|
||||
fn num_vals(&self) -> u64;
|
||||
/// Returns an iterator over the data
|
||||
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = u64> + 'a> {
|
||||
Box::new((0..self.num_vals()).map(|idx| self.get_u64(idx)))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
|
||||
@@ -170,7 +179,10 @@ mod tests {
|
||||
use crate::blockwise_linear::{BlockwiseLinearReader, BlockwiseLinearSerializer};
|
||||
use crate::linear::{LinearReader, LinearSerializer};
|
||||
|
||||
pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
|
||||
pub fn create_and_validate<
|
||||
S: FastFieldCodecSerializer,
|
||||
R: FastFieldCodecDeserializer + FastFieldCodecReader,
|
||||
>(
|
||||
data: &[u64],
|
||||
name: &str,
|
||||
) -> (f32, f32) {
|
||||
@@ -230,7 +242,10 @@ mod tests {
|
||||
data_and_names
|
||||
}
|
||||
|
||||
fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
|
||||
fn test_codec<
|
||||
S: FastFieldCodecSerializer,
|
||||
R: FastFieldCodecReader + FastFieldCodecDeserializer,
|
||||
>() {
|
||||
let codec_name = format!("{:?}", S::CODEC_TYPE);
|
||||
for (data, dataset_name) in get_codec_test_data_sets() {
|
||||
let (estimate, actual) = crate::tests::create_and_validate::<S, R>(&data, dataset_name);
|
||||
|
||||
@@ -6,7 +6,8 @@ use ownedbytes::OwnedBytes;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
|
||||
use crate::{
|
||||
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
|
||||
FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType,
|
||||
FastFieldDataAccess,
|
||||
};
|
||||
|
||||
/// Depending on the field type, a different
|
||||
@@ -59,7 +60,7 @@ impl FixedSize for LinearFooter {
|
||||
const SIZE_IN_BYTES: usize = 56;
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for LinearReader {
|
||||
impl FastFieldCodecDeserializer for LinearReader {
|
||||
/// Opens a fast field given the bytes.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
|
||||
let footer_offset = bytes.len() - LinearFooter::SIZE_IN_BYTES;
|
||||
@@ -75,6 +76,9 @@ impl FastFieldCodecReader for LinearReader {
|
||||
slope,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for LinearReader {
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64) -> u64 {
|
||||
let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope);
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::num::NonZeroU64;
|
||||
|
||||
use common::BinarySerializable;
|
||||
use fastdivide::DividerU64;
|
||||
use fastfield_codecs::FastFieldCodecReader;
|
||||
use fastfield_codecs::{FastFieldCodecDeserializer, FastFieldCodecReader};
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
pub const GCD_DEFAULT: u64 = 1;
|
||||
@@ -15,24 +15,30 @@ pub const GCD_DEFAULT: u64 = 1;
|
||||
pub struct GCDFastFieldCodec<CodecReader> {
|
||||
gcd: u64,
|
||||
min_value: u64,
|
||||
num_vals: u64,
|
||||
reader: CodecReader,
|
||||
}
|
||||
|
||||
impl<C: FastFieldCodecReader + Clone> FastFieldCodecReader for GCDFastFieldCodec<C> {
|
||||
/// Opens a fast field given the bytes.
|
||||
impl<C: FastFieldCodecReader + FastFieldCodecDeserializer + Clone> FastFieldCodecDeserializer
|
||||
for GCDFastFieldCodec<C>
|
||||
{
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self> {
|
||||
let footer_offset = bytes.len() - 16;
|
||||
let footer_offset = bytes.len() - 24;
|
||||
let (body, mut footer) = bytes.split(footer_offset);
|
||||
let gcd = u64::deserialize(&mut footer)?;
|
||||
let min_value = u64::deserialize(&mut footer)?;
|
||||
let num_vals = u64::deserialize(&mut footer)?;
|
||||
let reader = C::open_from_bytes(body)?;
|
||||
Ok(GCDFastFieldCodec {
|
||||
gcd,
|
||||
min_value,
|
||||
num_vals,
|
||||
reader,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: FastFieldCodecReader + Clone> FastFieldCodecReader for GCDFastFieldCodec<C> {
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64) -> u64 {
|
||||
let mut data = self.reader.get_u64(doc);
|
||||
@@ -48,11 +54,20 @@ impl<C: FastFieldCodecReader + Clone> FastFieldCodecReader for GCDFastFieldCodec
|
||||
fn max_value(&self) -> u64 {
|
||||
self.min_value + self.reader.max_value() * self.gcd
|
||||
}
|
||||
fn num_vals(&self) -> u64 {
|
||||
self.num_vals
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_gcd_header<W: Write>(field_write: &mut W, min_value: u64, gcd: u64) -> io::Result<()> {
|
||||
pub fn write_gcd_header<W: Write>(
|
||||
field_write: &mut W,
|
||||
min_value: u64,
|
||||
gcd: u64,
|
||||
num_vals: u64,
|
||||
) -> io::Result<()> {
|
||||
gcd.serialize(field_write)?;
|
||||
min_value.serialize(field_write)?;
|
||||
num_vals.serialize(field_write)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -326,7 +326,7 @@ mod tests {
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 37);
|
||||
assert_eq!(file.len(), 45);
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
let file = composite_file.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
|
||||
@@ -357,7 +357,7 @@ mod tests {
|
||||
serializer.close()?;
|
||||
}
|
||||
let file = directory.open_read(path)?;
|
||||
assert_eq!(file.len(), 62);
|
||||
assert_eq!(file.len(), 70);
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file)?;
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
@@ -393,7 +393,7 @@ mod tests {
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 35);
|
||||
assert_eq!(file.len(), 43);
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file).unwrap();
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
@@ -425,7 +425,7 @@ mod tests {
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 80043);
|
||||
assert_eq!(file.len(), 80051);
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file)?;
|
||||
let data = fast_fields_composite.open_read(*FIELD).unwrap();
|
||||
@@ -896,7 +896,7 @@ mod tests {
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 36);
|
||||
assert_eq!(file.len(), 44);
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
let file = composite_file.open_read(field).unwrap();
|
||||
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
||||
@@ -932,7 +932,7 @@ mod tests {
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 48);
|
||||
assert_eq!(file.len(), 56);
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
let file = composite_file.open_read(field).unwrap();
|
||||
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
||||
@@ -966,7 +966,7 @@ mod tests {
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 35);
|
||||
assert_eq!(file.len(), 43);
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
let file = composite_file.open_read(field).unwrap();
|
||||
let fast_field_reader = DynamicFastFieldReader::<bool>::open(file)?;
|
||||
|
||||
@@ -6,7 +6,7 @@ use common::BinarySerializable;
|
||||
use fastfield_codecs::bitpacked::BitpackedReader;
|
||||
use fastfield_codecs::blockwise_linear::BlockwiseLinearReader;
|
||||
use fastfield_codecs::linear::LinearReader;
|
||||
use fastfield_codecs::{FastFieldCodecReader, FastFieldCodecType};
|
||||
use fastfield_codecs::{FastFieldCodecDeserializer, FastFieldCodecReader, FastFieldCodecType};
|
||||
|
||||
use super::{FastValue, GCDFastFieldCodec};
|
||||
use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirectory, WritePtr};
|
||||
@@ -199,7 +199,9 @@ pub struct FastFieldReaderCodecWrapper<Item: FastValue, CodecReader> {
|
||||
_phantom: PhantomData<Item>,
|
||||
}
|
||||
|
||||
impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item, C> {
|
||||
impl<Item: FastValue, C: FastFieldCodecReader + FastFieldCodecDeserializer>
|
||||
FastFieldReaderCodecWrapper<Item, C>
|
||||
{
|
||||
/// Opens a fast field given a file.
|
||||
pub fn open(file: FileSlice) -> crate::Result<Self> {
|
||||
let mut bytes = file.read_bytes()?;
|
||||
@@ -249,8 +251,8 @@ impl<Item: FastValue, C: FastFieldCodecReader> FastFieldReaderCodecWrapper<Item,
|
||||
}
|
||||
}
|
||||
|
||||
impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
|
||||
for FastFieldReaderCodecWrapper<Item, C>
|
||||
impl<Item: FastValue, C: FastFieldCodecReader + FastFieldCodecDeserializer + Clone>
|
||||
FastFieldReader<Item> for FastFieldReaderCodecWrapper<Item, C>
|
||||
{
|
||||
/// Return the value associated to the given document.
|
||||
///
|
||||
|
||||
@@ -189,7 +189,7 @@ impl CompositeFastFieldSerializer {
|
||||
field_write,
|
||||
fastfield_accessor,
|
||||
)?;
|
||||
write_gcd_header(field_write, base_value, gcd)?;
|
||||
write_gcd_header(field_write, base_value, gcd, num_vals)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user