mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 07:00:41 +00:00
Experimental refactor
This commit is contained in:
@@ -17,22 +17,7 @@ pub struct BitpackedFastFieldReader {
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for BitpackedFastFieldReader {
|
||||
/// Opens a fast field given a file.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
|
||||
let footer_offset = bytes.len() - 16;
|
||||
let (data, mut footer) = bytes.split(footer_offset);
|
||||
let min_value = u64::deserialize(&mut footer)?;
|
||||
let amplitude = u64::deserialize(&mut footer)?;
|
||||
let max_value = min_value + amplitude;
|
||||
let num_bits = compute_num_bits(amplitude);
|
||||
let bit_unpacker = BitUnpacker::new(num_bits);
|
||||
Ok(BitpackedFastFieldReader {
|
||||
data,
|
||||
min_value_u64: min_value,
|
||||
max_value_u64: max_value,
|
||||
bit_unpacker,
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64) -> u64 {
|
||||
self.min_value_u64 + self.bit_unpacker.get(doc, &self.data)
|
||||
@@ -96,11 +81,30 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BitpackedFastFieldSerializer {}
|
||||
pub struct BitpackedFastFieldSerializer;
|
||||
|
||||
impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
|
||||
const NAME: &'static str = "Bitpacked";
|
||||
const ID: u8 = 1;
|
||||
|
||||
type Reader = BitpackedFastFieldReader;
|
||||
|
||||
/// Opens a fast field given a file.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader> {
|
||||
let footer_offset = bytes.len() - 16;
|
||||
let (data, mut footer) = bytes.split(footer_offset);
|
||||
let min_value = u64::deserialize(&mut footer)?;
|
||||
let amplitude = u64::deserialize(&mut footer)?;
|
||||
let max_value = min_value + amplitude;
|
||||
let num_bits = compute_num_bits(amplitude);
|
||||
let bit_unpacker = BitUnpacker::new(num_bits);
|
||||
Ok(BitpackedFastFieldReader {
|
||||
data,
|
||||
min_value_u64: min_value,
|
||||
max_value_u64: max_value,
|
||||
bit_unpacker,
|
||||
})
|
||||
}
|
||||
|
||||
/// Serializes data with the BitpackedFastFieldSerializer.
|
||||
///
|
||||
/// The serializer in fact encode the values by bitpacking
|
||||
@@ -146,7 +150,7 @@ mod tests {
|
||||
use crate::tests::get_codec_test_data_sets;
|
||||
|
||||
fn create_and_validate(data: &[u64], name: &str) {
|
||||
crate::tests::create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(
|
||||
crate::tests::create_and_validate::<BitpackedFastFieldSerializer>(
|
||||
data, name,
|
||||
);
|
||||
}
|
||||
|
||||
148
fastfield_codecs/src/dynamic.rs
Normal file
148
fastfield_codecs/src/dynamic.rs
Normal file
@@ -0,0 +1,148 @@
|
||||
// Copyright (C) 2022 Quickwit, Inc.
|
||||
//
|
||||
// Quickwit is offered under the AGPL v3.0 and as commercial software.
|
||||
// For commercial licensing, contact us at hello@quickwit.io.
|
||||
//
|
||||
// AGPL:
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
use std::io;
|
||||
use std::sync::Arc;
|
||||
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use crate::FastFieldCodecSerializer;
|
||||
use crate::bitpacked::BitpackedFastFieldSerializer;
|
||||
use crate::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
use crate::FastFieldCodecReader;
|
||||
use crate::gcd::GCDFastFieldCodecSerializer;
|
||||
use crate::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
|
||||
/// Codec whose `open_from_bytes` reads a leading one-byte codec code and
/// dispatches to the matching concrete codec. Its `is_applicable`,
/// `estimate` and `serialize` methods are still `todo!()` stubs.
struct DynamicFastFieldSerializer;
|
||||
|
||||
impl FastFieldCodecSerializer for DynamicFastFieldSerializer {
|
||||
const NAME: &'static str = "dynamic";
|
||||
|
||||
type Reader = DynamicFastFieldReader;
|
||||
|
||||
fn is_applicable(fastfield_accessor: &impl crate::FastFieldDataAccess, stats: crate::FastFieldStats) -> bool {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn estimate(fastfield_accessor: &impl crate::FastFieldDataAccess, stats: crate::FastFieldStats) -> f32 {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn serialize(
|
||||
write: &mut impl io::Write,
|
||||
fastfield_accessor: &dyn crate::FastFieldDataAccess,
|
||||
stats: crate::FastFieldStats,
|
||||
data_iter: impl Iterator<Item = u64>,
|
||||
data_iter1: impl Iterator<Item = u64>,
|
||||
) -> io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn open_from_bytes(mut bytes: OwnedBytes) -> io::Result<Self::Reader> {
|
||||
let codec_code = bytes.read_u8();
|
||||
let codec_type = CodecType::from_code(codec_code).ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
format!("Unknown codec code `{codec_code}`"),
|
||||
)
|
||||
})?;
|
||||
let fast_field_reader: Arc<dyn FastFieldCodecReader> = match codec_type {
|
||||
CodecType::Bitpacked => Arc::new(BitpackedFastFieldSerializer::open_from_bytes(bytes)?),
|
||||
CodecType::LinearInterpol => {
|
||||
Arc::new(LinearInterpolFastFieldSerializer::open_from_bytes(bytes)?)
|
||||
}
|
||||
CodecType::MultiLinearInterpol => {
|
||||
Arc::new(MultiLinearInterpolFastFieldSerializer::open_from_bytes(bytes)?)
|
||||
}
|
||||
CodecType::Gcd => {
|
||||
let inner_codec_id = bytes.read_u8();
|
||||
let inner_codec_type = CodecType::from_code(inner_codec_id).ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
format!("Unknown codec code `{codec_code}`"),
|
||||
)
|
||||
})?;
|
||||
match inner_codec_type {
|
||||
CodecType::Bitpacked => {
|
||||
Arc::new(GCDFastFieldCodecSerializer::<BitpackedFastFieldSerializer>::open_from_bytes(bytes)?)
|
||||
}
|
||||
CodecType::LinearInterpol => {
|
||||
Arc::new(GCDFastFieldCodecSerializer::<LinearInterpolFastFieldSerializer>::open_from_bytes(bytes)?)
|
||||
}
|
||||
CodecType::MultiLinearInterpol => {
|
||||
Arc::new(GCDFastFieldCodecSerializer::<MultiLinearInterpolFastFieldSerializer>::open_from_bytes(bytes)?)
|
||||
}
|
||||
CodecType::Gcd => {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"A GCD codec may not wrap another GCD codec.",
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(DynamicFastFieldReader(fast_field_reader))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Clone)]
|
||||
/// DynamicFastFieldReader wraps different readers to access
|
||||
/// the various encoded fastfield data
|
||||
pub struct DynamicFastFieldReader(Arc<dyn FastFieldCodecReader>);
|
||||
|
||||
/// Identifies a fast field codec by a one-byte code.
///
/// The discriminants are the codes accepted by [`CodecType::from_code`] and
/// returned by [`CodecType::to_code`]; the two must be kept in sync.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CodecType {
    Bitpacked = 0,
    LinearInterpol = 1,
    MultiLinearInterpol = 2,
    Gcd = 3,
}

impl CodecType {
    /// Decodes a codec code, returning `None` for a code that maps to no codec.
    pub fn from_code(code: u8) -> Option<Self> {
        match code {
            0 => Some(CodecType::Bitpacked),
            1 => Some(CodecType::LinearInterpol),
            2 => Some(CodecType::MultiLinearInterpol),
            3 => Some(CodecType::Gcd),
            _ => None,
        }
    }

    /// Returns the one-byte code of this codec, i.e. its `#[repr(u8)]` discriminant.
    pub fn to_code(self) -> u8 {
        self as u8
    }
}
|
||||
|
||||
/// Every method forwards to the wrapped codec reader.
impl FastFieldCodecReader for DynamicFastFieldReader {
    /// Returns the value the wrapped reader reports for `doc`.
    fn get_u64(&self, doc: u64) -> u64 {
        self.0.get_u64(doc)
    }

    /// Returns the minimum value reported by the wrapped reader.
    fn min_value(&self) -> u64 {
        self.0.min_value()
    }

    /// Returns the maximum value reported by the wrapped reader.
    fn max_value(&self) -> u64 {
        self.0.max_value()
    }
}
|
||||
@@ -1,44 +1,71 @@
|
||||
use std::io::{self, Write};
|
||||
use std::{io::{self, Write}, marker::PhantomData, num::NonZeroU64};
|
||||
|
||||
use common::BinarySerializable;
|
||||
use fastdivide::DividerU64;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
use crate::FastFieldCodecReader;
|
||||
|
||||
pub const GCD_DEFAULT: u64 = 1;
|
||||
pub const GCD_CODEC_ID: u8 = 4;
|
||||
use crate::{FastFieldCodecReader, FastFieldCodecSerializer};
|
||||
|
||||
/// Wrapper for accessing a fastfield.
|
||||
///
|
||||
/// Holds the data and the codec to the read the data.
|
||||
#[derive(Clone)]
|
||||
pub struct GCDFastFieldCodec<CodecReader> {
|
||||
pub struct GCDFastFieldCodecReader<CodecReader> {
|
||||
gcd: u64,
|
||||
min_value: u64,
|
||||
reader: CodecReader,
|
||||
}
|
||||
impl<C: FastFieldCodecReader + Clone> FastFieldCodecReader for GCDFastFieldCodec<C> {
|
||||
/// Opens a fast field given the bytes.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self> {
|
||||
|
||||
pub struct GCDFastFieldCodecSerializer<WrappedCodecSerializer: FastFieldCodecSerializer> {
|
||||
_wrapped_type: PhantomData<WrappedCodecSerializer>,
|
||||
}
|
||||
|
||||
impl<WrappedCodecSerializer: FastFieldCodecSerializer> GCDFastFieldCodecSerializer<WrappedCodecSerializer> {}
|
||||
|
||||
impl<WrappedCodecSerializer: FastFieldCodecSerializer> FastFieldCodecSerializer for GCDFastFieldCodecSerializer<WrappedCodecSerializer> {
|
||||
// TODO Fixme. We could like the underlying codec name as well.
|
||||
const NAME: &'static str = "GCD";
|
||||
|
||||
type Reader = GCDFastFieldCodecReader<WrappedCodecSerializer::Reader>;
|
||||
|
||||
fn is_applicable(fastfield_accessor: &impl crate::FastFieldDataAccess, stats: crate::FastFieldStats) -> bool {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn estimate(fastfield_accessor: &impl crate::FastFieldDataAccess, stats: crate::FastFieldStats) -> f32 {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn serialize(
|
||||
write: &mut impl Write,
|
||||
fastfield_accessor: &dyn crate::FastFieldDataAccess,
|
||||
stats: crate::FastFieldStats,
|
||||
data_iter: impl Iterator<Item = u64>,
|
||||
data_iter1: impl Iterator<Item = u64>,
|
||||
) -> io::Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader> {
|
||||
let footer_offset = bytes.len() - 16;
|
||||
let (body, mut footer) = bytes.split(footer_offset);
|
||||
let gcd = u64::deserialize(&mut footer)?;
|
||||
let min_value = u64::deserialize(&mut footer)?;
|
||||
let reader = C::open_from_bytes(body)?;
|
||||
Ok(GCDFastFieldCodec {
|
||||
let reader = WrappedCodecSerializer::open_from_bytes(body)?;
|
||||
Ok(GCDFastFieldCodecReader {
|
||||
gcd,
|
||||
min_value,
|
||||
reader,
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
impl<C: FastFieldCodecReader> FastFieldCodecReader for GCDFastFieldCodecReader<C> {
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64) -> u64 {
|
||||
let mut data = self.reader.get_u64(doc);
|
||||
data *= self.gcd;
|
||||
data += self.min_value;
|
||||
data
|
||||
self.min_value + self.gcd * self.reader.get_u64(doc)
|
||||
}
|
||||
|
||||
fn min_value(&self) -> u64 {
|
||||
@@ -64,11 +91,13 @@ fn compute_gcd(mut left: u64, mut right: u64) -> u64 {
|
||||
}
|
||||
|
||||
// Find GCD for iterator of numbers
|
||||
pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
|
||||
//
|
||||
// If all numbers are '0' (or if there are not numbers, return None).
|
||||
pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<NonZeroU64> {
|
||||
let mut numbers = numbers.filter(|n| *n != 0);
|
||||
let mut gcd = numbers.next()?;
|
||||
if gcd == 1 {
|
||||
return Some(1);
|
||||
return NonZeroU64::new(gcd);
|
||||
}
|
||||
|
||||
let mut gcd_divider = DividerU64::divide_by(gcd);
|
||||
@@ -79,151 +108,150 @@ pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<u64> {
|
||||
}
|
||||
gcd = compute_gcd(gcd, val);
|
||||
if gcd == 1 {
|
||||
return Some(1);
|
||||
return NonZeroU64::new(1);
|
||||
}
|
||||
|
||||
gcd_divider = DividerU64::divide_by(gcd);
|
||||
}
|
||||
Some(gcd)
|
||||
NonZeroU64::new(gcd)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
/*
|
||||
TODO Move test
|
||||
// TODO Move test
|
||||
//
|
||||
// use std::collections::HashMap;
|
||||
// use std::path::Path;
|
||||
//
|
||||
// use crate::directory::{CompositeFile, RamDirectory, WritePtr};
|
||||
// use crate::fastfield::serializer::FastFieldCodecEnableCheck;
|
||||
// use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64};
|
||||
// use super::{
|
||||
// find_gcd, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldCodecName,
|
||||
// FastFieldReader, FastFieldsWriter, ALL_CODECS,
|
||||
// };
|
||||
// use crate::schema::Schema;
|
||||
// use crate::Directory;
|
||||
//
|
||||
// fn get_index(
|
||||
// docs: &[crate::Document],
|
||||
// schema: &Schema,
|
||||
// codec_enable_checker: FastFieldCodecEnableCheck,
|
||||
// ) -> crate::Result<RamDirectory> {
|
||||
// let directory: RamDirectory = RamDirectory::create();
|
||||
// {
|
||||
// let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
// let mut serializer =
|
||||
// CompositeFastFieldSerializer::from_write_with_codec(write, codec_enable_checker)
|
||||
// .unwrap();
|
||||
// let mut fast_field_writers = FastFieldsWriter::from_schema(schema);
|
||||
// for doc in docs {
|
||||
// fast_field_writers.add_document(doc);
|
||||
// }
|
||||
// fast_field_writers
|
||||
// .serialize(&mut serializer, &HashMap::new(), None)
|
||||
// .unwrap();
|
||||
// serializer.close().unwrap();
|
||||
// }
|
||||
// Ok(directory)
|
||||
// }
|
||||
//
|
||||
// fn test_fastfield_gcd_i64_with_codec(
|
||||
// codec_name: FastFieldCodecName,
|
||||
// num_vals: usize,
|
||||
// ) -> crate::Result<()> {
|
||||
// let path = Path::new("test");
|
||||
// let mut docs = vec![];
|
||||
// for i in 1..=num_vals {
|
||||
// let val = i as i64 * 1000i64;
|
||||
// docs.push(doc!(*FIELDI64=>val));
|
||||
// }
|
||||
// let directory = get_index(&docs, &SCHEMAI64, codec_name.clone().into())?;
|
||||
// let file = directory.open_read(path).unwrap();
|
||||
// assert_eq!(file.len(), 118);
|
||||
// let composite_file = CompositeFile::open(&file)?;
|
||||
// let file = composite_file.open_read(*FIELD).unwrap();
|
||||
// let fast_field_reader = DynamicFastFieldReader::<i64>::open(file)?;
|
||||
// assert_eq!(fast_field_reader.get(0), 1000i64);
|
||||
// assert_eq!(fast_field_reader.get(1), 2000i64);
|
||||
// assert_eq!(fast_field_reader.get(2), 3000i64);
|
||||
// assert_eq!(fast_field_reader.max_value(), num_vals as i64 * 1000);
|
||||
// assert_eq!(fast_field_reader.min_value(), 1000i64);
|
||||
// let file = directory.open_read(path).unwrap();
|
||||
//
|
||||
// Can't apply gcd
|
||||
// let path = Path::new("test");
|
||||
// docs.pop();
|
||||
// docs.push(doc!(*FIELDI64=>2001i64));
|
||||
// let directory = get_index(&docs, &SCHEMAI64, codec_name.into())?;
|
||||
// let file2 = directory.open_read(path).unwrap();
|
||||
// assert!(file2.len() > file.len());
|
||||
//
|
||||
// Ok(())
|
||||
// }
|
||||
//
|
||||
// #[test]
|
||||
// fn test_fastfield_gcd_i64() -> crate::Result<()> {
|
||||
// for codec_name in ALL_CODECS {
|
||||
// test_fastfield_gcd_i64_with_codec(codec_name.clone(), 5005)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
//
|
||||
// fn test_fastfield_gcd_u64_with_codec(
|
||||
// codec_name: FastFieldCodecName,
|
||||
// num_vals: usize,
|
||||
// ) -> crate::Result<()> {
|
||||
// let path = Path::new("test");
|
||||
// let mut docs = vec![];
|
||||
// for i in 1..=num_vals {
|
||||
// let val = i as u64 * 1000u64;
|
||||
// docs.push(doc!(*FIELD=>val));
|
||||
// }
|
||||
// let directory = get_index(&docs, &SCHEMA, codec_name.clone().into())?;
|
||||
// let file = directory.open_read(path).unwrap();
|
||||
// assert_eq!(file.len(), 118);
|
||||
// let composite_file = CompositeFile::open(&file)?;
|
||||
// let file = composite_file.open_read(*FIELD).unwrap();
|
||||
// let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
|
||||
// assert_eq!(fast_field_reader.get(0), 1000u64);
|
||||
// assert_eq!(fast_field_reader.get(1), 2000u64);
|
||||
// assert_eq!(fast_field_reader.get(2), 3000u64);
|
||||
// assert_eq!(fast_field_reader.max_value(), num_vals as u64 * 1000);
|
||||
// assert_eq!(fast_field_reader.min_value(), 1000u64);
|
||||
// let file = directory.open_read(path).unwrap();
|
||||
//
|
||||
// Can't apply gcd
|
||||
// let path = Path::new("test");
|
||||
// docs.pop();
|
||||
// docs.push(doc!(*FIELDI64=>2001u64));
|
||||
// let directory = get_index(&docs, &SCHEMA, codec_name.into())?;
|
||||
// let file2 = directory.open_read(path).unwrap();
|
||||
// assert!(file2.len() > file.len());
|
||||
//
|
||||
// Ok(())
|
||||
// }
|
||||
//
|
||||
// #[test]
|
||||
// fn test_fastfield_gcd_u64() -> crate::Result<()> {
|
||||
// for codec_name in ALL_CODECS {
|
||||
// test_fastfield_gcd_u64_with_codec(codec_name.clone(), 5005)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
//
|
||||
// #[test]
|
||||
// pub fn test_fastfield2() {
|
||||
// let test_fastfield = DynamicFastFieldReader::<u64>::from(vec![100, 200, 300]);
|
||||
// assert_eq!(test_fastfield.get(0), 100);
|
||||
// assert_eq!(test_fastfield.get(1), 200);
|
||||
// assert_eq!(test_fastfield.get(2), 300);
|
||||
// }
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use crate::directory::{CompositeFile, RamDirectory, WritePtr};
|
||||
use crate::fastfield::serializer::FastFieldCodecEnableCheck;
|
||||
use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64};
|
||||
use super::{
|
||||
find_gcd, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldCodecName,
|
||||
FastFieldReader, FastFieldsWriter, ALL_CODECS,
|
||||
};
|
||||
use crate::schema::Schema;
|
||||
use crate::Directory;
|
||||
|
||||
fn get_index(
|
||||
docs: &[crate::Document],
|
||||
schema: &Schema,
|
||||
codec_enable_checker: FastFieldCodecEnableCheck,
|
||||
) -> crate::Result<RamDirectory> {
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer =
|
||||
CompositeFastFieldSerializer::from_write_with_codec(write, codec_enable_checker)
|
||||
.unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(schema);
|
||||
for doc in docs {
|
||||
fast_field_writers.add_document(doc);
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
Ok(directory)
|
||||
}
|
||||
|
||||
fn test_fastfield_gcd_i64_with_codec(
|
||||
codec_name: FastFieldCodecName,
|
||||
num_vals: usize,
|
||||
) -> crate::Result<()> {
|
||||
let path = Path::new("test");
|
||||
let mut docs = vec![];
|
||||
for i in 1..=num_vals {
|
||||
let val = i as i64 * 1000i64;
|
||||
docs.push(doc!(*FIELDI64=>val));
|
||||
}
|
||||
let directory = get_index(&docs, &SCHEMAI64, codec_name.clone().into())?;
|
||||
let file = directory.open_read(path).unwrap();
|
||||
// assert_eq!(file.len(), 118);
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
let file = composite_file.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = DynamicFastFieldReader::<i64>::open(file)?;
|
||||
assert_eq!(fast_field_reader.get(0), 1000i64);
|
||||
assert_eq!(fast_field_reader.get(1), 2000i64);
|
||||
assert_eq!(fast_field_reader.get(2), 3000i64);
|
||||
assert_eq!(fast_field_reader.max_value(), num_vals as i64 * 1000);
|
||||
assert_eq!(fast_field_reader.min_value(), 1000i64);
|
||||
let file = directory.open_read(path).unwrap();
|
||||
|
||||
// Can't apply gcd
|
||||
let path = Path::new("test");
|
||||
docs.pop();
|
||||
docs.push(doc!(*FIELDI64=>2001i64));
|
||||
let directory = get_index(&docs, &SCHEMAI64, codec_name.into())?;
|
||||
let file2 = directory.open_read(path).unwrap();
|
||||
assert!(file2.len() > file.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fastfield_gcd_i64() -> crate::Result<()> {
|
||||
for codec_name in ALL_CODECS {
|
||||
test_fastfield_gcd_i64_with_codec(codec_name.clone(), 5005)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn test_fastfield_gcd_u64_with_codec(
|
||||
codec_name: FastFieldCodecName,
|
||||
num_vals: usize,
|
||||
) -> crate::Result<()> {
|
||||
let path = Path::new("test");
|
||||
let mut docs = vec![];
|
||||
for i in 1..=num_vals {
|
||||
let val = i as u64 * 1000u64;
|
||||
docs.push(doc!(*FIELD=>val));
|
||||
}
|
||||
let directory = get_index(&docs, &SCHEMA, codec_name.clone().into())?;
|
||||
let file = directory.open_read(path).unwrap();
|
||||
// assert_eq!(file.len(), 118);
|
||||
let composite_file = CompositeFile::open(&file)?;
|
||||
let file = composite_file.open_read(*FIELD).unwrap();
|
||||
let fast_field_reader = DynamicFastFieldReader::<u64>::open(file)?;
|
||||
assert_eq!(fast_field_reader.get(0), 1000u64);
|
||||
assert_eq!(fast_field_reader.get(1), 2000u64);
|
||||
assert_eq!(fast_field_reader.get(2), 3000u64);
|
||||
assert_eq!(fast_field_reader.max_value(), num_vals as u64 * 1000);
|
||||
assert_eq!(fast_field_reader.min_value(), 1000u64);
|
||||
let file = directory.open_read(path).unwrap();
|
||||
|
||||
// Can't apply gcd
|
||||
let path = Path::new("test");
|
||||
docs.pop();
|
||||
docs.push(doc!(*FIELDI64=>2001u64));
|
||||
let directory = get_index(&docs, &SCHEMA, codec_name.into())?;
|
||||
let file2 = directory.open_read(path).unwrap();
|
||||
assert!(file2.len() > file.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fastfield_gcd_u64() -> crate::Result<()> {
|
||||
for codec_name in ALL_CODECS {
|
||||
test_fastfield_gcd_u64_with_codec(codec_name.clone(), 5005)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_fastfield2() {
|
||||
let test_fastfield = DynamicFastFieldReader::<u64>::from(vec![100, 200, 300]);
|
||||
assert_eq!(test_fastfield.get(0), 100);
|
||||
assert_eq!(test_fastfield.get(1), 200);
|
||||
assert_eq!(test_fastfield.get(2), 300);
|
||||
}
|
||||
*/
|
||||
|
||||
use crate::gcd::compute_gcd;
|
||||
use crate::gcd::find_gcd;
|
||||
use crate::gcd::{compute_gcd, find_gcd};
|
||||
|
||||
#[test]
|
||||
fn test_compute_gcd() {
|
||||
@@ -238,16 +266,15 @@ mod tests {
|
||||
assert_eq!(compute_gcd(25, 25), 25);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn find_gcd_test() {
|
||||
assert_eq!(find_gcd([0].into_iter()), None);
|
||||
assert_eq!(find_gcd([0, 10].into_iter()), Some(10));
|
||||
assert_eq!(find_gcd([10, 0].into_iter()), Some(10));
|
||||
assert_eq!(find_gcd([0, 10].into_iter()), NonZeroU64::new(10));
|
||||
assert_eq!(find_gcd([10, 0].into_iter()), NonZeroU64::new(10));
|
||||
assert_eq!(find_gcd([].into_iter()), None);
|
||||
assert_eq!(find_gcd([15, 30, 5, 10].into_iter()), Some(5));
|
||||
assert_eq!(find_gcd([15, 16, 10].into_iter()), Some(1));
|
||||
assert_eq!(find_gcd([0, 5, 5, 5].into_iter()), Some(5));
|
||||
assert_eq!(find_gcd([0, 0].into_iter()), Some(0));
|
||||
assert_eq!(find_gcd([15, 30, 5, 10].into_iter()), NonZeroU64::new(5));
|
||||
assert_eq!(find_gcd([15, 16, 10].into_iter()), NonZeroU64::new(1));
|
||||
assert_eq!(find_gcd([0, 5, 5, 5].into_iter()), NonZeroU64::new(5));
|
||||
assert_eq!(find_gcd([0, 0].into_iter()), None);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,13 +8,13 @@ use std::io::Write;
|
||||
use ownedbytes::OwnedBytes;
|
||||
|
||||
pub mod bitpacked;
|
||||
pub mod dynamic;
|
||||
pub mod gcd;
|
||||
pub mod linearinterpol;
|
||||
pub mod multilinearinterpol;
|
||||
|
||||
pub trait FastFieldCodecReader: Sized {
|
||||
pub trait FastFieldCodecReader{
|
||||
/// reads the metadata and returns the CodecReader
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result<Self>;
|
||||
fn get_u64(&self, doc: u64) -> u64;
|
||||
fn min_value(&self) -> u64;
|
||||
fn max_value(&self) -> u64;
|
||||
@@ -23,10 +23,10 @@ pub trait FastFieldCodecReader: Sized {
|
||||
/// The FastFieldSerializerEstimate trait is required on all variants
|
||||
/// of fast field compressions, to decide which one to choose.
|
||||
pub trait FastFieldCodecSerializer {
|
||||
/// A codex needs to provide a unique name and id, which is
|
||||
/// used for debugging and de/serialization.
|
||||
/// A codex needs to provide a unique name used for debugging and de/serialization.
|
||||
const NAME: &'static str;
|
||||
const ID: u8;
|
||||
|
||||
type Reader: FastFieldCodecReader;
|
||||
|
||||
/// Check if the Codec is able to compress the data
|
||||
fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> bool;
|
||||
@@ -48,6 +48,8 @@ pub trait FastFieldCodecSerializer {
|
||||
data_iter: impl Iterator<Item = u64>,
|
||||
data_iter1: impl Iterator<Item = u64>,
|
||||
) -> io::Result<()>;
|
||||
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader>;
|
||||
}
|
||||
|
||||
/// FastFieldDataAccess is the trait to access fast field data during serialization and estimation.
|
||||
@@ -91,7 +93,7 @@ mod tests {
|
||||
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
|
||||
};
|
||||
|
||||
pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
|
||||
pub fn create_and_validate<S: FastFieldCodecSerializer>(
|
||||
data: &[u64],
|
||||
name: &str,
|
||||
) -> (f32, f32) {
|
||||
@@ -111,7 +113,7 @@ mod tests {
|
||||
|
||||
let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
|
||||
|
||||
let reader = R::open_from_bytes(OwnedBytes::new(out)).unwrap();
|
||||
let reader = S::open_from_bytes(OwnedBytes::new(out)).unwrap();
|
||||
for (doc, orig_val) in data.iter().enumerate() {
|
||||
let val = reader.get_u64(doc as u64);
|
||||
if val != *orig_val {
|
||||
@@ -143,7 +145,7 @@ mod tests {
|
||||
let codec_name = S::NAME;
|
||||
for (data, data_set_name) in get_codec_test_data_sets() {
|
||||
let (estimate, actual) =
|
||||
crate::tests::create_and_validate::<S, R>(&data, data_set_name);
|
||||
crate::tests::create_and_validate::<S>(&data, data_set_name);
|
||||
let result = if estimate == f32::MAX {
|
||||
"Disabled".to_string()
|
||||
} else {
|
||||
|
||||
@@ -58,21 +58,7 @@ impl FixedSize for LinearInterpolFooter {
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for LinearInterpolFastFieldReader {
|
||||
/// Opens a fast field given a file.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
|
||||
let footer_offset = bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES;
|
||||
let (data, mut footer) = bytes.split(footer_offset);
|
||||
let footer = LinearInterpolFooter::deserialize(&mut footer)?;
|
||||
let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals);
|
||||
let num_bits = compute_num_bits(footer.relative_max_value);
|
||||
let bit_unpacker = BitUnpacker::new(num_bits);
|
||||
Ok(LinearInterpolFastFieldReader {
|
||||
data,
|
||||
bit_unpacker,
|
||||
footer,
|
||||
slope,
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64) -> u64 {
|
||||
let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope);
|
||||
@@ -110,7 +96,25 @@ fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
|
||||
|
||||
impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "LinearInterpol";
|
||||
const ID: u8 = 2;
|
||||
|
||||
type Reader = LinearInterpolFastFieldReader;
|
||||
|
||||
/// Opens a fast field given a file.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader> {
|
||||
let footer_offset = bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES;
|
||||
let (data, mut footer) = bytes.split(footer_offset);
|
||||
let footer = LinearInterpolFooter::deserialize(&mut footer)?;
|
||||
let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals);
|
||||
let num_bits = compute_num_bits(footer.relative_max_value);
|
||||
let bit_unpacker = BitUnpacker::new(num_bits);
|
||||
Ok(LinearInterpolFastFieldReader {
|
||||
data,
|
||||
bit_unpacker,
|
||||
footer,
|
||||
slope,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a new fast field serializer.
|
||||
fn serialize(
|
||||
write: &mut impl Write,
|
||||
@@ -240,7 +244,6 @@ mod tests {
|
||||
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
|
||||
crate::tests::create_and_validate::<
|
||||
LinearInterpolFastFieldSerializer,
|
||||
LinearInterpolFastFieldReader,
|
||||
>(data, name)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#[macro_use]
|
||||
extern crate prettytable;
|
||||
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
// use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
|
||||
// use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
|
||||
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats};
|
||||
use prettytable::{Cell, Row, Table};
|
||||
|
||||
@@ -12,11 +12,11 @@ fn main() {
|
||||
table.add_row(row!["", "Compression Ratio", "Compression Estimation"]);
|
||||
|
||||
for (data, data_set_name) in get_codec_test_data_sets() {
|
||||
let mut results = vec![];
|
||||
let res = serialize_with_codec::<LinearInterpolFastFieldSerializer>(&data);
|
||||
results.push(res);
|
||||
let res = serialize_with_codec::<MultiLinearInterpolFastFieldSerializer>(&data);
|
||||
results.push(res);
|
||||
let mut results = Vec::new();
|
||||
// let res = serialize_with_codec::<LinearInterpolFastFieldSerializer>(&data);
|
||||
// results.push(res);
|
||||
// let res = serialize_with_codec::<MultiLinearInterpolFastFieldSerializer>(&data);
|
||||
// results.push(res);
|
||||
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
|
||||
&data,
|
||||
);
|
||||
|
||||
@@ -146,15 +146,6 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
|
||||
}
|
||||
|
||||
impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
|
||||
/// Opens a fast field given a file.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
|
||||
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
|
||||
let footer_offset = bytes.len() - 4 - footer_len as usize;
|
||||
let (data, mut footer) = bytes.split(footer_offset);
|
||||
let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?;
|
||||
Ok(MultiLinearInterpolFastFieldReader { data, footer })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_u64(&self, doc: u64) -> u64 {
|
||||
let interpolation = get_interpolation_function(doc, &self.footer.interpolations);
|
||||
@@ -192,7 +183,18 @@ pub struct MultiLinearInterpolFastFieldSerializer {}
|
||||
|
||||
impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
|
||||
const NAME: &'static str = "MultiLinearInterpol";
|
||||
const ID: u8 = 3;
|
||||
|
||||
type Reader = MultiLinearInterpolFastFieldReader;
|
||||
|
||||
/// Opens a fast field given a file.
|
||||
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self::Reader> {
|
||||
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
|
||||
let footer_offset = bytes.len() - 4 - footer_len as usize;
|
||||
let (data, mut footer) = bytes.split(footer_offset);
|
||||
let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?;
|
||||
Ok(MultiLinearInterpolFastFieldReader { data, footer })
|
||||
}
|
||||
|
||||
/// Creates a new fast field serializer.
|
||||
fn serialize(
|
||||
write: &mut impl Write,
|
||||
@@ -374,7 +376,6 @@ mod tests {
|
||||
fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
|
||||
crate::tests::create_and_validate::<
|
||||
MultiLinearInterpolFastFieldSerializer,
|
||||
MultiLinearInterpolFastFieldReader,
|
||||
>(data, name)
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::path::Path;
|
||||
use fastfield_codecs::bitpacked::{
|
||||
BitpackedFastFieldReader as BitpackedReader, BitpackedFastFieldSerializer,
|
||||
};
|
||||
use fastfield_codecs::gcd::{GCDFastFieldCodec, GCD_CODEC_ID};
|
||||
use fastfield_codecs::gcd::{GCDFastFieldCodecReader, GCD_CODEC_ID};
|
||||
use fastfield_codecs::linearinterpol::{
|
||||
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
|
||||
};
|
||||
@@ -73,14 +73,14 @@ pub enum DynamicFastFieldReader<Item: FastValue> {
|
||||
MultiLinearInterpol(FastFieldReaderCodecWrapper<Item, MultiLinearInterpolFastFieldReader>),
|
||||
|
||||
/// GCD and Bitpacked compressed fastfield data.
|
||||
BitpackedGCD(FastFieldReaderCodecWrapper<Item, GCDFastFieldCodec<BitpackedReader>>),
|
||||
BitpackedGCD(FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<BitpackedReader>>),
|
||||
/// GCD and Linear interpolated values + bitpacked
|
||||
LinearInterpolGCD(
|
||||
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodec<LinearInterpolFastFieldReader>>,
|
||||
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<LinearInterpolFastFieldReader>>,
|
||||
),
|
||||
/// GCD and Blockwise linear interpolated values + bitpacked
|
||||
MultiLinearInterpolGCD(
|
||||
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodec<MultiLinearInterpolFastFieldReader>>,
|
||||
FastFieldReaderCodecWrapper<Item, GCDFastFieldCodecReader<MultiLinearInterpolFastFieldReader>>,
|
||||
),
|
||||
}
|
||||
|
||||
@@ -118,7 +118,7 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
|
||||
BitpackedFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::BitpackedGCD(FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
GCDFastFieldCodec<BitpackedReader>,
|
||||
GCDFastFieldCodecReader<BitpackedReader>,
|
||||
>::open_from_bytes(
|
||||
bytes
|
||||
)?)
|
||||
@@ -126,7 +126,7 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
|
||||
LinearInterpolFastFieldSerializer::ID => {
|
||||
DynamicFastFieldReader::LinearInterpolGCD(FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
GCDFastFieldCodec<LinearInterpolFastFieldReader>,
|
||||
GCDFastFieldCodecReader<LinearInterpolFastFieldReader>,
|
||||
>::open_from_bytes(
|
||||
bytes
|
||||
)?)
|
||||
@@ -135,7 +135,7 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
|
||||
DynamicFastFieldReader::MultiLinearInterpolGCD(
|
||||
FastFieldReaderCodecWrapper::<
|
||||
Item,
|
||||
GCDFastFieldCodec<MultiLinearInterpolFastFieldReader>,
|
||||
GCDFastFieldCodecReader<MultiLinearInterpolFastFieldReader>,
|
||||
>::open_from_bytes(bytes)?,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::io::{self, Write};
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use common::{BinarySerializable, CountingWriter};
|
||||
pub use fastfield_codecs::bitpacked::{
|
||||
@@ -141,7 +142,8 @@ impl CompositeFastFieldSerializer {
|
||||
let field_write = self.composite_write.for_field_with_idx(field, idx);
|
||||
let gcd = find_gcd(iter_gen().map(|val| val - stats.min_value)).unwrap_or(GCD_DEFAULT);
|
||||
|
||||
if gcd <= 1 {
|
||||
if gcd == 1 {
|
||||
// No GCD opportunity here.
|
||||
return Self::create_auto_detect_u64_fast_field_with_idx_gcd(
|
||||
self.codec_enable_checker.clone(),
|
||||
field,
|
||||
@@ -157,7 +159,7 @@ impl CompositeFastFieldSerializer {
|
||||
struct GCDWrappedFFAccess<T: FastFieldDataAccess> {
|
||||
fastfield_accessor: T,
|
||||
min_value: u64,
|
||||
gcd: u64,
|
||||
gcd: NonZeroU64,
|
||||
}
|
||||
impl<T: FastFieldDataAccess> FastFieldDataAccess for GCDWrappedFFAccess<T> {
|
||||
fn get_val(&self, position: u64) -> u64 {
|
||||
|
||||
117
src/fastfield/wrapper.rs
Normal file
117
src/fastfield/wrapper.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
// Copyright (C) 2022 Quickwit, Inc.
|
||||
//
|
||||
// Quickwit is offered under the AGPL v3.0 and as commercial software.
|
||||
// For commercial licensing, contact us at hello@quickwit.io.
|
||||
//
|
||||
// AGPL:
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
|
||||
/// Wrapper for accessing a fastfield.
|
||||
///
|
||||
/// Holds the codec reader used to read the data.
/// `Item` is the value type exposed to callers; it is only tracked at the
/// type level (no `Item` is stored).
|
||||
#[derive(Clone)]
|
||||
pub struct FastFieldReaderCodecWrapper<Item: FastValue, CodecReader> {
|
||||
// Codec-specific reader that the wrapper delegates to.
reader: CodecReader,
|
||||
// Zero-sized marker tying the wrapper to its `Item` type.
_phantom: PhantomData<Item>,
|
||||
}
|
||||
|
||||
// `FastFieldReader` implementation that forwards every call to the wrapped
// codec reader.
// NOTE(review): `get_u64` and `get_range_u64` are called on `self` below but
// are not defined in the part of this file visible here — confirm the
// inherent impl providing them exists.
impl<Item: FastValue, C: FastFieldCodecReader + Clone> FastFieldReader<Item>
|
||||
for FastFieldReaderCodecWrapper<Item, C>
|
||||
{
|
||||
/// Return the value associated to the given document.
|
||||
///
|
||||
/// This accessor should return as fast as possible.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `doc` is greater than the segment
|
||||
/// `maxdoc`.
|
||||
fn get(&self, doc: DocId) -> Item {
|
||||
self.get_u64(u64::from(doc))
|
||||
}
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
///
|
||||
/// Regardless of the type of `Item`, this method works by
|
||||
/// - transmuting the output array
|
||||
/// - extracting the `Item`s as if they were `u64`
|
||||
/// - possibly converting the `u64` value to the right type.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// May panic if `start + output.len()` is greater than
|
||||
/// the segment's `maxdoc`.
|
||||
fn get_range(&self, start: u64, output: &mut [Item]) {
|
||||
self.get_range_u64(start, output);
|
||||
}
|
||||
|
||||
/// Returns the minimum value for this fast field.
|
||||
///
|
||||
/// The min value does not take into account possible
|
||||
/// deleted documents, and should be considered as a lower bound
|
||||
/// of the actual minimum value.
|
||||
fn min_value(&self) -> Item {
|
||||
Item::from_u64(self.reader.min_value())
|
||||
}
|
||||
|
||||
/// Returns the maximum value for this fast field.
|
||||
///
|
||||
/// The max value does not take into account possible
|
||||
/// deleted documents, and should be considered as an upper bound
|
||||
/// of the actual maximum value.
|
||||
fn max_value(&self) -> Item {
|
||||
Item::from_u64(self.reader.max_value())
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a `DynamicFastFieldReader` from an in-memory vector of values.
///
/// The values are converted with `to_u64`, written as a single fast field
/// into a `RamDirectory`, and the resulting file is then read back and opened
/// as a reader.
///
/// # Panics
///
/// Panics if any step of the write or read-back fails (every fallible call
/// is followed by `expect` or `unwrap`).
impl<Item: FastValue> From<Vec<Item>> for DynamicFastFieldReader<Item> {
|
||||
fn from(vals: Vec<Item>) -> DynamicFastFieldReader<Item> {
|
||||
// Schema with one u64 fast field named "field".
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
// File name used inside the RAM directory for the serialized field.
let path = Path::new("__dummy__");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
// Write phase. Everything created in this scope is dropped before the
// file is reopened for reading below.
{
|
||||
let write: WritePtr = directory
|
||||
.open_write(path)
|
||||
.expect("With a RamDirectory, this should never fail.");
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)
|
||||
.expect("With a RamDirectory, this should never fail.");
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
{
|
||||
// NOTE(review): the message below is the same one used for the
// directory calls above; a failure here would presumably mean no
// writer exists for `field` — consider a more specific message.
let fast_field_writer = fast_field_writers
|
||||
.get_field_writer_mut(field)
|
||||
.expect("With a RamDirectory, this should never fail.");
|
||||
for val in vals {
|
||||
fast_field_writer.add_val(val.to_u64());
|
||||
}
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
|
||||
// Read phase: reopen the file, pick out the field's component and open
// it as a reader.
let file = directory.open_read(path).expect("Failed to open the file");
|
||||
let composite_file = CompositeFile::open(&file).expect("Failed to read the composite file");
|
||||
let field_file = composite_file
|
||||
.open_read(field)
|
||||
.expect("File component not found");
|
||||
DynamicFastFieldReader::open(field_file).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user