Merge pull request #1475 from quickwit-oss/extend_ff_access

move fastfield stats to trait
This commit is contained in:
PSeitz
2022-08-24 06:44:57 -07:00
committed by GitHub
9 changed files with 205 additions and 120 deletions

View File

@@ -45,7 +45,7 @@ mod tests {
fn bench_create<S: FastFieldCodecSerializer>(b: &mut Bencher, data: &[u64]) {
let mut bytes = vec![];
b.iter(|| {
S::serialize(&mut bytes, &data, stats_from_vec(data)).unwrap();
S::serialize(&mut bytes, &data).unwrap();
});
}

View File

@@ -4,7 +4,7 @@ use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
/// Depending on the field type, a different
/// fast field is required.
@@ -112,10 +112,12 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
fn serialize(
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
) -> io::Result<()> {
let mut serializer =
BitpackedFastFieldSerializerLegacy::open(write, stats.min_value, stats.max_value)?;
let mut serializer = BitpackedFastFieldSerializerLegacy::open(
write,
fastfield_accessor.min_value(),
fastfield_accessor.max_value(),
)?;
for val in fastfield_accessor.iter() {
serializer.add_val(val)?;
@@ -124,14 +126,11 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
Ok(())
}
fn is_applicable(
_fastfield_accessor: &impl FastFieldDataAccess,
_stats: FastFieldStats,
) -> bool {
fn is_applicable(_fastfield_accessor: &impl FastFieldDataAccess) -> bool {
true
}
fn estimate(_fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
let amplitude = stats.max_value - stats.min_value;
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> f32 {
let amplitude = fastfield_accessor.max_value() - fastfield_accessor.min_value();
let num_bits = compute_num_bits(amplitude);
let num_bits_uncompressed = 64;
num_bits as f32 / num_bits_uncompressed as f32

View File

@@ -28,14 +28,14 @@ pub trait FastFieldCodecSerializer {
const ID: u8;
/// Check if the Codec is able to compress the data
fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> bool;
fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess) -> bool;
/// Returns an estimate of the compression ratio.
/// The baseline is uncompressed 64bit data.
///
/// It could make sense to also return a value representing
/// computational complexity.
fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32;
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> f32;
/// Serializes the data using the serializer into write.
///
@@ -44,7 +44,6 @@ pub trait FastFieldCodecSerializer {
fn serialize(
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
) -> io::Result<()>;
}
@@ -62,6 +61,15 @@ pub trait FastFieldDataAccess {
/// Returns an iterator over the data
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = u64> + 'a>;
/// min value of the data
fn min_value(&self) -> u64;
/// max value of the data
fn max_value(&self) -> u64;
/// number of values in the data
fn num_vals(&self) -> u64;
}
#[derive(Debug, Clone)]
@@ -80,6 +88,18 @@ impl<'a> FastFieldDataAccess for &'a [u64] {
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
Box::new((self as &[u64]).iter().cloned())
}
/// Smallest value in the slice, found by iterating over every element on
/// each call (nothing is cached). Falls back to 0 when the slice is empty.
fn min_value(&self) -> u64 {
self.iter().min().unwrap_or(0)
}
/// Largest value in the slice, found by iterating over every element on
/// each call (nothing is cached). Falls back to 0 when the slice is empty.
fn max_value(&self) -> u64 {
self.iter().max().unwrap_or(0)
}
fn num_vals(&self) -> u64 {
self.len() as u64
}
}
impl FastFieldDataAccess for Vec<u64> {
@@ -89,6 +109,17 @@ impl FastFieldDataAccess for Vec<u64> {
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
Box::new((self as &[u64]).iter().cloned())
}
/// Smallest value in the vector, found by iterating over every element on
/// each call (nothing is cached). Falls back to 0 when the vector is empty.
fn min_value(&self) -> u64 {
self.iter().min().unwrap_or(0)
}
/// Largest value in the vector, found by iterating over every element on
/// each call (nothing is cached). Falls back to 0 when the vector is empty.
fn max_value(&self) -> u64 {
self.iter().max().unwrap_or(0)
}
fn num_vals(&self) -> u64 {
self.len() as u64
}
}
#[cfg(test)]
@@ -103,12 +134,12 @@ mod tests {
data: &[u64],
name: &str,
) -> (f32, f32) {
if !S::is_applicable(&data, crate::tests::stats_from_vec(data)) {
if !S::is_applicable(&data) {
return (f32::MAX, 0.0);
}
let estimation = S::estimate(&data, crate::tests::stats_from_vec(data));
let estimation = S::estimate(&data);
let mut out: Vec<u8> = Vec::new();
S::serialize(&mut out, &data, crate::tests::stats_from_vec(data)).unwrap();
S::serialize(&mut out, &data).unwrap();
let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
@@ -184,29 +215,25 @@ mod tests {
fn estimation_good_interpolation_case() {
let data = (10..=20000_u64).collect::<Vec<_>>();
let linear_interpol_estimation =
LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.01);
let multi_linear_interpol_estimation =
MultiLinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
MultiLinearInterpolFastFieldSerializer::estimate(&data);
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);
let bitpacked_estimation =
BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, bitpacked_estimation);
}
#[test]
fn estimation_test_bad_interpolation_case() {
let data = vec![200, 10, 10, 10, 10, 1000, 20];
let linear_interpol_estimation =
LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.32);
let bitpacked_estimation =
BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}
#[test]
@@ -216,12 +243,10 @@ mod tests {
// in this case the linear interpolation can in fact not be worse than bitpacking,
// but the estimator adds some threshold, which leads to a worse estimate
let linear_interpol_estimation =
LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.35);
let bitpacked_estimation =
BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
assert_le!(bitpacked_estimation, 0.32);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}

View File

@@ -5,7 +5,7 @@ use common::{BinarySerializable, FixedSize};
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
/// Depending on the field type, a different
/// fast field is required.
@@ -139,13 +139,12 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
fn serialize(
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
) -> io::Result<()> {
assert!(stats.min_value <= stats.max_value);
assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value());
let first_val = fastfield_accessor.get_val(0);
let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
let slope = get_slope(first_val, last_val, stats.num_vals);
let last_val = fastfield_accessor.get_val(fastfield_accessor.num_vals() as u64 - 1);
let slope = get_slope(first_val, last_val, fastfield_accessor.num_vals());
// calculate offset to ensure all values are positive
let mut offset = 0;
let mut rel_positive_max = 0;
@@ -179,27 +178,25 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
offset,
first_val,
last_val,
num_vals: stats.num_vals,
min_value: stats.min_value,
max_value: stats.max_value,
num_vals: fastfield_accessor.num_vals(),
min_value: fastfield_accessor.min_value(),
max_value: fastfield_accessor.max_value(),
};
footer.serialize(write)?;
Ok(())
}
fn is_applicable(
_fastfield_accessor: &impl FastFieldDataAccess,
stats: FastFieldStats,
) -> bool {
if stats.num_vals < 3 {
fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess) -> bool {
if fastfield_accessor.num_vals() < 3 {
return false; // disable compressor for this case
}
// On serialisation the offset is added to the actual value.
// We need to make sure this won't run into overflow calculation issues.
// For this we take the maximum theoretical offset and add this to the max value.
// If this doesn't overflow the algorithm should be fine
let theorethical_maximum_offset = stats.max_value - stats.min_value;
if stats
.max_value
let theorethical_maximum_offset =
fastfield_accessor.max_value() - fastfield_accessor.min_value();
if fastfield_accessor
.max_value()
.checked_add(theorethical_maximum_offset)
.is_none()
{
@@ -210,13 +207,13 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
/// Estimation for linear interpolation is hard because you don't know
/// where the local maxima for the deviation of the calculated value are, and
/// the offset to shift all values to >=0 is also unknown.
fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> f32 {
let first_val = fastfield_accessor.get_val(0);
let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
let slope = get_slope(first_val, last_val, stats.num_vals);
let last_val = fastfield_accessor.get_val(fastfield_accessor.num_vals() as u64 - 1);
let slope = get_slope(first_val, last_val, fastfield_accessor.num_vals());
// let's sample at 0%, 5%, 10% .. 95%, 100%
let num_vals = stats.num_vals as f32 / 100.0;
let num_vals = fastfield_accessor.num_vals() as f32 / 100.0;
let sample_positions = (0..20)
.map(|pos| (num_vals * pos as f32 * 5.0) as usize)
.collect::<Vec<_>>();
@@ -238,9 +235,10 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
//
let relative_max_value = (max_distance as f32 * 1.5) * 2.0;
let num_bits = compute_num_bits(relative_max_value as u64) as u64 * stats.num_vals as u64
let num_bits = compute_num_bits(relative_max_value as u64) as u64
* fastfield_accessor.num_vals()
+ LinearInterpolFooter::SIZE_IN_BYTES as u64;
let num_bits_uncompressed = 64 * stats.num_vals;
let num_bits_uncompressed = 64 * fastfield_accessor.num_vals();
num_bits as f32 / num_bits_uncompressed as f32
}
}

View File

@@ -94,13 +94,13 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
data: &[u64],
) -> (bool, f32, f32, &'static str) {
let is_applicable = S::is_applicable(&data, stats_from_vec(data));
let is_applicable = S::is_applicable(&data);
if !is_applicable {
return (false, 0.0, 0.0, S::NAME);
}
let estimation = S::estimate(&data, stats_from_vec(data));
let estimation = S::estimate(&data);
let mut out = vec![];
S::serialize(&mut out, &data, stats_from_vec(data)).unwrap();
S::serialize(&mut out, &data).unwrap();
let actual_compression = out.len() as f32 / (data.len() * 8) as f32;
(true, estimation, actual_compression, S::NAME)

View File

@@ -18,7 +18,7 @@ use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
use crate::linearinterpol::{get_calculated_value, get_slope};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
const CHUNK_SIZE: u64 = 512;
@@ -188,15 +188,14 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
fn serialize(
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
stats: FastFieldStats,
) -> io::Result<()> {
assert!(stats.min_value <= stats.max_value);
assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value());
let first_val = fastfield_accessor.get_val(0);
let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
let last_val = fastfield_accessor.get_val(fastfield_accessor.num_vals() as u64 - 1);
let mut first_function = Function {
end_pos: stats.num_vals,
end_pos: fastfield_accessor.num_vals(),
value_start_pos: first_val,
value_end_pos: last_val,
..Default::default()
@@ -271,29 +270,27 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
bit_packer.close(write)?;
let footer = MultiLinearInterpolFooter {
num_vals: stats.num_vals,
min_value: stats.min_value,
max_value: stats.max_value,
num_vals: fastfield_accessor.num_vals(),
min_value: fastfield_accessor.min_value(),
max_value: fastfield_accessor.max_value(),
interpolations,
};
footer.serialize(write)?;
Ok(())
}
fn is_applicable(
_fastfield_accessor: &impl FastFieldDataAccess,
stats: FastFieldStats,
) -> bool {
if stats.num_vals < 5_000 {
fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess) -> bool {
if fastfield_accessor.num_vals() < 5_000 {
return false;
}
// On serialization the offset is added to the actual value.
// We need to make sure this won't run into overflow calculation issues.
// For this we take the maximum theoretical offset and add this to the max value.
// If this doesn't overflow the algorithm should be fine
let theorethical_maximum_offset = stats.max_value - stats.min_value;
if stats
.max_value
let theorethical_maximum_offset =
fastfield_accessor.max_value() - fastfield_accessor.min_value();
if fastfield_accessor
.max_value()
.checked_add(theorethical_maximum_offset)
.is_none()
{
@@ -304,15 +301,15 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
/// Estimation for linear interpolation is hard because you don't know
/// where the local maxima are for the deviation of the calculated value, and
/// the offset is also unknown.
fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> f32 {
let first_val_in_first_block = fastfield_accessor.get_val(0);
let last_elem_in_first_chunk = CHUNK_SIZE.min(stats.num_vals);
let last_elem_in_first_chunk = CHUNK_SIZE.min(fastfield_accessor.num_vals());
let last_val_in_first_block =
fastfield_accessor.get_val(last_elem_in_first_chunk as u64 - 1);
let slope = get_slope(
first_val_in_first_block,
last_val_in_first_block,
stats.num_vals,
fastfield_accessor.num_vals(),
);
// let's sample at 0%, 5%, 10% .. 95%, 100%, but for the first block only
@@ -339,10 +336,10 @@ impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
//
let relative_max_value = (max_distance as f32 * 1.5) * 2.0;
let num_bits = compute_num_bits(relative_max_value as u64) as u64 * stats.num_vals as u64
let num_bits = compute_num_bits(relative_max_value as u64) as u64 * fastfield_accessor.num_vals() as u64
// function metadata per block
+ 29 * (stats.num_vals / CHUNK_SIZE);
let num_bits_uncompressed = 64 * stats.num_vals;
+ 29 * (fastfield_accessor.num_vals() / CHUNK_SIZE);
let num_bits_uncompressed = 64 * fastfield_accessor.num_vals();
num_bits as f32 / num_bits_uncompressed as f32
}
}

View File

@@ -66,14 +66,13 @@ impl From<FastFieldCodecName> for FastFieldCodecEnableCheck {
// use this, when this is merged and stabilized explicit_generic_args_with_impl_trait
// https://github.com/rust-lang/rust/pull/86176
fn codec_estimation<T: FastFieldCodecSerializer, A: FastFieldDataAccess>(
stats: FastFieldStats,
fastfield_accessor: &A,
estimations: &mut Vec<(f32, &str, u8)>,
) {
if !T::is_applicable(fastfield_accessor, stats.clone()) {
if !T::is_applicable(fastfield_accessor) {
return;
}
let (ratio, name, id) = (T::estimate(fastfield_accessor, stats), T::NAME, T::ID);
let (ratio, name, id) = (T::estimate(fastfield_accessor), T::NAME, T::ID);
estimations.push((ratio, name, id));
}
@@ -101,10 +100,9 @@ impl CompositeFastFieldSerializer {
pub fn create_auto_detect_u64_fast_field(
&mut self,
field: Field,
stats: FastFieldStats,
fastfield_accessor: impl FastFieldDataAccess,
) -> io::Result<()> {
self.create_auto_detect_u64_fast_field_with_idx(field, stats, fastfield_accessor, 0)
self.create_auto_detect_u64_fast_field_with_idx(field, fastfield_accessor, 0)
}
/// Serialize data into a new u64 fast field. The best compression codec will be chosen
@@ -120,12 +118,12 @@ impl CompositeFastFieldSerializer {
pub fn create_auto_detect_u64_fast_field_with_idx(
&mut self,
field: Field,
stats: FastFieldStats,
fastfield_accessor: impl FastFieldDataAccess,
idx: usize,
) -> io::Result<()> {
let min_value = fastfield_accessor.min_value();
let field_write = self.composite_write.for_field_with_idx(field, idx);
let gcd = find_gcd(fastfield_accessor.iter().map(|val| val - stats.min_value))
let gcd = find_gcd(fastfield_accessor.iter().map(|val| val - min_value))
.map(NonZeroU64::get)
.unwrap_or(GCD_DEFAULT);
@@ -134,7 +132,6 @@ impl CompositeFastFieldSerializer {
self.codec_enable_checker.clone(),
field,
field_write,
stats,
fastfield_accessor,
);
}
@@ -142,42 +139,54 @@ impl CompositeFastFieldSerializer {
Self::write_header(field_write, GCD_CODEC_ID)?;
struct GCDWrappedFFAccess<T: FastFieldDataAccess> {
fastfield_accessor: T,
min_value: u64,
base_value: u64,
max_value: u64,
num_vals: u64,
gcd: u64,
}
impl<T: FastFieldDataAccess> FastFieldDataAccess for GCDWrappedFFAccess<T> {
fn get_val(&self, position: u64) -> u64 {
(self.fastfield_accessor.get_val(position) - self.min_value) / self.gcd
(self.fastfield_accessor.get_val(position) - self.base_value) / self.gcd
}
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
Box::new(
self.fastfield_accessor
.iter()
.map(|val| (val - self.min_value) / self.gcd),
.map(|val| (val - self.base_value) / self.gcd),
)
}
fn min_value(&self) -> u64 {
0
}
fn max_value(&self) -> u64 {
self.max_value
}
fn num_vals(&self) -> u64 {
self.num_vals
}
}
let num_vals = fastfield_accessor.num_vals();
let base_value = fastfield_accessor.min_value();
let max_value = (fastfield_accessor.max_value() - fastfield_accessor.min_value()) / gcd;
let fastfield_accessor = GCDWrappedFFAccess {
fastfield_accessor,
min_value: stats.min_value,
base_value,
max_value,
num_vals,
gcd,
};
let min_value = stats.min_value;
let stats = FastFieldStats {
min_value: 0,
max_value: (stats.max_value - stats.min_value) / gcd,
num_vals: stats.num_vals,
};
Self::create_auto_detect_u64_fast_field_with_idx_gcd(
self.codec_enable_checker.clone(),
field,
field_write,
stats,
fastfield_accessor,
)?;
write_gcd_header(field_write, min_value, gcd)?;
write_gcd_header(field_write, base_value, gcd)?;
Ok(())
}
@@ -187,28 +196,24 @@ impl CompositeFastFieldSerializer {
codec_enable_checker: FastFieldCodecEnableCheck,
field: Field,
field_write: &mut CountingWriter<W>,
stats: FastFieldStats,
fastfield_accessor: impl FastFieldDataAccess,
) -> io::Result<()> {
let mut estimations = vec![];
if codec_enable_checker.is_enabled(FastFieldCodecName::Bitpacked) {
codec_estimation::<BitpackedFastFieldSerializer, _>(
stats.clone(),
&fastfield_accessor,
&mut estimations,
);
}
if codec_enable_checker.is_enabled(FastFieldCodecName::LinearInterpol) {
codec_estimation::<LinearInterpolFastFieldSerializer, _>(
stats.clone(),
&fastfield_accessor,
&mut estimations,
);
}
if codec_enable_checker.is_enabled(FastFieldCodecName::BlockwiseLinearInterpol) {
codec_estimation::<MultiLinearInterpolFastFieldSerializer, _>(
stats.clone(),
&fastfield_accessor,
&mut estimations,
);
@@ -233,20 +238,15 @@ impl CompositeFastFieldSerializer {
Self::write_header(field_write, id)?;
match name {
BitpackedFastFieldSerializer::NAME => {
BitpackedFastFieldSerializer::serialize(field_write, &fastfield_accessor, stats)?;
BitpackedFastFieldSerializer::serialize(field_write, &fastfield_accessor)?;
}
LinearInterpolFastFieldSerializer::NAME => {
LinearInterpolFastFieldSerializer::serialize(
field_write,
&fastfield_accessor,
stats,
)?;
LinearInterpolFastFieldSerializer::serialize(field_write, &fastfield_accessor)?;
}
MultiLinearInterpolFastFieldSerializer::NAME => {
MultiLinearInterpolFastFieldSerializer::serialize(
field_write,
&fastfield_accessor,
stats,
)?;
}
_ => {

View File

@@ -359,17 +359,19 @@ impl IntFastFieldWriter {
(self.val_min, self.val_max)
};
let fastfield_accessor = WriterFastFieldAccessProvider {
doc_id_map,
vals: &self.vals,
};
let stats = FastFieldStats {
min_value: min,
max_value: max,
num_vals: self.val_count as u64,
};
serializer.create_auto_detect_u64_fast_field(self.field, stats, fastfield_accessor)?;
let fastfield_accessor = WriterFastFieldAccessProvider {
doc_id_map,
vals: &self.vals,
stats,
};
serializer.create_auto_detect_u64_fast_field(self.field, fastfield_accessor)?;
Ok(())
}
@@ -379,6 +381,7 @@ impl IntFastFieldWriter {
struct WriterFastFieldAccessProvider<'map, 'bitp> {
doc_id_map: Option<&'map DocIdMapping>,
vals: &'bitp BlockedBitpacker,
stats: FastFieldStats,
}
impl<'map, 'bitp> FastFieldDataAccess for WriterFastFieldAccessProvider<'map, 'bitp> {
/// Return the value associated to the given doc.
@@ -411,4 +414,16 @@ impl<'map, 'bitp> FastFieldDataAccess for WriterFastFieldAccessProvider<'map, 'b
Box::new(self.vals.iter())
}
}
fn min_value(&self) -> u64 {
self.stats.min_value
}
fn max_value(&self) -> u64 {
self.stats.max_value
}
fn num_vals(&self) -> u64 {
self.stats.num_vals
}
}

View File

@@ -374,6 +374,7 @@ impl IndexMerger {
struct SortedDocIdFieldAccessProvider<'a> {
doc_id_mapping: &'a SegmentDocIdMapping,
fast_field_readers: &'a Vec<DynamicFastFieldReader<u64>>,
stats: FastFieldStats,
}
impl<'a> FastFieldDataAccess for SortedDocIdFieldAccessProvider<'a> {
fn get_val(&self, doc: u64) -> u64 {
@@ -395,16 +396,24 @@ impl IndexMerger {
}),
)
}
fn min_value(&self) -> u64 {
self.stats.min_value
}
fn max_value(&self) -> u64 {
self.stats.max_value
}
fn num_vals(&self) -> u64 {
self.stats.num_vals
}
}
let fastfield_accessor = SortedDocIdFieldAccessProvider {
doc_id_mapping,
fast_field_readers: &fast_field_readers,
};
fast_field_serializer.create_auto_detect_u64_fast_field(
field,
stats,
fastfield_accessor,
)?;
};
fast_field_serializer.create_auto_detect_u64_fast_field(field, fastfield_accessor)?;
Ok(())
}
@@ -564,7 +573,37 @@ impl IndexMerger {
}
offsets.push(offset);
fast_field_serializer.create_auto_detect_u64_fast_field(field, stats, &offsets[..])?;
/// Adapter that exposes a borrowed slice of offsets, together with
/// caller-supplied stats, through the `FastFieldDataAccess` trait.
#[derive(Clone)]
struct FieldIndexAccessProvider<'a> {
// Borrowed offsets; `get_val` and `iter` read directly from this slice.
offsets: &'a [u64],
// Stats supplied at construction. They are returned as-is by the accessors
// below and never recomputed from `offsets`.
// NOTE(review): presumably these must describe `offsets` — confirm at the call site.
stats: FastFieldStats,
}
impl<'a> FastFieldDataAccess for FieldIndexAccessProvider<'a> {
/// Returns the offset stored at index `doc`.
/// Plain slice indexing, so an out-of-range `doc` panics.
fn get_val(&self, doc: u64) -> u64 {
self.offsets[doc as usize]
}
/// Returns a boxed iterator yielding every offset by value, in slice order.
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
Box::new(self.offsets.iter().cloned())
}
/// Returns the stored `stats.min_value`.
fn min_value(&self) -> u64 {
self.stats.min_value
}
/// Returns the stored `stats.max_value`.
fn max_value(&self) -> u64 {
self.stats.max_value
}
/// Returns the stored `stats.num_vals`.
fn num_vals(&self) -> u64 {
self.stats.num_vals
}
}
let fastfield_accessor = FieldIndexAccessProvider {
offsets: &offsets,
stats,
};
fast_field_serializer.create_auto_detect_u64_fast_field(field, fastfield_accessor)?;
Ok(offsets)
}
/// Returns the fastfield index (index for the data, not the data).
@@ -737,6 +776,7 @@ impl IndexMerger {
doc_id_mapping: &'a SegmentDocIdMapping,
fast_field_readers: &'a Vec<MultiValuedFastFieldReader<u64>>,
offsets: Vec<u64>,
stats: FastFieldStats,
}
impl<'a> FastFieldDataAccess for SortedDocIdMultiValueAccessProvider<'a> {
fn get_val(&self, pos: u64) -> u64 {
@@ -777,15 +817,26 @@ impl IndexMerger {
}),
)
}
fn min_value(&self) -> u64 {
self.stats.min_value
}
fn max_value(&self) -> u64 {
self.stats.max_value
}
fn num_vals(&self) -> u64 {
self.stats.num_vals
}
}
let fastfield_accessor = SortedDocIdMultiValueAccessProvider {
doc_id_mapping,
fast_field_readers: &ff_readers,
offsets,
stats,
};
fast_field_serializer.create_auto_detect_u64_fast_field_with_idx(
field,
stats,
fastfield_accessor,
1,
)?;