move minmax to bitpacker

move minmax to bitpacker
use minmax in blocked bitpacker
This commit is contained in:
Pascal Seitz
2021-04-30 17:07:30 +02:00
parent fde9d27482
commit 478571ebb4
5 changed files with 22 additions and 24 deletions

View File

@@ -1,4 +1,4 @@
use crate::BitUnpacker;
use crate::{minmax, BitUnpacker};
use super::{bitpacker::BitPacker, compute_num_bits};
@@ -83,14 +83,9 @@ impl BlockedBitpacker {
}
pub fn flush(&mut self) {
if let Some(min_value) = self.buffer.iter().min() {
if let Some((min_value, max_value)) = minmax(self.buffer.iter()) {
let mut bit_packer = BitPacker::new();
let num_bits_block = self
.buffer
.iter()
.map(|val| compute_num_bits(*val - min_value))
.max()
.unwrap();
let num_bits_block = compute_num_bits(*max_value - min_value);
// todo performance: the padding handling could be done better, e.g. use a slice and
// return num_bytes written from bitpacker
self.compressed_blocks

View File

@@ -37,3 +37,16 @@ pub fn compute_num_bits(n: u64) -> u8 {
64
}
}
/// Computes the smallest and the largest item produced by `vals` in a single pass.
///
/// Returns `None` when the iterator yields no items, otherwise
/// `Some((min, max))`. A single-item iterator yields that item as both bounds.
pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
where
    I: Iterator<Item = T>,
    T: Copy + Ord,
{
    // The first item seeds both bounds; an empty iterator bails out here.
    let seed = vals.next()?;
    let bounds = vals.fold((seed, seed), |(lowest, highest), item| {
        (lowest.min(item), highest.max(item))
    });
    Some(bounds)
}

View File

@@ -19,19 +19,6 @@ pub use byteorder::LittleEndian as Endianness;
/// We do not allow segments with more than `MAX_DOC_LIMIT` (2^31) documents.
pub const MAX_DOC_LIMIT: u32 = 1 << 31;
/// Scans `vals` once and reports its minimum and maximum item.
///
/// Yields `None` for an empty iterator and `Some((min, max))` otherwise.
pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
where
    I: Iterator<Item = T>,
    T: Copy + Ord,
{
    match vals.next() {
        None => None,
        Some(head) => {
            // Both bounds start at the first item and are tightened by the rest.
            let (mut smallest, mut largest) = (head, head);
            for candidate in vals {
                smallest = smallest.min(candidate);
                largest = largest.max(candidate);
            }
            Some((smallest, largest))
        }
    }
}
/// Has length trait
pub trait HasLen {
/// Return length
@@ -116,12 +103,12 @@ pub fn u64_to_f64(val: u64) -> f64 {
#[cfg(test)]
pub(crate) mod test {
pub use super::minmax;
pub use super::serialize::test::fixed_size_test;
use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
use proptest::prelude::*;
use std::f64;
use tantivy_bitpacker::compute_num_bits;
pub use tantivy_bitpacker::minmax;
fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);

View File

@@ -8,6 +8,7 @@ use crate::DocId;
use fnv::FnvHashMap;
use std::io;
use std::iter::once;
use tantivy_bitpacker::minmax;
/// Writer for multi-valued (as in, more than one value per document)
/// int fast field.
@@ -154,7 +155,7 @@ impl MultiValuedFastFieldWriter {
}
}
None => {
let val_min_max = crate::common::minmax(self.vals.iter().cloned());
let val_min_max = minmax(self.vals.iter().cloned());
let (val_min, val_max) = val_min_max.unwrap_or((0u64, 0u64));
value_serializer =
serializer.new_u64_fast_field_with_idx(self.field, val_min, val_max, 1)?;

View File

@@ -1,3 +1,5 @@
use tantivy_bitpacker::minmax;
use crate::common::MAX_DOC_LIMIT;
use crate::core::Segment;
use crate::core::SegmentReader;
@@ -70,7 +72,7 @@ fn compute_min_max_val(
Some(delete_bitset) => {
// some deleted documents,
// we need to recompute the max / min
crate::common::minmax(
minmax(
(0..max_doc)
.filter(|doc_id| delete_bitset.is_alive(*doc_id))
.map(|doc_id| u64_reader.get(doc_id)),