mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
move minmax to bitpacker
move minmax to bitpacker; use minmax in blocked bitpacker
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
use crate::BitUnpacker;
|
||||
use crate::{minmax, BitUnpacker};
|
||||
|
||||
use super::{bitpacker::BitPacker, compute_num_bits};
|
||||
|
||||
@@ -83,14 +83,9 @@ impl BlockedBitpacker {
|
||||
}
|
||||
|
||||
pub fn flush(&mut self) {
|
||||
if let Some(min_value) = self.buffer.iter().min() {
|
||||
if let Some((min_value, max_value)) = minmax(self.buffer.iter()) {
|
||||
let mut bit_packer = BitPacker::new();
|
||||
let num_bits_block = self
|
||||
.buffer
|
||||
.iter()
|
||||
.map(|val| compute_num_bits(*val - min_value))
|
||||
.max()
|
||||
.unwrap();
|
||||
let num_bits_block = compute_num_bits(*max_value - min_value);
|
||||
// todo performance: the padding handling could be done better, e.g. use a slice and
|
||||
// return num_bytes written from bitpacker
|
||||
self.compressed_blocks
|
||||
|
||||
@@ -37,3 +37,16 @@ pub fn compute_num_bits(n: u64) -> u8 {
|
||||
64
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the smallest and the largest element yielded by `vals`.
///
/// The iterator is consumed in a single pass. The result is
/// `Some((min, max))`, or `None` when the iterator yields no element.
/// For a single-element iterator both components are that element.
pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
where
    I: Iterator<Item = T>,
    T: Copy + Ord,
{
    // An empty iterator has neither a minimum nor a maximum.
    let first = vals.next()?;
    // Seed both bounds with the first element, then tighten them one element at a time.
    Some(vals.fold((first, first), |(lo, hi), el| (lo.min(el), hi.max(el))))
}
|
||||
|
||||
@@ -19,19 +19,6 @@ pub use byteorder::LittleEndian as Endianness;
|
||||
/// We do not allow segments with more than `MAX_DOC_LIMIT` (2^31) documents.
|
||||
pub const MAX_DOC_LIMIT: u32 = 1 << 31;
|
||||
|
||||
/// Returns the smallest and the largest element yielded by `vals`.
///
/// The iterator is consumed in a single pass. The result is
/// `Some((min, max))`, or `None` when the iterator yields no element.
/// For a single-element iterator both components are that element.
pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)>
where
    I: Iterator<Item = T>,
    T: Copy + Ord,
{
    // An empty iterator has neither a minimum nor a maximum.
    let first = vals.next()?;
    // Seed both bounds with the first element, then tighten them one element at a time.
    Some(vals.fold((first, first), |(lo, hi), el| (lo.min(el), hi.max(el))))
}
|
||||
|
||||
/// Has length trait
|
||||
pub trait HasLen {
|
||||
/// Return length
|
||||
@@ -116,12 +103,12 @@ pub fn u64_to_f64(val: u64) -> f64 {
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
|
||||
pub use super::minmax;
|
||||
pub use super::serialize::test::fixed_size_test;
|
||||
use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
|
||||
use proptest::prelude::*;
|
||||
use std::f64;
|
||||
use tantivy_bitpacker::compute_num_bits;
|
||||
pub use tantivy_bitpacker::minmax;
|
||||
|
||||
fn test_i64_converter_helper(val: i64) {
|
||||
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
|
||||
|
||||
@@ -8,6 +8,7 @@ use crate::DocId;
|
||||
use fnv::FnvHashMap;
|
||||
use std::io;
|
||||
use std::iter::once;
|
||||
use tantivy_bitpacker::minmax;
|
||||
|
||||
/// Writer for multi-valued (as in, more than one value per document)
|
||||
/// int fast field.
|
||||
@@ -154,7 +155,7 @@ impl MultiValuedFastFieldWriter {
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let val_min_max = crate::common::minmax(self.vals.iter().cloned());
|
||||
let val_min_max = minmax(self.vals.iter().cloned());
|
||||
let (val_min, val_max) = val_min_max.unwrap_or((0u64, 0u64));
|
||||
value_serializer =
|
||||
serializer.new_u64_fast_field_with_idx(self.field, val_min, val_max, 1)?;
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use tantivy_bitpacker::minmax;
|
||||
|
||||
use crate::common::MAX_DOC_LIMIT;
|
||||
use crate::core::Segment;
|
||||
use crate::core::SegmentReader;
|
||||
@@ -70,7 +72,7 @@ fn compute_min_max_val(
|
||||
Some(delete_bitset) => {
|
||||
// some deleted documents,
|
||||
// we need to recompute the max / min
|
||||
crate::common::minmax(
|
||||
minmax(
|
||||
(0..max_doc)
|
||||
.filter(|doc_id| delete_bitset.is_alive(*doc_id))
|
||||
.map(|doc_id| u64_reader.get(doc_id)),
|
||||
|
||||
Reference in New Issue
Block a user