diff --git a/bitpacker/src/blocked_bitpacker.rs b/bitpacker/src/blocked_bitpacker.rs index 271ab7f67..027886dfb 100644 --- a/bitpacker/src/blocked_bitpacker.rs +++ b/bitpacker/src/blocked_bitpacker.rs @@ -1,4 +1,4 @@ -use crate::BitUnpacker; +use crate::{minmax, BitUnpacker}; use super::{bitpacker::BitPacker, compute_num_bits}; @@ -83,14 +83,9 @@ impl BlockedBitpacker { } pub fn flush(&mut self) { - if let Some(min_value) = self.buffer.iter().min() { + if let Some((min_value, max_value)) = minmax(self.buffer.iter()) { let mut bit_packer = BitPacker::new(); - let num_bits_block = self - .buffer - .iter() - .map(|val| compute_num_bits(*val - min_value)) - .max() - .unwrap(); + let num_bits_block = compute_num_bits(*max_value - min_value); // todo performance: the padding handling could be done better, e.g. use a slice and // return num_bytes written from bitpacker self.compressed_blocks diff --git a/bitpacker/src/lib.rs b/bitpacker/src/lib.rs index cb566769c..1697a8488 100644 --- a/bitpacker/src/lib.rs +++ b/bitpacker/src/lib.rs @@ -37,3 +37,16 @@ pub fn compute_num_bits(n: u64) -> u8 { 64 } } + +pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)> +where + I: Iterator<Item = T>, + T: Copy + Ord, +{ + if let Some(first_el) = vals.next() { + return Some(vals.fold((first_el, first_el), |(min_val, max_val), el| { + (min_val.min(el), max_val.max(el)) + })); + } + None +} diff --git a/src/common/mod.rs b/src/common/mod.rs index 43ea7be51..bfa3b1262 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -19,19 +19,6 @@ pub use byteorder::LittleEndian as Endianness; /// We do not allow segments with more than pub const MAX_DOC_LIMIT: u32 = 1 << 31; -pub fn minmax<I, T>(mut vals: I) -> Option<(T, T)> -where - I: Iterator<Item = T>, - T: Copy + Ord, -{ - if let Some(first_el) = vals.next() { - return Some(vals.fold((first_el, first_el), |(min_val, max_val), el| { - (min_val.min(el), max_val.max(el)) - })); - } - None -} - /// Has length trait pub trait HasLen { /// Return length @@ -116,12 +103,12 @@ pub 
fn u64_to_f64(val: u64) -> f64 { #[cfg(test)] pub(crate) mod test { - pub use super::minmax; pub use super::serialize::test::fixed_size_test; use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64}; use proptest::prelude::*; use std::f64; use tantivy_bitpacker::compute_num_bits; + pub use tantivy_bitpacker::minmax; fn test_i64_converter_helper(val: i64) { assert_eq!(u64_to_i64(i64_to_u64(val)), val); diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index e7c2bc409..b25888e78 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -8,6 +8,7 @@ use crate::DocId; use fnv::FnvHashMap; use std::io; use std::iter::once; +use tantivy_bitpacker::minmax; /// Writer for multi-valued (as in, more than one value per document) /// int fast field. @@ -154,7 +155,7 @@ impl MultiValuedFastFieldWriter { } } None => { - let val_min_max = crate::common::minmax(self.vals.iter().cloned()); + let val_min_max = minmax(self.vals.iter().cloned()); let (val_min, val_max) = val_min_max.unwrap_or((0u64, 0u64)); value_serializer = serializer.new_u64_fast_field_with_idx(self.field, val_min, val_max, 1)?; diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 93099b50c..6004d2984 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1,3 +1,5 @@ +use tantivy_bitpacker::minmax; + use crate::common::MAX_DOC_LIMIT; use crate::core::Segment; use crate::core::SegmentReader; @@ -70,7 +72,7 @@ fn compute_min_max_val( Some(delete_bitset) => { // some deleted documents, // we need to recompute the max / min - crate::common::minmax( + minmax( (0..max_doc) .filter(|doc_id| delete_bitset.is_alive(*doc_id)) .map(|doc_id| u64_reader.get(doc_id)),