diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index cc2938bb8..093f24e8d 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -143,7 +143,7 @@ impl Column { #[inline] pub fn get_docids_for_value_range( &self, - value_range: RangeInclusive, + value_range: ValueRange, selected_docid_range: Range, doc_ids: &mut Vec, ) { @@ -168,6 +168,18 @@ impl Column { } } +/// A range of values. +/// +/// This type is intended to be used in batch APIs, where the cost of unpacking the enum +/// is outweighed by the time spent processing a batch. +/// +/// Implementers should pattern match on the variants to use optimized loops for each case. +#[derive(Clone, Debug)] +pub enum ValueRange { + /// A range that includes both start and end. + Inclusive(RangeInclusive), +} + impl BinarySerializable for Cardinality { fn serialize(&self, writer: &mut W) -> std::io::Result<()> { self.to_code().serialize(writer) diff --git a/columnar/src/column_index/multivalued_index.rs b/columnar/src/column_index/multivalued_index.rs index c26d21e0e..7bd1df236 100644 --- a/columnar/src/column_index/multivalued_index.rs +++ b/columnar/src/column_index/multivalued_index.rs @@ -339,7 +339,7 @@ mod tests { use std::ops::Range; use super::MultiValueIndex; - use crate::{ColumnarReader, DynamicColumn}; + use crate::{ColumnarReader, DynamicColumn, ValueRange}; fn index_to_pos_helper( index: &MultiValueIndex, @@ -419,7 +419,7 @@ mod tests { assert_eq!(row_id_range, 0..4); let check = |range, expected| { - let full_range = 0..=u64::MAX; + let full_range = ValueRange::Inclusive(0..=u64::MAX); let mut docids = Vec::new(); column.get_docids_for_value_range(full_range, range, &mut docids); assert_eq!(docids, expected); diff --git a/columnar/src/column_values/mod.rs b/columnar/src/column_values/mod.rs index 69af909cf..cf1a9502f 100644 --- a/columnar/src/column_values/mod.rs +++ b/columnar/src/column_values/mod.rs @@ -7,13 +7,15 @@ //! - Monotonically map values to u64/u128 use std::fmt::Debug; -use std::ops::{Range, RangeInclusive}; +use std::ops::Range; use std::sync::Arc; use downcast_rs::DowncastSync; pub use monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn}; pub use monotonic_mapping_u128::MonotonicallyMappableToU128; +use crate::column::ValueRange; + mod merge; pub(crate) mod monotonic_mapping; pub(crate) mod monotonic_mapping_u128; @@ -128,15 +130,19 @@ pub trait ColumnValues: Send + Sync + DowncastSync { /// Note that position == docid for single value fast fields fn get_row_ids_for_value_range( &self, - value_range: RangeInclusive, + value_range: ValueRange, row_id_range: Range, row_id_hits: &mut Vec, ) { let row_id_range = row_id_range.start..row_id_range.end.min(self.num_vals()); - for idx in row_id_range { - let val = self.get_val(idx); - if value_range.contains(&val) { - row_id_hits.push(idx); + match value_range { + ValueRange::Inclusive(range) => { + for idx in row_id_range { + let val = self.get_val(idx); + if range.contains(&val) { + row_id_hits.push(idx); + } + } } } } @@ -233,7 +239,7 @@ impl ColumnValues for Arc, + range: ValueRange, doc_id_range: Range, positions: &mut Vec, ) { diff --git a/columnar/src/column_values/monotonic_column.rs b/columnar/src/column_values/monotonic_column.rs index 35de3787a..384861a97 100644 --- a/columnar/src/column_values/monotonic_column.rs +++ b/columnar/src/column_values/monotonic_column.rs @@ -1,8 +1,9 @@ use std::fmt::Debug; use std::marker::PhantomData; -use std::ops::{Range, RangeInclusive}; +use std::ops::Range; use crate::ColumnValues; +use crate::column::ValueRange; use crate::column_values::monotonic_mapping::StrictlyMonotonicFn; struct MonotonicMappingColumn { @@ -80,13 +81,16 @@ where fn get_row_ids_for_value_range( &self, - range: RangeInclusive, + range: ValueRange, doc_id_range: Range, positions: &mut Vec, ) { + let ValueRange::Inclusive(range) = range; self.from_column.get_row_ids_for_value_range( - self.monotonic_mapping.inverse(range.start().clone()) - ..=self.monotonic_mapping.inverse(range.end().clone()), + ValueRange::Inclusive( + self.monotonic_mapping.inverse(range.start().clone()) + ..=self.monotonic_mapping.inverse(range.end().clone()), + ), doc_id_range, positions, ) diff --git a/columnar/src/column_values/u128_based/compact_space/mod.rs b/columnar/src/column_values/u128_based/compact_space/mod.rs index 5d928bc76..63abe5746 100644 --- a/columnar/src/column_values/u128_based/compact_space/mod.rs +++ b/columnar/src/column_values/u128_based/compact_space/mod.rs @@ -25,6 +25,7 @@ use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128}; use tantivy_bitpacker::{BitPacker, BitUnpacker}; use crate::RowId; +use crate::column::ValueRange; use crate::column_values::ColumnValues; /// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of @@ -338,12 +339,15 @@ impl ColumnValues for CompactSpaceU64Accessor { #[inline] fn get_row_ids_for_value_range( &self, - value_range: RangeInclusive, + value_range: ValueRange, position_range: Range, positions: &mut Vec, ) { - let value_range = self.0.compact_to_u128(*value_range.start() as u32) - ..=self.0.compact_to_u128(*value_range.end() as u32); + let ValueRange::Inclusive(value_range) = value_range; + let value_range = ValueRange::Inclusive( + self.0.compact_to_u128(*value_range.start() as u32) + ..=self.0.compact_to_u128(*value_range.end() as u32), + ); self.0 .get_row_ids_for_value_range(value_range, position_range, positions) } @@ -375,10 +379,11 @@ impl ColumnValues for CompactSpaceDecompressor { #[inline] fn get_row_ids_for_value_range( &self, - value_range: RangeInclusive, + value_range: ValueRange, position_range: Range, positions: &mut Vec, ) { + let ValueRange::Inclusive(value_range) = value_range; if value_range.start() > value_range.end() { return; } @@ -560,7 +565,7 @@ mod tests { .collect::>(); let mut positions = Vec::new(); decompressor.get_row_ids_for_value_range( - range, + ValueRange::Inclusive(range), 0..decompressor.num_vals(), &mut positions, ); @@ -604,7 +609,11 @@ mod tests { let val = *val; let pos = pos as u32; let mut positions = Vec::new(); - decomp.get_row_ids_for_value_range(val..=val, pos..pos + 1, &mut positions); + decomp.get_row_ids_for_value_range( + ValueRange::Inclusive(val..=val), + pos..pos + 1, + &mut positions, + ); assert_eq!(positions, vec![pos]); } @@ -746,7 +755,11 @@ mod tests { doc_id_range: Range, ) -> Vec { let mut positions = Vec::new(); - column.get_row_ids_for_value_range(value_range, doc_id_range, &mut positions); + column.get_row_ids_for_value_range( + ValueRange::Inclusive(value_range), + doc_id_range, + &mut positions, + ); positions } diff --git a/columnar/src/column_values/u64_based/bitpacked.rs b/columnar/src/column_values/u64_based/bitpacked.rs index 5d0494158..c423abc9e 100644 --- a/columnar/src/column_values/u64_based/bitpacked.rs +++ b/columnar/src/column_values/u64_based/bitpacked.rs @@ -8,6 +8,7 @@ use common::{BinarySerializable, HasLen, OwnedBytes}; use fastdivide::DividerU64; use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits}; +use crate::column::ValueRange; use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats}; use crate::{ColumnValues, RowId}; @@ -69,8 +70,9 @@ const fn div_ceil(n: u64, q: NonZeroU64) -> u64 { // f(bitpacked) ∈ [min_value, max_value] <=> bitpacked ∈ [min_bitpacked_value, max_bitpacked_value] fn transform_range_before_linear_transformation( stats: &ColumnStats, - range: RangeInclusive, + range: ValueRange, ) -> Option> { + let ValueRange::Inclusive(range) = range; if range.is_empty() { return None; } @@ -108,7 +110,7 @@ impl ColumnValues for BitpackedReader { fn get_row_ids_for_value_range( &self, - range: RangeInclusive, + range: ValueRange, doc_id_range: Range, positions: &mut Vec, ) { diff --git a/columnar/src/column_values/u64_based/tests.rs b/columnar/src/column_values/u64_based/tests.rs index 5f707ecfd..f0ddef3c8 100644 --- a/columnar/src/column_values/u64_based/tests.rs +++ b/columnar/src/column_values/u64_based/tests.rs @@ -132,7 +132,7 @@ pub(crate) fn create_and_validate( .collect(); let mut positions = Vec::new(); reader.get_row_ids_for_value_range( - vals[test_rand_idx]..=vals[test_rand_idx], + crate::column::ValueRange::Inclusive(vals[test_rand_idx]..=vals[test_rand_idx]), 0..vals.len() as u32, &mut positions, ); diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index 537c52562..ce499f72b 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -36,7 +36,7 @@ pub(crate) mod utils; mod value; pub use block_accessor::ColumnBlockAccessor; -pub use column::{BytesColumn, Column, StrColumn}; +pub use column::{BytesColumn, Column, StrColumn, ValueRange}; pub use column_index::ColumnIndex; pub use column_values::{ ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128, diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 9bbad3132..57e3ecbe6 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -79,7 +79,7 @@ mod tests { use std::ops::{Range, RangeInclusive}; use std::path::Path; - use columnar::StrColumn; + use columnar::{StrColumn, ValueRange}; use common::{ByteCount, DateTimePrecision, HasLen, TerminatingWrite}; use once_cell::sync::Lazy; use rand::prelude::SliceRandom; @@ -944,7 +944,7 @@ mod tests { let test_range = |range: RangeInclusive| { let expected_count = numbers.iter().filter(|num| range.contains(*num)).count(); let mut vec = vec![]; - field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec); + field.get_row_ids_for_value_range(ValueRange::Inclusive(range), 0..u32::MAX, &mut vec); assert_eq!(vec.len(), expected_count); }; test_range(50..=50); @@ -1022,7 +1022,7 @@ mod tests { let test_range = |range: RangeInclusive| { let expected_count = numbers.iter().filter(|num| range.contains(*num)).count(); let mut vec = vec![]; - field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec); + field.get_row_ids_for_value_range(ValueRange::Inclusive(range), 0..u32::MAX, &mut vec); assert_eq!(vec.len(), expected_count); }; let test_range_variant = |start, stop| { diff --git a/src/query/range_query/fast_field_range_doc_set.rs b/src/query/range_query/fast_field_range_doc_set.rs index dd4b8fe68..efd8a73fd 100644 --- a/src/query/range_query/fast_field_range_doc_set.rs +++ b/src/query/range_query/fast_field_range_doc_set.rs @@ -1,7 +1,6 @@ use core::fmt::Debug; -use std::ops::RangeInclusive; -use columnar::Column; +use columnar::{Column, ValueRange}; use crate::{DocId, DocSet, TERMINATED}; @@ -41,7 +40,7 @@ impl VecCursor { pub(crate) struct RangeDocSet { /// The range filter on the values. - value_range: RangeInclusive, + value_range: ValueRange, column: Column, /// The next docid start range to fetch (inclusive). next_fetch_start: u32, @@ -61,7 +60,7 @@ pub(crate) struct RangeDocSet { const DEFAULT_FETCH_HORIZON: u32 = 128; impl RangeDocSet { - pub(crate) fn new(value_range: RangeInclusive, column: Column) -> Self { + pub(crate) fn new(value_range: ValueRange, column: Column) -> Self { let mut range_docset = Self { value_range, column, diff --git a/src/query/range_query/range_query_fastfield.rs b/src/query/range_query/range_query_fastfield.rs index 54cf0cad5..b1134e416 100644 --- a/src/query/range_query/range_query_fastfield.rs +++ b/src/query/range_query/range_query_fastfield.rs @@ -7,7 +7,7 @@ use std::ops::{Bound, RangeInclusive}; use columnar::{ Cardinality, Column, ColumnType, MonotonicallyMappableToU128, MonotonicallyMappableToU64, - NumericalType, StrColumn, + NumericalType, StrColumn, ValueRange, }; use common::bounds::{BoundsRange, TransformBound}; @@ -154,7 +154,7 @@ impl Weight for FastFieldRangeWeight { ip_addr_column.min_value(), ip_addr_column.max_value(), ); - let docset = RangeDocSet::new(value_range, ip_addr_column); + let docset = RangeDocSet::new(ValueRange::Inclusive(value_range), ip_addr_column); Ok(Box::new(ConstScorer::new(docset, boost))) } else if field_type.is_str() { let Some(str_dict_column): Option = reader.fast_fields().str(&field_name)? @@ -426,7 +426,7 @@ fn search_on_u64_ff( } } - let docset = RangeDocSet::new(value_range, column); + let docset = RangeDocSet::new(ValueRange::Inclusive(value_range), column); Ok(Box::new(ConstScorer::new(docset, boost))) }