diff --git a/columnar/benches/bench_access.rs b/columnar/benches/bench_access.rs index 397a35af0..1c8830893 100644 --- a/columnar/benches/bench_access.rs +++ b/columnar/benches/bench_access.rs @@ -1,6 +1,6 @@ use binggan::{InputGroup, black_box}; use common::*; -use tantivy_columnar::Column; +use tantivy_columnar::{Column, ValueRange}; pub mod common; @@ -55,7 +55,7 @@ fn bench_group(mut runner: InputGroup) { docs[idx] = idx as u32 + i; } - column.first_vals(&docs, &mut buffer); + column.first_vals_in_value_range(&docs, &mut buffer, ValueRange::All); for val in buffer.iter() { let Some(val) = val else { continue }; sum += *val; diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index 83ad37def..d12fab3c5 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -91,23 +91,57 @@ impl Column { /// Load the first value for each docid in the provided slice. #[inline] - pub fn first_vals(&self, docids: &[DocId], output: &mut [Option]) { - match &self.index { - ColumnIndex::Empty { .. } => {} - ColumnIndex::Full => self.values.get_vals_opt(docids, output), - ColumnIndex::Optional(optional_index) => { + pub fn first_vals_in_value_range( + &self, + docids: &[DocId], + output: &mut [Option], + value_range: ValueRange, + ) { + match (&self.index, value_range) { + (ColumnIndex::Empty { .. }, _) => {} + (ColumnIndex::Full, value_range) => { + self.values + .get_vals_in_value_range(docids, output, value_range); + } + (ColumnIndex::Optional(optional_index), ValueRange::All) => { for (i, docid) in docids.iter().enumerate() { output[i] = optional_index .rank_if_exists(*docid) .map(|rowid| self.values.get_val(rowid)); } } - ColumnIndex::Multivalued(multivalued_index) => { + (ColumnIndex::Optional(optional_index), ValueRange::Inclusive(range)) => { + for (i, docid) in docids.iter().enumerate() { + output[i] = optional_index + .rank_if_exists(*docid) + .map(|rowid| self.values.get_val(rowid)) + .filter(|val| range.contains(val)); + } + } + (ColumnIndex::Multivalued(multivalued_index), ValueRange::All) => { for (i, docid) in docids.iter().enumerate() { let range = multivalued_index.range(*docid); let is_empty = range.start == range.end; if !is_empty { output[i] = Some(self.values.get_val(range.start)); + } else { + output[i] = None; + } + } + } + (ColumnIndex::Multivalued(multivalued_index), ValueRange::Inclusive(range)) => { + for (i, docid) in docids.iter().enumerate() { + let row_range = multivalued_index.range(*docid); + let is_empty = row_range.start == row_range.end; + if !is_empty { + let val = self.values.get_val(row_range.start); + if range.contains(&val) { + output[i] = Some(val); + } else { + output[i] = None; + } + } else { + output[i] = None; } } } diff --git a/columnar/src/column_values/mod.rs b/columnar/src/column_values/mod.rs index 83f5721e5..4dab2578b 100644 --- a/columnar/src/column_values/mod.rs +++ b/columnar/src/column_values/mod.rs @@ -110,6 +110,43 @@ pub trait ColumnValues: Send + Sync + DowncastSync { } } + /// Load the values for the provided docids. + /// + /// The values are filtered by the provided value range. + fn get_vals_in_value_range( + &self, + indexes: &[u32], + output: &mut [Option], + value_range: ValueRange, + ) { + assert!(indexes.len() == output.len()); + match value_range { + ValueRange::All => self.get_vals_opt(indexes, output), + ValueRange::Inclusive(range) => { + let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4)); + for (out_x4, idx_x4) in out_and_idx_chunks { + let v0 = self.get_val(idx_x4[0]); + out_x4[0] = if range.contains(&v0) { Some(v0) } else { None }; + let v1 = self.get_val(idx_x4[1]); + out_x4[1] = if range.contains(&v1) { Some(v1) } else { None }; + let v2 = self.get_val(idx_x4[2]); + out_x4[2] = if range.contains(&v2) { Some(v2) } else { None }; + let v3 = self.get_val(idx_x4[3]); + out_x4[3] = if range.contains(&v3) { Some(v3) } else { None }; + } + let out_and_idx_chunks = output + .chunks_exact_mut(4) + .into_remainder() + .iter_mut() + .zip(indexes.chunks_exact(4).remainder()); + for (out, idx) in out_and_idx_chunks { + let v = self.get_val(*idx); + *out = if range.contains(&v) { Some(v) } else { None }; + } + } + } + } + /// Fills an output buffer with the fast field values /// associated with the `DocId` going from /// `start` to `start + output.len()`. @@ -214,6 +251,17 @@ impl ColumnValues for Arc], + value_range: ValueRange, + ) { + self.as_ref() + .get_vals_in_value_range(indexes, output, value_range) + } + #[inline(always)] fn min_value(&self) -> T { self.as_ref().min_value() diff --git a/columnar/src/column_values/u64_based/bitpacked.rs b/columnar/src/column_values/u64_based/bitpacked.rs index 388de9b65..bbff868a9 100644 --- a/columnar/src/column_values/u64_based/bitpacked.rs +++ b/columnar/src/column_values/u64_based/bitpacked.rs @@ -107,6 +107,37 @@ impl ColumnValues for BitpackedReader { self.stats.num_rows } + fn get_vals_in_value_range( + &self, + indexes: &[u32], + output: &mut [Option], + value_range: ValueRange, + ) { + match value_range { + ValueRange::All => { + self.get_vals_opt(indexes, output); + } + ValueRange::Inclusive(range) => { + if let Some(transformed_range) = + transform_range_before_linear_transformation(&self.stats, range) + { + for (i, doc) in indexes.iter().enumerate() { + let raw_val = self.unpack_val(*doc); + if transformed_range.contains(&raw_val) { + output[i] = Some(self.stats.min_value + self.stats.gcd.get() * raw_val); + } else { + output[i] = None; + } + } + } else { + for out in output.iter_mut() { + *out = None; + } + } + } + } + } + fn get_row_ids_for_value_range( &self, range: ValueRange, diff --git a/src/collector/sort_key/sort_by_static_fast_value.rs b/src/collector/sort_key/sort_by_static_fast_value.rs index 0b01148d3..d333aad53 100644 --- a/src/collector/sort_key/sort_by_static_fast_value.rs +++ b/src/collector/sort_key/sort_by_static_fast_value.rs @@ -1,6 +1,6 @@ use std::marker::PhantomData; -use columnar::Column; +use columnar::{Column, ValueRange}; use crate::collector::sort_key::NaturalComparator; use crate::collector::{SegmentSortKeyComputer, SortKeyComputer}; @@ -94,7 +94,8 @@ impl SegmentSortKeyComputer for SortByFastValueSegmentSortKeyCompu fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec { self.buffer.resize(docs.len(), None); - self.sort_column.first_vals(docs, &mut self.buffer); + self.sort_column + .first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All); &mut self.buffer } diff --git a/src/collector/sort_key/sort_by_string.rs b/src/collector/sort_key/sort_by_string.rs index 47d74f3e1..78f05549e 100644 --- a/src/collector/sort_key/sort_by_string.rs +++ b/src/collector/sort_key/sort_by_string.rs @@ -1,4 +1,4 @@ -use columnar::StrColumn; +use columnar::{StrColumn, ValueRange}; use crate::collector::sort_key::NaturalComparator; use crate::collector::{SegmentSortKeyComputer, SortKeyComputer}; @@ -64,7 +64,9 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer { fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec { self.buffer.resize(docs.len(), None); if let Some(str_column) = &self.str_column_opt { - str_column.ords().first_vals(docs, &mut self.buffer); + str_column + .ords() + .first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All); } &mut self.buffer }