Replace Column::first_vals with Column::first_vals_in_value_range.

This commit is contained in:
Stu Hood
2025-12-25 15:26:33 -07:00
parent efc9e585a9
commit e8a4adeedd
6 changed files with 128 additions and 12 deletions

View File

@@ -1,6 +1,6 @@
use binggan::{InputGroup, black_box};
use common::*;
use tantivy_columnar::Column;
use tantivy_columnar::{Column, ValueRange};
pub mod common;
@@ -55,7 +55,7 @@ fn bench_group(mut runner: InputGroup<Column>) {
docs[idx] = idx as u32 + i;
}
column.first_vals(&docs, &mut buffer);
column.first_vals_in_value_range(&docs, &mut buffer, ValueRange::All);
for val in buffer.iter() {
let Some(val) = val else { continue };
sum += *val;

View File

@@ -91,23 +91,57 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
/// Load the first value for each docid in the provided slice.
#[inline]
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
match &self.index {
ColumnIndex::Empty { .. } => {}
ColumnIndex::Full => self.values.get_vals_opt(docids, output),
ColumnIndex::Optional(optional_index) => {
pub fn first_vals_in_value_range(
&self,
docids: &[DocId],
output: &mut [Option<T>],
value_range: ValueRange<T>,
) {
match (&self.index, value_range) {
(ColumnIndex::Empty { .. }, _) => {}
(ColumnIndex::Full, value_range) => {
self.values
.get_vals_in_value_range(docids, output, value_range);
}
(ColumnIndex::Optional(optional_index), ValueRange::All) => {
for (i, docid) in docids.iter().enumerate() {
output[i] = optional_index
.rank_if_exists(*docid)
.map(|rowid| self.values.get_val(rowid));
}
}
ColumnIndex::Multivalued(multivalued_index) => {
(ColumnIndex::Optional(optional_index), ValueRange::Inclusive(range)) => {
for (i, docid) in docids.iter().enumerate() {
output[i] = optional_index
.rank_if_exists(*docid)
.map(|rowid| self.values.get_val(rowid))
.filter(|val| range.contains(val));
}
}
(ColumnIndex::Multivalued(multivalued_index), ValueRange::All) => {
for (i, docid) in docids.iter().enumerate() {
let range = multivalued_index.range(*docid);
let is_empty = range.start == range.end;
if !is_empty {
output[i] = Some(self.values.get_val(range.start));
} else {
output[i] = None;
}
}
}
(ColumnIndex::Multivalued(multivalued_index), ValueRange::Inclusive(range)) => {
for (i, docid) in docids.iter().enumerate() {
let row_range = multivalued_index.range(*docid);
let is_empty = row_range.start == row_range.end;
if !is_empty {
let val = self.values.get_val(row_range.start);
if range.contains(&val) {
output[i] = Some(val);
} else {
output[i] = None;
}
} else {
output[i] = None;
}
}
}

View File

@@ -110,6 +110,43 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
}
}
/// Writes into each slot of `output` the value found at the matching position of
/// `indexes`, keeping only the values accepted by `value_range`.
///
/// With `ValueRange::All` the call is handed over to `get_vals_opt`. With
/// `ValueRange::Inclusive`, every value lying outside of the range is reported as
/// `None`.
///
/// # Panics
///
/// Panics if `indexes` and `output` do not have the same length.
fn get_vals_in_value_range(
    &self,
    indexes: &[u32],
    output: &mut [Option<T>],
    value_range: ValueRange<T>,
) {
    assert!(indexes.len() == output.len());
    let range = match value_range {
        ValueRange::All => {
            self.get_vals_opt(indexes, output);
            return;
        }
        ValueRange::Inclusive(range) => range,
    };
    // Keeps a value only when it lies inside the requested range.
    let keep_if_in_range = |val: T| if range.contains(&val) { Some(val) } else { None };

    let mut out_chunks = output.chunks_exact_mut(4);
    let idx_chunks = indexes.chunks_exact(4);
    // The leftover indexes (fewer than 4) are grabbed before the iterator is consumed.
    let idx_tail = idx_chunks.remainder();

    // Main loop, processed four entries at a time.
    for (out_x4, idx_x4) in out_chunks.by_ref().zip(idx_chunks) {
        out_x4[0] = keep_if_in_range(self.get_val(idx_x4[0]));
        out_x4[1] = keep_if_in_range(self.get_val(idx_x4[1]));
        out_x4[2] = keep_if_in_range(self.get_val(idx_x4[2]));
        out_x4[3] = keep_if_in_range(self.get_val(idx_x4[3]));
    }

    // Trailing entries that did not fill a whole group of four.
    for (out, idx) in out_chunks.into_remainder().iter_mut().zip(idx_tail) {
        *out = keep_if_in_range(self.get_val(*idx));
    }
}
/// Fills an output buffer with the fast field values
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
@@ -214,6 +251,17 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
self.as_ref().get_vals_opt(indexes, output)
}
/// Forwards the range-filtered lookup to the column values held behind the pointer.
#[inline(always)]
fn get_vals_in_value_range(
    &self,
    indexes: &[u32],
    output: &mut [Option<T>],
    value_range: ValueRange<T>,
) {
    // Dereference down to the wrapped object so the call lands on its own implementation.
    (**self).get_vals_in_value_range(indexes, output, value_range)
}
#[inline(always)]
fn min_value(&self) -> T {
self.as_ref().min_value()

View File

@@ -107,6 +107,37 @@ impl ColumnValues for BitpackedReader {
self.stats.num_rows
}
/// Loads the values at `indexes` into `output`, keeping only those accepted by
/// `value_range`.
///
/// For an inclusive range, the bounds are first converted with
/// `transform_range_before_linear_transformation`, so that the comparison is done on
/// the raw unpacked value. A value is only rebuilt (`min_value + gcd * raw`) when it
/// is kept. When the conversion yields no range, every slot is set to `None`.
fn get_vals_in_value_range(
    &self,
    indexes: &[u32],
    output: &mut [Option<u64>],
    value_range: ValueRange<u64>,
) {
    let range = match value_range {
        ValueRange::All => {
            self.get_vals_opt(indexes, output);
            return;
        }
        ValueRange::Inclusive(range) => range,
    };

    let raw_range = match transform_range_before_linear_transformation(&self.stats, range) {
        Some(raw_range) => raw_range,
        None => {
            // No raw value can match: clear the whole output.
            output.fill(None);
            return;
        }
    };

    for (pos, &row_id) in indexes.iter().enumerate() {
        let raw_val = self.unpack_val(row_id);
        output[pos] = if raw_range.contains(&raw_val) {
            Some(self.stats.min_value + self.stats.gcd.get() * raw_val)
        } else {
            None
        };
    }
}
fn get_row_ids_for_value_range(
&self,
range: ValueRange<u64>,

View File

@@ -1,6 +1,6 @@
use std::marker::PhantomData;
use columnar::Column;
use columnar::{Column, ValueRange};
use crate::collector::sort_key::NaturalComparator;
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
@@ -94,7 +94,8 @@ impl<T: FastValue> SegmentSortKeyComputer for SortByFastValueSegmentSortKeyCompu
/// Fills the internal buffer with one entry per doc of `docs` and returns it.
///
/// The entries are read from `sort_column`, without any value filtering
/// (`ValueRange::All`).
// NOTE(review): this view shows both the earlier `first_vals` call and the
// `first_vals_in_value_range` call that the commit title says replaces it; confirm
// that only the latter remains in the actual file.
fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec<Self::SegmentSortKey> {
// Make the buffer exactly as long as `docs`; slots added by the resize start as `None`.
self.buffer.resize(docs.len(), None);
self.sort_column.first_vals(docs, &mut self.buffer);
self.sort_column
.first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All);
&mut self.buffer
}

View File

@@ -1,4 +1,4 @@
use columnar::StrColumn;
use columnar::{StrColumn, ValueRange};
use crate::collector::sort_key::NaturalComparator;
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
@@ -64,7 +64,9 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
/// Fills the internal buffer with one entry per doc of `docs` and returns it.
///
/// When a string column is available, the entries are read from the column returned
/// by its `ords()` accessor, without any value filtering (`ValueRange::All`). When
/// there is no string column, nothing is loaded into the buffer.
// NOTE(review): this view shows both the earlier `first_vals` call and the
// `first_vals_in_value_range` call that the commit title says replaces it; confirm
// that only the latter remains in the actual file.
fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec<Self::SegmentSortKey> {
// Make the buffer exactly as long as `docs`; slots added by the resize start as `None`.
self.buffer.resize(docs.len(), None);
if let Some(str_column) = &self.str_column_opt {
str_column.ords().first_vals(docs, &mut self.buffer);
str_column
.ords()
.first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All);
}
&mut self.buffer
}