Replace Column::first_vals with Column::first_vals_in_value_range.

2026-01-07 09:32:54 +00:00 · 2025-12-25 15:26:33 -07:00
parent efc9e585a9
commit e8a4adeedd
6 changed files with 128 additions and 12 deletions
--- a/columnar/benches/bench_access.rs
+++ b/columnar/benches/bench_access.rs
@@ -1,6 +1,6 @@
 use binggan::{InputGroup, black_box};
 use common::*;
-use tantivy_columnar::Column;
+use tantivy_columnar::{Column, ValueRange};

 pub mod common;

@@ -55,7 +55,7 @@ fn bench_group(mut runner: InputGroup<Column>) {
                docs[idx] = idx as u32 + i;
            }

-            column.first_vals(&docs, &mut buffer);
+            column.first_vals_in_value_range(&docs, &mut buffer, ValueRange::All);
            for val in buffer.iter() {
                let Some(val) = val else { continue };
                sum += *val;
--- a/columnar/src/column/mod.rs
+++ b/columnar/src/column/mod.rs
@@ -91,23 +91,57 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {

    /// Load the first value for each docid in the provided slice.
    #[inline]
-    pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
-        match &self.index {
-            ColumnIndex::Empty { .. } => {}
-            ColumnIndex::Full => self.values.get_vals_opt(docids, output),
-            ColumnIndex::Optional(optional_index) => {
+    pub fn first_vals_in_value_range(
+        &self,
+        docids: &[DocId],
+        output: &mut [Option<T>],
+        value_range: ValueRange<T>,
+    ) {
+        match (&self.index, value_range) {
+            (ColumnIndex::Empty { .. }, _) => {}
+            (ColumnIndex::Full, value_range) => {
+                self.values
+                    .get_vals_in_value_range(docids, output, value_range);
+            }
+            (ColumnIndex::Optional(optional_index), ValueRange::All) => {
                for (i, docid) in docids.iter().enumerate() {
                    output[i] = optional_index
                        .rank_if_exists(*docid)
                        .map(|rowid| self.values.get_val(rowid));
                }
            }
-            ColumnIndex::Multivalued(multivalued_index) => {
+            (ColumnIndex::Optional(optional_index), ValueRange::Inclusive(range)) => {
+                for (i, docid) in docids.iter().enumerate() {
+                    output[i] = optional_index
+                        .rank_if_exists(*docid)
+                        .map(|rowid| self.values.get_val(rowid))
+                        .filter(|val| range.contains(val));
+                }
+            }
+            (ColumnIndex::Multivalued(multivalued_index), ValueRange::All) => {
                for (i, docid) in docids.iter().enumerate() {
                    let range = multivalued_index.range(*docid);
                    let is_empty = range.start == range.end;
                    if !is_empty {
                        output[i] = Some(self.values.get_val(range.start));
+                    } else {
+                        output[i] = None;
+                    }
+                }
+            }
+            (ColumnIndex::Multivalued(multivalued_index), ValueRange::Inclusive(range)) => {
+                for (i, docid) in docids.iter().enumerate() {
+                    let row_range = multivalued_index.range(*docid);
+                    let is_empty = row_range.start == row_range.end;
+                    if !is_empty {
+                        let val = self.values.get_val(row_range.start);
+                        if range.contains(&val) {
+                            output[i] = Some(val);
+                        } else {
+                            output[i] = None;
+                        }
+                    } else {
+                        output[i] = None;
                    }
                }
            }
--- a/columnar/src/column_values/mod.rs
+++ b/columnar/src/column_values/mod.rs
@@ -110,6 +110,43 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
        }
    }

+    /// Loads the value at each of `indexes` into the matching slot of `output`,
+    /// storing `None` wherever the value falls outside `value_range`.
+    ///
+    /// Panics if `indexes` and `output` do not have the same length.
+    fn get_vals_in_value_range(
+        &self,
+        indexes: &[u32],
+        output: &mut [Option<T>],
+        value_range: ValueRange<T>,
+    ) {
+        assert!(indexes.len() == output.len());
+        match value_range {
+            ValueRange::All => self.get_vals_opt(indexes, output),
+            ValueRange::Inclusive(range) => {
+                // Main loop, manually unrolled over groups of four entries.
+                let quads = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
+                for (out_quad, idx_quad) in quads {
+                    let first = self.get_val(idx_quad[0]);
+                    out_quad[0] = range.contains(&first).then_some(first);
+                    let second = self.get_val(idx_quad[1]);
+                    out_quad[1] = range.contains(&second).then_some(second);
+                    let third = self.get_val(idx_quad[2]);
+                    out_quad[2] = range.contains(&third).then_some(third);
+                    let fourth = self.get_val(idx_quad[3]);
+                    out_quad[3] = range.contains(&fourth).then_some(fourth);
+                }
+                // Tail: the (at most three) entries the main loop left over.
+                let out_tail = output.chunks_exact_mut(4).into_remainder();
+                let idx_tail = indexes.chunks_exact(4).remainder();
+                for (out, idx) in out_tail.iter_mut().zip(idx_tail) {
+                    let val = self.get_val(*idx);
+                    *out = range.contains(&val).then_some(val);
+                }
+            }
+        }
+    }
+
    /// Fills an output buffer with the fast field values
    /// associated with the `DocId` going from
    /// `start` to `start + output.len()`.
@@ -214,6 +251,17 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
        self.as_ref().get_vals_opt(indexes, output)
    }

+    #[inline(always)]
+    fn get_vals_in_value_range(
+        &self,
+        indexes: &[u32],
+        output: &mut [Option<T>],
+        value_range: ValueRange<T>,
+    ) {
+        self.as_ref()
+            .get_vals_in_value_range(indexes, output, value_range)
+    }
+
    #[inline(always)]
    fn min_value(&self) -> T {
        self.as_ref().min_value()
--- a/columnar/src/column_values/u64_based/bitpacked.rs
+++ b/columnar/src/column_values/u64_based/bitpacked.rs
@@ -107,6 +107,37 @@ impl ColumnValues for BitpackedReader {
        self.stats.num_rows
    }

+    // Override of the trait's default implementation: the range test runs on the
+    // raw unpacked values, and only matching rows are converted back through
+    // `min_value + gcd * raw`.
+    fn get_vals_in_value_range(
+        &self,
+        indexes: &[u32],
+        output: &mut [Option<u64>],
+        value_range: ValueRange<u64>,
+    ) {
+        // Same length precondition as the default implementation enforces.
+        assert!(indexes.len() == output.len());
+        match value_range {
+            ValueRange::All => self.get_vals_opt(indexes, output),
+            ValueRange::Inclusive(range) => {
+                let Some(raw_range) =
+                    transform_range_before_linear_transformation(&self.stats, range)
+                else {
+                    // No raw range was produced for `range`: report no match at all.
+                    output.fill(None);
+                    return;
+                };
+                for (out, doc) in output.iter_mut().zip(indexes) {
+                    let raw_val = self.unpack_val(*doc);
+                    *out = raw_range
+                        .contains(&raw_val)
+                        .then(|| self.stats.min_value + self.stats.gcd.get() * raw_val);
+                }
+            }
+        }
+    }
+
    fn get_row_ids_for_value_range(
        &self,
        range: ValueRange<u64>,
--- a/src/collector/sort_key/sort_by_static_fast_value.rs
+++ b/src/collector/sort_key/sort_by_static_fast_value.rs
@@ -1,6 +1,6 @@
 use std::marker::PhantomData;

-use columnar::Column;
+use columnar::{Column, ValueRange};

 use crate::collector::sort_key::NaturalComparator;
 use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
@@ -94,7 +94,8 @@ impl<T: FastValue> SegmentSortKeyComputer for SortByFastValueSegmentSortKeyCompu

    fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec<Self::SegmentSortKey> {
        self.buffer.resize(docs.len(), None);
-        self.sort_column.first_vals(docs, &mut self.buffer);
+        self.sort_column
+            .first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All);
        &mut self.buffer
    }

--- a/src/collector/sort_key/sort_by_string.rs
+++ b/src/collector/sort_key/sort_by_string.rs
@@ -1,4 +1,4 @@
-use columnar::StrColumn;
+use columnar::{StrColumn, ValueRange};

 use crate::collector::sort_key::NaturalComparator;
 use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
@@ -64,7 +64,9 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
    fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec<Self::SegmentSortKey> {
        self.buffer.resize(docs.len(), None);
        if let Some(str_column) = &self.str_column_opt {
-            str_column.ords().first_vals(docs, &mut self.buffer);
+            str_column
+                .ords()
+                .first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All);
        }
        &mut self.buffer
    }