mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 09:32:54 +00:00
Replace Column::first_vals with Column::first_vals_in_value_range.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
use binggan::{InputGroup, black_box};
|
||||
use common::*;
|
||||
use tantivy_columnar::Column;
|
||||
use tantivy_columnar::{Column, ValueRange};
|
||||
|
||||
pub mod common;
|
||||
|
||||
@@ -55,7 +55,7 @@ fn bench_group(mut runner: InputGroup<Column>) {
|
||||
docs[idx] = idx as u32 + i;
|
||||
}
|
||||
|
||||
column.first_vals(&docs, &mut buffer);
|
||||
column.first_vals_in_value_range(&docs, &mut buffer, ValueRange::All);
|
||||
for val in buffer.iter() {
|
||||
let Some(val) = val else { continue };
|
||||
sum += *val;
|
||||
|
||||
@@ -91,23 +91,57 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
|
||||
/// Load the first value for each docid in the provided slice.
|
||||
#[inline]
|
||||
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
|
||||
match &self.index {
|
||||
ColumnIndex::Empty { .. } => {}
|
||||
ColumnIndex::Full => self.values.get_vals_opt(docids, output),
|
||||
ColumnIndex::Optional(optional_index) => {
|
||||
pub fn first_vals_in_value_range(
|
||||
&self,
|
||||
docids: &[DocId],
|
||||
output: &mut [Option<T>],
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
match (&self.index, value_range) {
|
||||
(ColumnIndex::Empty { .. }, _) => {}
|
||||
(ColumnIndex::Full, value_range) => {
|
||||
self.values
|
||||
.get_vals_in_value_range(docids, output, value_range);
|
||||
}
|
||||
(ColumnIndex::Optional(optional_index), ValueRange::All) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
output[i] = optional_index
|
||||
.rank_if_exists(*docid)
|
||||
.map(|rowid| self.values.get_val(rowid));
|
||||
}
|
||||
}
|
||||
ColumnIndex::Multivalued(multivalued_index) => {
|
||||
(ColumnIndex::Optional(optional_index), ValueRange::Inclusive(range)) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
output[i] = optional_index
|
||||
.rank_if_exists(*docid)
|
||||
.map(|rowid| self.values.get_val(rowid))
|
||||
.filter(|val| range.contains(val));
|
||||
}
|
||||
}
|
||||
(ColumnIndex::Multivalued(multivalued_index), ValueRange::All) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let range = multivalued_index.range(*docid);
|
||||
let is_empty = range.start == range.end;
|
||||
if !is_empty {
|
||||
output[i] = Some(self.values.get_val(range.start));
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
(ColumnIndex::Multivalued(multivalued_index), ValueRange::Inclusive(range)) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let row_range = multivalued_index.range(*docid);
|
||||
let is_empty = row_range.start == row_range.end;
|
||||
if !is_empty {
|
||||
let val = self.values.get_val(row_range.start);
|
||||
if range.contains(&val) {
|
||||
output[i] = Some(val);
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,6 +110,43 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
|
||||
}
|
||||
}
|
||||
|
||||
/// Load the values for the provided docids.
|
||||
///
|
||||
/// The values are filtered by the provided value range.
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
indexes: &[u32],
|
||||
output: &mut [Option<T>],
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
assert!(indexes.len() == output.len());
|
||||
match value_range {
|
||||
ValueRange::All => self.get_vals_opt(indexes, output),
|
||||
ValueRange::Inclusive(range) => {
|
||||
let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
|
||||
for (out_x4, idx_x4) in out_and_idx_chunks {
|
||||
let v0 = self.get_val(idx_x4[0]);
|
||||
out_x4[0] = if range.contains(&v0) { Some(v0) } else { None };
|
||||
let v1 = self.get_val(idx_x4[1]);
|
||||
out_x4[1] = if range.contains(&v1) { Some(v1) } else { None };
|
||||
let v2 = self.get_val(idx_x4[2]);
|
||||
out_x4[2] = if range.contains(&v2) { Some(v2) } else { None };
|
||||
let v3 = self.get_val(idx_x4[3]);
|
||||
out_x4[3] = if range.contains(&v3) { Some(v3) } else { None };
|
||||
}
|
||||
let out_and_idx_chunks = output
|
||||
.chunks_exact_mut(4)
|
||||
.into_remainder()
|
||||
.iter_mut()
|
||||
.zip(indexes.chunks_exact(4).remainder());
|
||||
for (out, idx) in out_and_idx_chunks {
|
||||
let v = self.get_val(*idx);
|
||||
*out = if range.contains(&v) { Some(v) } else { None };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
@@ -214,6 +251,17 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
|
||||
self.as_ref().get_vals_opt(indexes, output)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
indexes: &[u32],
|
||||
output: &mut [Option<T>],
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
self.as_ref()
|
||||
.get_vals_in_value_range(indexes, output, value_range)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn min_value(&self) -> T {
|
||||
self.as_ref().min_value()
|
||||
|
||||
@@ -107,6 +107,37 @@ impl ColumnValues for BitpackedReader {
|
||||
self.stats.num_rows
|
||||
}
|
||||
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
indexes: &[u32],
|
||||
output: &mut [Option<u64>],
|
||||
value_range: ValueRange<u64>,
|
||||
) {
|
||||
match value_range {
|
||||
ValueRange::All => {
|
||||
self.get_vals_opt(indexes, output);
|
||||
}
|
||||
ValueRange::Inclusive(range) => {
|
||||
if let Some(transformed_range) =
|
||||
transform_range_before_linear_transformation(&self.stats, range)
|
||||
{
|
||||
for (i, doc) in indexes.iter().enumerate() {
|
||||
let raw_val = self.unpack_val(*doc);
|
||||
if transformed_range.contains(&raw_val) {
|
||||
output[i] = Some(self.stats.min_value + self.stats.gcd.get() * raw_val);
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for out in output.iter_mut() {
|
||||
*out = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
range: ValueRange<u64>,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use columnar::Column;
|
||||
use columnar::{Column, ValueRange};
|
||||
|
||||
use crate::collector::sort_key::NaturalComparator;
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
@@ -94,7 +94,8 @@ impl<T: FastValue> SegmentSortKeyComputer for SortByFastValueSegmentSortKeyCompu
|
||||
|
||||
fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec<Self::SegmentSortKey> {
|
||||
self.buffer.resize(docs.len(), None);
|
||||
self.sort_column.first_vals(docs, &mut self.buffer);
|
||||
self.sort_column
|
||||
.first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All);
|
||||
&mut self.buffer
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use columnar::StrColumn;
|
||||
use columnar::{StrColumn, ValueRange};
|
||||
|
||||
use crate::collector::sort_key::NaturalComparator;
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
@@ -64,7 +64,9 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
|
||||
fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec<Self::SegmentSortKey> {
|
||||
self.buffer.resize(docs.len(), None);
|
||||
if let Some(str_column) = &self.str_column_opt {
|
||||
str_column.ords().first_vals(docs, &mut self.buffer);
|
||||
str_column
|
||||
.ords()
|
||||
.first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All);
|
||||
}
|
||||
&mut self.buffer
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user