Revert "remove unused columnar api (#2742)" (#2748)

* Revert "remove unused columnar api (#2742)"

This reverts commit 8725594d47.

* Clippy comment + removing fill_vals

---------

Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
This commit is contained in:
Paul Masurel
2025-11-26 17:44:02 +01:00
committed by GitHub
parent 842fe9295f
commit 25d44fcec8
5 changed files with 60 additions and 22 deletions

View File

@@ -43,5 +43,26 @@ fn bench_group(mut runner: InputGroup<Column>) {
}
black_box(sum);
});
// Benchmark: fetch the first value of every document, one block of docids at a time.
runner.register("access_first_vals", |column| {
    const BLOCK_SIZE: usize = 32;
    let mut sum = 0;
    // Both scratch buffers are allocated once and reused for every block.
    let mut docs = vec![0; BLOCK_SIZE];
    let mut buffer = vec![None; BLOCK_SIZE];
    for block_start in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
        // Fill `docs` with the consecutive docids of the current block.
        for (slot, doc) in docs.iter_mut().zip(block_start..) {
            *slot = doc;
        }
        column.first_vals(&docs, &mut buffer);
        // Only documents that actually carry a value contribute to the sum.
        for val in buffer.iter().flatten() {
            sum += *val;
        }
    }
    // Keep the optimizer from discarding the work being measured.
    black_box(sum);
});
runner.run();
}

View File

@@ -14,7 +14,7 @@ pub use serialize::{
serialize_column_mappable_to_u128,
};
use crate::column_index::ColumnIndex;
use crate::column_index::{ColumnIndex, Set};
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
use crate::column_values::{ColumnValues, monotonic_map_column};
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
@@ -89,6 +89,31 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
self.values_for_doc(row_id).next()
}
/// Loads the first value of each docid in `docids` into the matching slot of `output`.
///
/// For empty, optional and multivalued columns, slot `i` of `output` is always written:
/// it receives `Some(first value)` when `docids[i]` has at least one value, and `None`
/// otherwise. A buffer reused across calls therefore never exposes values left over from
/// a previous call. Slots past `docids.len()` are left untouched in those cases.
///
/// The full-cardinality case is delegated to `get_vals_opt`, whose own requirements on
/// the slice lengths apply.
///
/// # Panics
///
/// For optional and multivalued columns, panics if `output` is shorter than `docids`.
#[inline]
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
    match &self.index {
        ColumnIndex::Empty { .. } => {
            // No document has a value: clear the slots instead of keeping stale content.
            for slot in output.iter_mut().take(docids.len()) {
                *slot = None;
            }
        }
        ColumnIndex::Full => self.values.get_vals_opt(docids, output),
        ColumnIndex::Optional(optional_index) => {
            for (i, docid) in docids.iter().enumerate() {
                output[i] = optional_index
                    .rank_if_exists(*docid)
                    .map(|rowid| self.values.get_val(rowid));
            }
        }
        ColumnIndex::Multivalued(multivalued_index) => {
            for (i, docid) in docids.iter().enumerate() {
                let range = multivalued_index.range(*docid);
                // An empty row range means the document has no value; write `None`
                // explicitly so this arm matches the `Optional` arm.
                output[i] = if range.start == range.end {
                    None
                } else {
                    Some(self.values.get_val(range.start))
                };
            }
        }
    }
}
/// Translates a block of docids to row_ids.
///
/// returns the row_ids and the matching docids on the same index

View File

@@ -726,24 +726,22 @@ mod tests {
.column_opt::<DateTime>("multi_date")
.unwrap()
.unwrap();
{
let mut dates = Vec::new();
assert_eq!(date_fast_field.get_val(0).into_timestamp_nanos(), 1i64);
dates.extend(dates_fast_field.values_for_doc(0));
let dates: Vec<DateTime> = dates_fast_field.values_for_doc(0u32).collect();
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_nanos(), 2i64);
assert_eq!(dates[1].into_timestamp_nanos(), 3i64);
}
{
let mut dates = Vec::new();
assert_eq!(date_fast_field.get_val(1).into_timestamp_nanos(), 4i64);
dates.extend(dates_fast_field.values_for_doc(1));
let dates: Vec<DateTime> = dates_fast_field.values_for_doc(1u32).collect();
assert!(dates.is_empty());
}
{
let mut dates = Vec::new();
assert_eq!(date_fast_field.get_val(2).into_timestamp_nanos(), 0i64);
dates.extend(dates_fast_field.values_for_doc(2));
let dates: Vec<DateTime> = dates_fast_field.values_for_doc(2u32).collect();
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_nanos(), 5i64);
assert_eq!(dates[1].into_timestamp_nanos(), 6i64);

View File

@@ -268,9 +268,7 @@ mod tests {
use crate::indexer::NoMergePolicy;
use crate::query::range_query::fast_field_range_doc_set::RangeDocSet;
use crate::query::range_query::range_query::InvertedIndexRangeQuery;
use crate::query::{
AllScorer, BitSetDocSet, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser,
};
use crate::query::{AllScorer, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser};
use crate::schema::{
Field, IntoIpv6Addr, Schema, TantivyDocument, FAST, INDEXED, STORED, TEXT,
};

View File

@@ -18,7 +18,7 @@ pub struct TermWeight {
}
enum TermOrEmptyOrAllScorer {
TermScorer(TermScorer),
TermScorer(Box<TermScorer>),
Empty,
AllMatch(AllScorer),
}
@@ -26,7 +26,7 @@ enum TermOrEmptyOrAllScorer {
impl TermOrEmptyOrAllScorer {
pub fn into_boxed_scorer(self) -> Box<dyn Scorer> {
match self {
TermOrEmptyOrAllScorer::TermScorer(scorer) => Box::new(scorer),
TermOrEmptyOrAllScorer::TermScorer(scorer) => scorer,
TermOrEmptyOrAllScorer::Empty => Box::new(EmptyScorer),
TermOrEmptyOrAllScorer::AllMatch(scorer) => Box::new(scorer),
}
@@ -48,9 +48,7 @@ impl Weight for TermWeight {
explanation.add_context(format!("Term={:?}", self.term,));
Ok(explanation)
}
TermOrEmptyOrAllScorer::Empty => {
return Err(does_not_match(doc));
}
TermOrEmptyOrAllScorer::Empty => Err(does_not_match(doc)),
TermOrEmptyOrAllScorer::AllMatch(_) => AllWeight.explain(reader, doc),
}
}
@@ -75,7 +73,7 @@ impl Weight for TermWeight {
) -> crate::Result<()> {
match self.specialized_scorer(reader, 1.0)? {
TermOrEmptyOrAllScorer::TermScorer(mut term_scorer) => {
for_each_scorer(&mut term_scorer, callback);
for_each_scorer(&mut *term_scorer, callback);
}
TermOrEmptyOrAllScorer::Empty => {}
TermOrEmptyOrAllScorer::AllMatch(mut all_scorer) => {
@@ -127,7 +125,7 @@ impl Weight for TermWeight {
match specialized_scorer {
TermOrEmptyOrAllScorer::TermScorer(term_scorer) => {
crate::query::boolean_query::block_wand_single_scorer(
term_scorer,
*term_scorer,
threshold,
callback,
);
@@ -173,7 +171,7 @@ impl TermWeight {
) -> crate::Result<Option<TermScorer>> {
let scorer = self.specialized_scorer(reader, boost)?;
Ok(match scorer {
TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(scorer),
TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(*scorer),
_ => None,
})
}
@@ -203,10 +201,8 @@ impl TermWeight {
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
let similarity_weight = self.similarity_weight.boost_by(boost);
Ok(TermOrEmptyOrAllScorer::TermScorer(TermScorer::new(
segment_postings,
fieldnorm_reader,
similarity_weight,
Ok(TermOrEmptyOrAllScorer::TermScorer(Box::new(
TermScorer::new(segment_postings, fieldnorm_reader, similarity_weight),
)))
}