Revert "remove unused columnar api (#2742)" (#2748)

* Revert "remove unused columnar api (#2742)"

This reverts commit 8725594d47.

* Clippy comment + removing fill_vals

---------

Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
This commit is contained in:
Paul Masurel
2025-11-26 17:44:02 +01:00
committed by GitHub
parent 842fe9295f
commit 25d44fcec8
5 changed files with 60 additions and 22 deletions

View File

@@ -43,5 +43,26 @@ fn bench_group(mut runner: InputGroup<Column>) {
} }
black_box(sum); black_box(sum);
}); });
runner.register("access_first_vals", |column| {
    // Number of doc ids resolved by each `first_vals` call.
    const BLOCK_SIZE: usize = 32;
    let mut docs = vec![0; BLOCK_SIZE];
    let mut buffer = vec![None; BLOCK_SIZE];
    let mut sum = 0;
    for block_start in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
        // Fill the block with consecutive doc ids starting at `block_start`.
        for (offset, doc) in docs.iter_mut().enumerate() {
            *doc = offset as u32 + block_start;
        }
        column.first_vals(&docs, &mut buffer);
        // Sum only the slots that hold a value.
        for val in buffer.iter().flatten() {
            sum += *val;
        }
    }
    black_box(sum);
});
runner.run(); runner.run();
} }

View File

@@ -14,7 +14,7 @@ pub use serialize::{
serialize_column_mappable_to_u128, serialize_column_mappable_to_u128,
}; };
use crate::column_index::ColumnIndex; use crate::column_index::{ColumnIndex, Set};
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal; use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
use crate::column_values::{ColumnValues, monotonic_map_column}; use crate::column_values::{ColumnValues, monotonic_map_column};
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId}; use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
@@ -89,6 +89,31 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
self.values_for_doc(row_id).next() self.values_for_doc(row_id).next()
} }
/// Loads the first value of each document in `docids` into `output`.
///
/// `output[i]` receives the first value of `docids[i]`, or `None` when that
/// document has no value. For empty, optional and multivalued columns every
/// `output[i]` with `i < docids.len()` is overwritten, so a buffer reused
/// across calls does not keep values from a previous call. The full case is
/// delegated to `get_vals_opt` on the column values.
///
/// # Panics
///
/// For optional and multivalued columns, indexing `output[i]` panics if
/// `output` is shorter than `docids`.
#[inline]
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
    match &self.index {
        ColumnIndex::Empty { .. } => {
            // No document has a value: clear the slots matching `docids`
            // instead of leaving whatever the caller's buffer contained.
            for slot in output.iter_mut().take(docids.len()) {
                *slot = None;
            }
        }
        ColumnIndex::Full => self.values.get_vals_opt(docids, output),
        ColumnIndex::Optional(optional_index) => {
            for (i, docid) in docids.iter().enumerate() {
                output[i] = optional_index
                    .rank_if_exists(*docid)
                    .map(|rowid| self.values.get_val(rowid));
            }
        }
        ColumnIndex::Multivalued(multivalued_index) => {
            for (i, docid) in docids.iter().enumerate() {
                let range = multivalued_index.range(*docid);
                // An empty row range means the document has no value; write
                // `None` explicitly so a stale entry cannot survive.
                output[i] = if range.start == range.end {
                    None
                } else {
                    Some(self.values.get_val(range.start))
                };
            }
        }
    }
}
/// Translates a block of docids to row_ids. /// Translates a block of docids to row_ids.
/// ///
/// returns the row_ids and the matching docids on the same index /// returns the row_ids and the matching docids on the same index

View File

@@ -726,24 +726,22 @@ mod tests {
.column_opt::<DateTime>("multi_date") .column_opt::<DateTime>("multi_date")
.unwrap() .unwrap()
.unwrap(); .unwrap();
{ {
let mut dates = Vec::new();
assert_eq!(date_fast_field.get_val(0).into_timestamp_nanos(), 1i64); assert_eq!(date_fast_field.get_val(0).into_timestamp_nanos(), 1i64);
dates.extend(dates_fast_field.values_for_doc(0)); let dates: Vec<DateTime> = dates_fast_field.values_for_doc(0u32).collect();
assert_eq!(dates.len(), 2); assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_nanos(), 2i64); assert_eq!(dates[0].into_timestamp_nanos(), 2i64);
assert_eq!(dates[1].into_timestamp_nanos(), 3i64); assert_eq!(dates[1].into_timestamp_nanos(), 3i64);
} }
{ {
let mut dates = Vec::new();
assert_eq!(date_fast_field.get_val(1).into_timestamp_nanos(), 4i64); assert_eq!(date_fast_field.get_val(1).into_timestamp_nanos(), 4i64);
dates.extend(dates_fast_field.values_for_doc(1)); let dates: Vec<DateTime> = dates_fast_field.values_for_doc(1u32).collect();
assert!(dates.is_empty()); assert!(dates.is_empty());
} }
{ {
let mut dates = Vec::new();
assert_eq!(date_fast_field.get_val(2).into_timestamp_nanos(), 0i64); assert_eq!(date_fast_field.get_val(2).into_timestamp_nanos(), 0i64);
dates.extend(dates_fast_field.values_for_doc(2)); let dates: Vec<DateTime> = dates_fast_field.values_for_doc(2u32).collect();
assert_eq!(dates.len(), 2); assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_nanos(), 5i64); assert_eq!(dates[0].into_timestamp_nanos(), 5i64);
assert_eq!(dates[1].into_timestamp_nanos(), 6i64); assert_eq!(dates[1].into_timestamp_nanos(), 6i64);

View File

@@ -268,9 +268,7 @@ mod tests {
use crate::indexer::NoMergePolicy; use crate::indexer::NoMergePolicy;
use crate::query::range_query::fast_field_range_doc_set::RangeDocSet; use crate::query::range_query::fast_field_range_doc_set::RangeDocSet;
use crate::query::range_query::range_query::InvertedIndexRangeQuery; use crate::query::range_query::range_query::InvertedIndexRangeQuery;
use crate::query::{ use crate::query::{AllScorer, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser};
AllScorer, BitSetDocSet, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser,
};
use crate::schema::{ use crate::schema::{
Field, IntoIpv6Addr, Schema, TantivyDocument, FAST, INDEXED, STORED, TEXT, Field, IntoIpv6Addr, Schema, TantivyDocument, FAST, INDEXED, STORED, TEXT,
}; };

View File

@@ -18,7 +18,7 @@ pub struct TermWeight {
} }
enum TermOrEmptyOrAllScorer { enum TermOrEmptyOrAllScorer {
TermScorer(TermScorer), TermScorer(Box<TermScorer>),
Empty, Empty,
AllMatch(AllScorer), AllMatch(AllScorer),
} }
@@ -26,7 +26,7 @@ enum TermOrEmptyOrAllScorer {
impl TermOrEmptyOrAllScorer { impl TermOrEmptyOrAllScorer {
pub fn into_boxed_scorer(self) -> Box<dyn Scorer> { pub fn into_boxed_scorer(self) -> Box<dyn Scorer> {
match self { match self {
TermOrEmptyOrAllScorer::TermScorer(scorer) => Box::new(scorer), TermOrEmptyOrAllScorer::TermScorer(scorer) => scorer,
TermOrEmptyOrAllScorer::Empty => Box::new(EmptyScorer), TermOrEmptyOrAllScorer::Empty => Box::new(EmptyScorer),
TermOrEmptyOrAllScorer::AllMatch(scorer) => Box::new(scorer), TermOrEmptyOrAllScorer::AllMatch(scorer) => Box::new(scorer),
} }
@@ -48,9 +48,7 @@ impl Weight for TermWeight {
explanation.add_context(format!("Term={:?}", self.term,)); explanation.add_context(format!("Term={:?}", self.term,));
Ok(explanation) Ok(explanation)
} }
TermOrEmptyOrAllScorer::Empty => { TermOrEmptyOrAllScorer::Empty => Err(does_not_match(doc)),
return Err(does_not_match(doc));
}
TermOrEmptyOrAllScorer::AllMatch(_) => AllWeight.explain(reader, doc), TermOrEmptyOrAllScorer::AllMatch(_) => AllWeight.explain(reader, doc),
} }
} }
@@ -75,7 +73,7 @@ impl Weight for TermWeight {
) -> crate::Result<()> { ) -> crate::Result<()> {
match self.specialized_scorer(reader, 1.0)? { match self.specialized_scorer(reader, 1.0)? {
TermOrEmptyOrAllScorer::TermScorer(mut term_scorer) => { TermOrEmptyOrAllScorer::TermScorer(mut term_scorer) => {
for_each_scorer(&mut term_scorer, callback); for_each_scorer(&mut *term_scorer, callback);
} }
TermOrEmptyOrAllScorer::Empty => {} TermOrEmptyOrAllScorer::Empty => {}
TermOrEmptyOrAllScorer::AllMatch(mut all_scorer) => { TermOrEmptyOrAllScorer::AllMatch(mut all_scorer) => {
@@ -127,7 +125,7 @@ impl Weight for TermWeight {
match specialized_scorer { match specialized_scorer {
TermOrEmptyOrAllScorer::TermScorer(term_scorer) => { TermOrEmptyOrAllScorer::TermScorer(term_scorer) => {
crate::query::boolean_query::block_wand_single_scorer( crate::query::boolean_query::block_wand_single_scorer(
term_scorer, *term_scorer,
threshold, threshold,
callback, callback,
); );
@@ -173,7 +171,7 @@ impl TermWeight {
) -> crate::Result<Option<TermScorer>> { ) -> crate::Result<Option<TermScorer>> {
let scorer = self.specialized_scorer(reader, boost)?; let scorer = self.specialized_scorer(reader, boost)?;
Ok(match scorer { Ok(match scorer {
TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(scorer), TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(*scorer),
_ => None, _ => None,
}) })
} }
@@ -203,10 +201,8 @@ impl TermWeight {
let fieldnorm_reader = self.fieldnorm_reader(reader)?; let fieldnorm_reader = self.fieldnorm_reader(reader)?;
let similarity_weight = self.similarity_weight.boost_by(boost); let similarity_weight = self.similarity_weight.boost_by(boost);
Ok(TermOrEmptyOrAllScorer::TermScorer(TermScorer::new( Ok(TermOrEmptyOrAllScorer::TermScorer(Box::new(
segment_postings, TermScorer::new(segment_postings, fieldnorm_reader, similarity_weight),
fieldnorm_reader,
similarity_weight,
))) )))
} }