mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
* Revert "remove unused columnar api (#2742)"
This reverts commit 8725594d47.
* Clippy comment + removing fill_vals
---------
Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
This commit is contained in:
@@ -43,5 +43,26 @@ fn bench_group(mut runner: InputGroup<Column>) {
|
||||
}
|
||||
black_box(sum);
|
||||
});
|
||||
runner.register("access_first_vals", |column| {
    // Walks the column in fixed-size blocks of docids, fetches the first value of
    // every doc in the block through `first_vals`, and accumulates the values found.
    const BLOCK_SIZE: usize = 32;
    let mut docs = vec![0; BLOCK_SIZE];
    let mut buffer = vec![None; BLOCK_SIZE];
    let mut sum = 0;
    for block_start in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
        // Docids of the current block: block_start, block_start + 1, ...
        for (offset, doc) in docs.iter_mut().enumerate() {
            *doc = offset as u32 + block_start;
        }

        column.first_vals(&docs, &mut buffer);
        // Only slots holding `Some(_)` contribute to the sum.
        for val in buffer.iter().flatten() {
            sum += *val;
        }
    }

    // Keep the optimizer from discarding the work being measured.
    black_box(sum);
});
|
||||
runner.run();
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ pub use serialize::{
|
||||
serialize_column_mappable_to_u128,
|
||||
};
|
||||
|
||||
use crate::column_index::ColumnIndex;
|
||||
use crate::column_index::{ColumnIndex, Set};
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
|
||||
use crate::column_values::{ColumnValues, monotonic_map_column};
|
||||
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
|
||||
@@ -89,6 +89,31 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
self.values_for_doc(row_id).next()
|
||||
}
|
||||
|
||||
/// Load the first value for each docid in the provided slice.
|
||||
#[inline]
|
||||
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
|
||||
match &self.index {
|
||||
ColumnIndex::Empty { .. } => {}
|
||||
ColumnIndex::Full => self.values.get_vals_opt(docids, output),
|
||||
ColumnIndex::Optional(optional_index) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
output[i] = optional_index
|
||||
.rank_if_exists(*docid)
|
||||
.map(|rowid| self.values.get_val(rowid));
|
||||
}
|
||||
}
|
||||
ColumnIndex::Multivalued(multivalued_index) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let range = multivalued_index.range(*docid);
|
||||
let is_empty = range.start == range.end;
|
||||
if !is_empty {
|
||||
output[i] = Some(self.values.get_val(range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates a block of docids to row_ids.
|
||||
///
|
||||
/// returns the row_ids and the matching docids on the same index
|
||||
|
||||
@@ -726,24 +726,22 @@ mod tests {
|
||||
.column_opt::<DateTime>("multi_date")
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
{
|
||||
let mut dates = Vec::new();
|
||||
assert_eq!(date_fast_field.get_val(0).into_timestamp_nanos(), 1i64);
|
||||
dates.extend(dates_fast_field.values_for_doc(0));
|
||||
let dates: Vec<DateTime> = dates_fast_field.values_for_doc(0u32).collect();
|
||||
assert_eq!(dates.len(), 2);
|
||||
assert_eq!(dates[0].into_timestamp_nanos(), 2i64);
|
||||
assert_eq!(dates[1].into_timestamp_nanos(), 3i64);
|
||||
}
|
||||
{
|
||||
let mut dates = Vec::new();
|
||||
assert_eq!(date_fast_field.get_val(1).into_timestamp_nanos(), 4i64);
|
||||
dates.extend(dates_fast_field.values_for_doc(1));
|
||||
let dates: Vec<DateTime> = dates_fast_field.values_for_doc(1u32).collect();
|
||||
assert!(dates.is_empty());
|
||||
}
|
||||
{
|
||||
let mut dates = Vec::new();
|
||||
assert_eq!(date_fast_field.get_val(2).into_timestamp_nanos(), 0i64);
|
||||
dates.extend(dates_fast_field.values_for_doc(2));
|
||||
let dates: Vec<DateTime> = dates_fast_field.values_for_doc(2u32).collect();
|
||||
assert_eq!(dates.len(), 2);
|
||||
assert_eq!(dates[0].into_timestamp_nanos(), 5i64);
|
||||
assert_eq!(dates[1].into_timestamp_nanos(), 6i64);
|
||||
|
||||
@@ -268,9 +268,7 @@ mod tests {
|
||||
use crate::indexer::NoMergePolicy;
|
||||
use crate::query::range_query::fast_field_range_doc_set::RangeDocSet;
|
||||
use crate::query::range_query::range_query::InvertedIndexRangeQuery;
|
||||
use crate::query::{
|
||||
AllScorer, BitSetDocSet, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser,
|
||||
};
|
||||
use crate::query::{AllScorer, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser};
|
||||
use crate::schema::{
|
||||
Field, IntoIpv6Addr, Schema, TantivyDocument, FAST, INDEXED, STORED, TEXT,
|
||||
};
|
||||
|
||||
@@ -18,7 +18,7 @@ pub struct TermWeight {
|
||||
}
|
||||
|
||||
enum TermOrEmptyOrAllScorer {
|
||||
TermScorer(TermScorer),
|
||||
TermScorer(Box<TermScorer>),
|
||||
Empty,
|
||||
AllMatch(AllScorer),
|
||||
}
|
||||
@@ -26,7 +26,7 @@ enum TermOrEmptyOrAllScorer {
|
||||
impl TermOrEmptyOrAllScorer {
|
||||
pub fn into_boxed_scorer(self) -> Box<dyn Scorer> {
|
||||
match self {
|
||||
TermOrEmptyOrAllScorer::TermScorer(scorer) => Box::new(scorer),
|
||||
TermOrEmptyOrAllScorer::TermScorer(scorer) => scorer,
|
||||
TermOrEmptyOrAllScorer::Empty => Box::new(EmptyScorer),
|
||||
TermOrEmptyOrAllScorer::AllMatch(scorer) => Box::new(scorer),
|
||||
}
|
||||
@@ -48,9 +48,7 @@ impl Weight for TermWeight {
|
||||
explanation.add_context(format!("Term={:?}", self.term,));
|
||||
Ok(explanation)
|
||||
}
|
||||
TermOrEmptyOrAllScorer::Empty => {
|
||||
return Err(does_not_match(doc));
|
||||
}
|
||||
TermOrEmptyOrAllScorer::Empty => Err(does_not_match(doc)),
|
||||
TermOrEmptyOrAllScorer::AllMatch(_) => AllWeight.explain(reader, doc),
|
||||
}
|
||||
}
|
||||
@@ -75,7 +73,7 @@ impl Weight for TermWeight {
|
||||
) -> crate::Result<()> {
|
||||
match self.specialized_scorer(reader, 1.0)? {
|
||||
TermOrEmptyOrAllScorer::TermScorer(mut term_scorer) => {
|
||||
for_each_scorer(&mut term_scorer, callback);
|
||||
for_each_scorer(&mut *term_scorer, callback);
|
||||
}
|
||||
TermOrEmptyOrAllScorer::Empty => {}
|
||||
TermOrEmptyOrAllScorer::AllMatch(mut all_scorer) => {
|
||||
@@ -127,7 +125,7 @@ impl Weight for TermWeight {
|
||||
match specialized_scorer {
|
||||
TermOrEmptyOrAllScorer::TermScorer(term_scorer) => {
|
||||
crate::query::boolean_query::block_wand_single_scorer(
|
||||
term_scorer,
|
||||
*term_scorer,
|
||||
threshold,
|
||||
callback,
|
||||
);
|
||||
@@ -173,7 +171,7 @@ impl TermWeight {
|
||||
) -> crate::Result<Option<TermScorer>> {
|
||||
let scorer = self.specialized_scorer(reader, boost)?;
|
||||
Ok(match scorer {
|
||||
TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(scorer),
|
||||
TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(*scorer),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
@@ -203,10 +201,8 @@ impl TermWeight {
|
||||
|
||||
let fieldnorm_reader = self.fieldnorm_reader(reader)?;
|
||||
let similarity_weight = self.similarity_weight.boost_by(boost);
|
||||
Ok(TermOrEmptyOrAllScorer::TermScorer(TermScorer::new(
|
||||
segment_postings,
|
||||
fieldnorm_reader,
|
||||
similarity_weight,
|
||||
Ok(TermOrEmptyOrAllScorer::TermScorer(Box::new(
|
||||
TermScorer::new(segment_postings, fieldnorm_reader, similarity_weight),
|
||||
)))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user