From 25d44fcec811da8d2d47877975eda31e58b77639 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 26 Nov 2025 17:44:02 +0100 Subject: [PATCH] Revert "remove unused columnar api (#2742)" (#2748) * Revert "remove unused columnar api (#2742)" This reverts commit 8725594d477680460a8c281139e8376c37462fe7. * Clippy comment + removing fill_vals --------- Co-authored-by: Paul Masurel --- columnar/benches/bench_access.rs | 21 +++++++++++++++++++++ columnar/src/column/mod.rs | 27 ++++++++++++++++++++++++++- src/fastfield/mod.rs | 10 ++++------ src/query/range_query/range_query.rs | 4 +--- src/query/term_query/term_weight.rs | 20 ++++++++------------ 5 files changed, 60 insertions(+), 22 deletions(-) diff --git a/columnar/benches/bench_access.rs b/columnar/benches/bench_access.rs index e82e8cd78..397a35af0 100644 --- a/columnar/benches/bench_access.rs +++ b/columnar/benches/bench_access.rs @@ -43,5 +43,26 @@ fn bench_group(mut runner: InputGroup) { } black_box(sum); }); + runner.register("access_first_vals", |column| { + let mut sum = 0; + const BLOCK_SIZE: usize = 32; + let mut docs = vec![0; BLOCK_SIZE]; + let mut buffer = vec![None; BLOCK_SIZE]; + for i in (0..NUM_DOCS).step_by(BLOCK_SIZE) { + // fill docs + #[allow(clippy::needless_range_loop)] + for idx in 0..BLOCK_SIZE { + docs[idx] = idx as u32 + i; + } + + column.first_vals(&docs, &mut buffer); + for val in buffer.iter() { + let Some(val) = val else { continue }; + sum += *val; + } + } + + black_box(sum); + }); runner.run(); } diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index c66ff31b9..cc2938bb8 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -14,7 +14,7 @@ pub use serialize::{ serialize_column_mappable_to_u128, }; -use crate::column_index::ColumnIndex; +use crate::column_index::{ColumnIndex, Set}; use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal; use crate::column_values::{ColumnValues, monotonic_map_column}; use 
crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId}; @@ -89,6 +89,31 @@ impl Column { self.values_for_doc(row_id).next() } + /// Load the first value for each docid in the provided slice. + #[inline] + pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) { + match &self.index { + ColumnIndex::Empty { .. } => {} + ColumnIndex::Full => self.values.get_vals_opt(docids, output), + ColumnIndex::Optional(optional_index) => { + for (i, docid) in docids.iter().enumerate() { + output[i] = optional_index + .rank_if_exists(*docid) + .map(|rowid| self.values.get_val(rowid)); + } + } + ColumnIndex::Multivalued(multivalued_index) => { + for (i, docid) in docids.iter().enumerate() { + let range = multivalued_index.range(*docid); + let is_empty = range.start == range.end; + if !is_empty { + output[i] = Some(self.values.get_val(range.start)); + } + } + } + } + } + /// Translates a block of docids to row_ids. /// /// returns the row_ids and the matching docids on the same index diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index db1ce0d77..726b9b76a 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -726,24 +726,22 @@ mod tests { .column_opt::<DateTime>("multi_date") .unwrap() .unwrap(); + { - let mut dates = Vec::new(); assert_eq!(date_fast_field.get_val(0).into_timestamp_nanos(), 1i64); - dates.extend(dates_fast_field.values_for_doc(0)); + let dates: Vec<DateTime> = dates_fast_field.values_for_doc(0u32).collect(); assert_eq!(dates.len(), 2); assert_eq!(dates[0].into_timestamp_nanos(), 2i64); assert_eq!(dates[1].into_timestamp_nanos(), 3i64); } { - let mut dates = Vec::new(); assert_eq!(date_fast_field.get_val(1).into_timestamp_nanos(), 4i64); - dates.extend(dates_fast_field.values_for_doc(1)); + let dates: Vec<DateTime> = dates_fast_field.values_for_doc(1u32).collect(); assert!(dates.is_empty()); } { - let mut dates = Vec::new(); assert_eq!(date_fast_field.get_val(2).into_timestamp_nanos(), 0i64); - 
dates.extend(dates_fast_field.values_for_doc(2)); + let dates: Vec<DateTime> = dates_fast_field.values_for_doc(2u32).collect(); assert_eq!(dates.len(), 2); assert_eq!(dates[0].into_timestamp_nanos(), 5i64); assert_eq!(dates[1].into_timestamp_nanos(), 6i64); diff --git a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs index 7e9e691ec..5fe7f03ec 100644 --- a/src/query/range_query/range_query.rs +++ b/src/query/range_query/range_query.rs @@ -268,9 +268,7 @@ mod tests { use crate::indexer::NoMergePolicy; use crate::query::range_query::fast_field_range_doc_set::RangeDocSet; use crate::query::range_query::range_query::InvertedIndexRangeQuery; - use crate::query::{ - AllScorer, BitSetDocSet, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser, - }; + use crate::query::{AllScorer, ConstScorer, EmptyScorer, EnableScoring, Query, QueryParser}; use crate::schema::{ Field, IntoIpv6Addr, Schema, TantivyDocument, FAST, INDEXED, STORED, TEXT, }; diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 39f59d147..89b527cca 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -18,7 +18,7 @@ pub struct TermWeight { } enum TermOrEmptyOrAllScorer { - TermScorer(TermScorer), + TermScorer(Box<TermScorer>), Empty, AllMatch(AllScorer), } @@ -26,7 +26,7 @@ enum TermOrEmptyOrAllScorer { impl TermOrEmptyOrAllScorer { pub fn into_boxed_scorer(self) -> Box<dyn Scorer> { match self { - TermOrEmptyOrAllScorer::TermScorer(scorer) => Box::new(scorer), + TermOrEmptyOrAllScorer::TermScorer(scorer) => scorer, TermOrEmptyOrAllScorer::Empty => Box::new(EmptyScorer), TermOrEmptyOrAllScorer::AllMatch(scorer) => Box::new(scorer), } @@ -48,9 +48,7 @@ impl Weight for TermWeight { explanation.add_context(format!("Term={:?}", self.term,)); Ok(explanation) } - TermOrEmptyOrAllScorer::Empty => { - return Err(does_not_match(doc)); - } + TermOrEmptyOrAllScorer::Empty => Err(does_not_match(doc)), TermOrEmptyOrAllScorer::AllMatch(_) => 
AllWeight.explain(reader, doc), } } @@ -75,7 +73,7 @@ impl Weight for TermWeight { ) -> crate::Result<()> { match self.specialized_scorer(reader, 1.0)? { TermOrEmptyOrAllScorer::TermScorer(mut term_scorer) => { - for_each_scorer(&mut term_scorer, callback); + for_each_scorer(&mut *term_scorer, callback); } TermOrEmptyOrAllScorer::Empty => {} TermOrEmptyOrAllScorer::AllMatch(mut all_scorer) => { @@ -127,7 +125,7 @@ impl Weight for TermWeight { match specialized_scorer { TermOrEmptyOrAllScorer::TermScorer(term_scorer) => { crate::query::boolean_query::block_wand_single_scorer( - term_scorer, + *term_scorer, threshold, callback, ); @@ -173,7 +171,7 @@ impl TermWeight { ) -> crate::Result<Option<TermScorer>> { let scorer = self.specialized_scorer(reader, boost)?; Ok(match scorer { - TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(scorer), + TermOrEmptyOrAllScorer::TermScorer(scorer) => Some(*scorer), _ => None, }) } @@ -203,10 +201,8 @@ impl TermWeight { let fieldnorm_reader = self.fieldnorm_reader(reader)?; let similarity_weight = self.similarity_weight.boost_by(boost); - Ok(TermOrEmptyOrAllScorer::TermScorer(TermScorer::new( - segment_postings, - fieldnorm_reader, - similarity_weight, + Ok(TermOrEmptyOrAllScorer::TermScorer(Box::new( + TermScorer::new(segment_postings, fieldnorm_reader, similarity_weight), ))) }