diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs index 7848cf544..05e49a405 100644 --- a/fastfield_codecs/src/column.rs +++ b/fastfield_codecs/src/column.rs @@ -1,5 +1,7 @@ use std::ops::Range; +use crate::ColumnIter; + pub trait Column { /// Return the value associated to the given idx. /// @@ -16,8 +18,12 @@ pub trait Column { /// /// May panic if `range.end()` is greater than /// the segment's `maxdoc`. - fn get_range(&self, range: Range) -> Box + '_> { - Box::new(range.map(|idx| self.get_val(idx))) + #[inline] + fn get_range(&self, range: Range) -> ColumnIter<'_, Self, T> + where + Self: Sized, + { + ColumnIter::new(self, range) } /// Returns the minimum value for this fast field. diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 602c7fda0..d923938c4 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -4,6 +4,8 @@ extern crate more_asserts; use std::io; use std::io::Write; +use std::marker::PhantomData; +use std::ops::Range; use common::BinarySerializable; use ownedbytes::OwnedBytes; @@ -112,6 +114,41 @@ impl<'a> Column for &'a [u64] { } } +pub struct ColumnIter<'a, C: Column, I> { + column: &'a C, + end_pos: u64, + current_pos: u64, + _phantom: PhantomData, +} + +impl<'a, C: Column, I> ColumnIter<'a, C, I> { + #[inline] + pub fn new(col: &'a C, range: Range) -> Self { + let current_pos = range.start; + Self { + column: col, + end_pos: range.end, + current_pos, + _phantom: PhantomData, + } + } +} + +impl<'a, C: Column, I> Iterator for ColumnIter<'a, C, I> { + type Item = I; + + #[inline] + fn next(&mut self) -> Option { + if self.current_pos < self.end_pos { + let val = self.column.get_val(self.current_pos); + self.current_pos += 1; + Some(val) + } else { + None + } + } +} + impl Column for Vec { fn get_val(&self, position: u64) -> u64 { self[position as usize] diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index fc0049665..a57dd12be 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -982,6 +982,67 @@ mod bench { use super::*; use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr}; use crate::fastfield::tests::generate_permutation_gcd; + use crate::schema::{NumericOptions, Schema}; + use crate::Document; + + fn multi_values(num_docs: usize, vals_per_doc: usize) -> Vec> { + let mut vals = vec![]; + for _i in 0..num_docs { + let mut block = vec![]; + for j in 0..vals_per_doc { + block.push(j as u64); + } + vals.push(block); + } + + vals + } + + #[bench] + fn bench_multi_value_fflookup(b: &mut Bencher) { + let num_docs = 100_000; + + let path = Path::new("test"); + let directory: RamDirectory = RamDirectory::create(); + { + let options = NumericOptions::default().set_fast(Cardinality::MultiValues); + let mut schema_builder = Schema::builder(); + let field = schema_builder.add_u64_field("field", options); + let schema = schema_builder.build(); + + let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); + let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); + for block in &multi_values(num_docs, 3) { + let mut doc = Document::new(); + for val in block { + doc.add_u64(field, *val); + } + fast_field_writers.add_document(&doc); + } + fast_field_writers + .serialize(&mut serializer, &HashMap::new(), None) + .unwrap(); + serializer.close().unwrap(); + } + let file = directory.open_read(&path).unwrap(); + { + let fast_fields_composite = CompositeFile::open(&file).unwrap(); + let data_idx = fast_fields_composite.open_read_with_idx(*FIELD, 0).unwrap(); + let idx_reader = DynamicFastFieldReader::::open(data_idx).unwrap(); + + let data_vals = fast_fields_composite.open_read_with_idx(*FIELD, 1).unwrap(); + let vals_reader = DynamicFastFieldReader::::open(data_vals).unwrap(); + let fast_field_reader = MultiValuedFastFieldReader::open(idx_reader, vals_reader); + b.iter(|| { + let mut sum = 0u64; + for i in 0u32..num_docs as u32 { + sum += fast_field_reader.get_vals(i).sum::(); + } + sum + }); + } + } #[bench] fn bench_intfastfield_linear_veclookup(b: &mut Bencher) { diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 8c3dea9ab..d1291e9fb 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -18,7 +18,9 @@ pub struct MultiValuedFastFieldReader { vals_reader: DynamicFastFieldReader, } -impl MultiValuedFastFieldReader { +impl MultiValuedFastFieldReader +where DynamicFastFieldReader: Column +{ pub(crate) fn open( idx_reader: DynamicFastFieldReader, vals_reader: DynamicFastFieldReader, @@ -41,7 +43,7 @@ impl MultiValuedFastFieldReader { /// Returns the array of values associated to the given `doc`. #[inline] - pub fn get_vals(&self, doc: DocId) -> Box + '_> { + pub fn get_vals(&self, doc: DocId) -> impl Iterator + '_ { let range = self.range(doc); self.vals_reader.get_range(range) } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 685488995..585cb59b9 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -40,40 +40,39 @@ impl DynamicFastFieldReader { mut bytes: OwnedBytes, codec_type: FastFieldCodecType, ) -> crate::Result> { - let reader = match codec_type { - FastFieldCodecType::Bitpacked => { - DynamicFastFieldReader::Bitpacked(BitpackedCodec::open_from_bytes(bytes)?.into()) - } - FastFieldCodecType::Linear => { - DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into()) - } - FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear( - BlockwiseLinearCodec::open_from_bytes(bytes)?.into(), - ), - FastFieldCodecType::Gcd => { - let codec_type = FastFieldCodecType::deserialize(&mut bytes)?; - match codec_type { - FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD( - open_gcd_from_bytes::(bytes)?.into(), - ), - FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD( - open_gcd_from_bytes::(bytes)?.into(), - ), - FastFieldCodecType::BlockwiseLinear => { - DynamicFastFieldReader::BlockwiseLinearGCD( - open_gcd_from_bytes::(bytes)?.into(), - ) - } - FastFieldCodecType::Gcd => { - return Err(DataCorruption::comment_only( + let reader = + match codec_type { + FastFieldCodecType::Bitpacked => DynamicFastFieldReader::Bitpacked( + BitpackedCodec::open_from_bytes(bytes)?.into(), + ), + FastFieldCodecType::Linear => { + DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into()) + } + FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear( + BlockwiseLinearCodec::open_from_bytes(bytes)?.into(), + ), + FastFieldCodecType::Gcd => { + let codec_type = FastFieldCodecType::deserialize(&mut bytes)?; + match codec_type { + FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD( + open_gcd_from_bytes::(bytes)?.into(), + ), + FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD( + open_gcd_from_bytes::(bytes)?.into(), + ), + FastFieldCodecType::BlockwiseLinear => { + DynamicFastFieldReader::BlockwiseLinearGCD( + open_gcd_from_bytes::(bytes)?.into(), + ) + } + FastFieldCodecType::Gcd => return Err(DataCorruption::comment_only( "Gcd codec wrapped into another gcd codec. This combination is not \ allowed.", ) - .into()) + .into()), } } - } - }; + }; Ok(reader) } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 31a3cfcf8..f394751dd 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -578,6 +578,7 @@ impl IndexMerger { stats: FastFieldStats, } impl<'a> Column for FieldIndexAccessProvider<'a> { + #[inline] fn get_val(&self, doc: u64) -> u64 { self.offsets[doc as usize] }