diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs index 7bdc9d06b..01f3cc9d5 100644 --- a/examples/custom_collector.rs +++ b/examples/custom_collector.rs @@ -7,10 +7,11 @@ // Of course, you can have a look at the tantivy's built-in collectors // such as the `CountCollector` for more examples. +use fastfield_codecs::Column; // --- // Importing tantivy... use tantivy::collector::{Collector, SegmentCollector}; -use tantivy::fastfield::{DynamicFastFieldReader, FastFieldReader}; +use tantivy::fastfield::DynamicFastFieldReader; use tantivy::query::QueryParser; use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT}; use tantivy::{doc, Index, Score, SegmentReader}; @@ -103,7 +104,7 @@ impl SegmentCollector for StatsSegmentCollector { type Fruit = Option; fn collect(&mut self, doc: u32, _score: Score) { - let value = self.fast_field_reader.get(doc) as f64; + let value = self.fast_field_reader.get_val(doc as u64) as f64; self.stats.count += 1; self.stats.sum += value; self.stats.squared_sum += value * value; diff --git a/examples/warmer.rs b/examples/warmer.rs index 5ca3597d0..2f5ee56d2 100644 --- a/examples/warmer.rs +++ b/examples/warmer.rs @@ -2,8 +2,8 @@ use std::cmp::Reverse; use std::collections::{HashMap, HashSet}; use std::sync::{Arc, RwLock, Weak}; +use fastfield_codecs::Column; use tantivy::collector::TopDocs; -use tantivy::fastfield::FastFieldReader; use tantivy::query::QueryParser; use tantivy::schema::{Field, Schema, FAST, TEXT}; use tantivy::{ @@ -52,7 +52,7 @@ impl Warmer for DynamicPriceColumn { let product_id_reader = segment.fast_fields().u64(self.field)?; let product_ids: Vec = segment .doc_ids_alive() - .map(|doc| product_id_reader.get(doc)) + .map(|doc| product_id_reader.get_val(doc as u64)) .collect(); let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter(); let mut price_vals: Vec = Vec::new(); diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 4270877bd..7b696777a 
100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -4,7 +4,7 @@ use common::BinarySerializable; use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; -use crate::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess}; +use crate::{Column, FastFieldCodec, FastFieldCodecType}; /// Depending on the field type, a different /// fast field is required. @@ -17,7 +17,7 @@ pub struct BitpackedReader { num_vals: u64, } -impl FastFieldDataAccess for BitpackedReader { +impl Column for BitpackedReader { #[inline] fn get_val(&self, doc: u64) -> u64 { self.min_value_u64 + self.bit_unpacker.get(doc, &self.data) @@ -124,10 +124,7 @@ impl FastFieldCodec for BitpackedCodec { /// It requires a `min_value` and a `max_value` to compute /// compute the minimum number of bits required to encode /// values. - fn serialize( - write: &mut impl Write, - fastfield_accessor: &dyn FastFieldDataAccess, - ) -> io::Result<()> { + fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column) -> io::Result<()> { let mut serializer = BitpackedSerializerLegacy::open( write, fastfield_accessor.min_value(), @@ -142,7 +139,7 @@ impl FastFieldCodec for BitpackedCodec { Ok(()) } - fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option { + fn estimate(fastfield_accessor: &impl Column) -> Option { let amplitude = fastfield_accessor.max_value() - fastfield_accessor.min_value(); let num_bits = compute_num_bits(amplitude); let num_bits_uncompressed = 64; diff --git a/fastfield_codecs/src/blockwise_linear.rs b/fastfield_codecs/src/blockwise_linear.rs index 619d1faca..64dde9b45 100644 --- a/fastfield_codecs/src/blockwise_linear.rs +++ b/fastfield_codecs/src/blockwise_linear.rs @@ -18,7 +18,7 @@ use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::linear::{get_calculated_value, get_slope}; -use crate::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess}; 
+use crate::{Column, FastFieldCodec, FastFieldCodecType}; const CHUNK_SIZE: u64 = 512; @@ -146,7 +146,7 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio &interpolations[get_interpolation_position(doc)] } -impl FastFieldDataAccess for BlockwiseLinearReader { +impl Column for BlockwiseLinearReader { #[inline] fn get_val(&self, idx: u64) -> u64 { let interpolation = get_interpolation_function(idx, &self.footer.interpolations); @@ -195,10 +195,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { } /// Creates a new fast field serializer. - fn serialize( - write: &mut impl Write, - fastfield_accessor: &dyn FastFieldDataAccess, - ) -> io::Result<()> { + fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column) -> io::Result<()> { assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value()); let first_val = fastfield_accessor.get_val(0); @@ -292,7 +289,7 @@ impl FastFieldCodec for BlockwiseLinearCodec { /// estimation for linear interpolation is hard because, you don't know /// where the local maxima are for the deviation of the calculated value and /// the offset is also unknown. - fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option { + fn estimate(fastfield_accessor: &impl Column) -> Option { if fastfield_accessor.num_vals() < 10 * CHUNK_SIZE { return None; } diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs new file mode 100644 index 000000000..a62bee5b0 --- /dev/null +++ b/fastfield_codecs/src/column.rs @@ -0,0 +1,49 @@ +pub trait Column { + /// Return the value associated to the given idx. + /// + /// This accessor should return as fast as possible. + /// + /// # Panics + /// + /// May panic if `idx` is greater than the column length. + fn get_val(&self, idx: u64) -> T; + + /// Fills an output buffer with the fast field values + /// associated with the `DocId` going from + /// `start` to `start + output.len()`. 
+ /// + /// Regardless of the value type `T`, this method works by + /// - transmuting the output array + /// - extracting the values as if they were `u64` + /// - possibly converting the `u64` value to the right type. + /// + /// # Panics + /// + /// May panic if `start + output.len()` is greater than + /// the segment's `maxdoc`. + fn get_range(&self, start: u64, output: &mut [T]) { + for (out, idx) in output.iter_mut().zip(start..) { + *out = self.get_val(idx); + } + } + + /// Returns the minimum value for this fast field. + /// + /// The min value does not take into account possibly + /// deleted documents, and should be considered as a lower bound + /// of the actual minimum value. + fn min_value(&self) -> T; + + /// Returns the maximum value for this fast field. + /// + /// The max value does not take into account possibly + /// deleted documents, and should be considered as an upper bound + /// of the actual maximum value. + fn max_value(&self) -> T; + + fn num_vals(&self) -> u64; + /// Returns an iterator over the data. + fn iter<'a>(&'a self) -> Box + 'a> { + Box::new((0..self.num_vals()).map(|idx| self.get_val(idx))) + } +} diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 172f7e0d9..9bd842bef 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -12,16 +12,9 @@ pub mod bitpacked; pub mod blockwise_linear; pub mod linear; -pub trait FastFieldDataAccess { - fn get_val(&self, doc: u64) -> u64; - fn min_value(&self) -> u64; - fn max_value(&self) -> u64; - fn num_vals(&self) -> u64; - /// Returns a iterator over the data - fn iter<'a>(&'a self) -> Box + 'a> { - Box::new((0..self.num_vals()).map(|idx| self.get_val(idx))) - } -} +mod column; + +pub use self::column::Column; #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)] #[repr(u8)] @@ -68,7 +61,7 @@ pub trait FastFieldCodec { /// used for debugging and de/serialization. 
const CODEC_TYPE: FastFieldCodecType; - type Reader: FastFieldDataAccess; + type Reader: Column; /// Reads the metadata and returns the CodecReader fn open_from_bytes(bytes: OwnedBytes) -> io::Result; @@ -77,10 +70,7 @@ pub trait FastFieldCodec { /// /// The fastfield_accessor iterator should be preferred over using fastfield_accessor for /// performance reasons. - fn serialize( - write: &mut impl Write, - fastfield_accessor: &dyn FastFieldDataAccess, - ) -> io::Result<()>; + fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column) -> io::Result<()>; /// Returns an estimate of the compression ratio. /// If the codec is not applicable, returns `None`. @@ -89,7 +79,7 @@ pub trait FastFieldCodec { /// /// It could make sense to also return a value representing /// computational complexity. - fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option; + fn estimate(fastfield_accessor: &impl Column) -> Option; } #[derive(Debug, Clone)] @@ -100,7 +90,7 @@ pub struct FastFieldStats { pub num_vals: u64, } -impl<'a> FastFieldDataAccess for &'a [u64] { +impl<'a> Column for &'a [u64] { fn get_val(&self, position: u64) -> u64 { self[position as usize] } @@ -122,7 +112,7 @@ impl<'a> FastFieldDataAccess for &'a [u64] { } } -impl FastFieldDataAccess for Vec { +impl Column for Vec { fn get_val(&self, position: u64) -> u64 { self[position as usize] } diff --git a/fastfield_codecs/src/linear.rs b/fastfield_codecs/src/linear.rs index e49b202d8..87ba8098b 100644 --- a/fastfield_codecs/src/linear.rs +++ b/fastfield_codecs/src/linear.rs @@ -5,7 +5,7 @@ use common::{BinarySerializable, FixedSize}; use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; -use crate::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess}; +use crate::{Column, FastFieldCodec, FastFieldCodecType}; /// Depending on the field type, a different /// fast field is required. 
@@ -57,7 +57,7 @@ impl FixedSize for LinearFooter { const SIZE_IN_BYTES: usize = 56; } -impl FastFieldDataAccess for LinearReader { +impl Column for LinearReader { #[inline] fn get_val(&self, doc: u64) -> u64 { let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope); @@ -143,10 +143,7 @@ impl FastFieldCodec for LinearCodec { } /// Creates a new fast field serializer. - fn serialize( - write: &mut impl Write, - fastfield_accessor: &dyn FastFieldDataAccess, - ) -> io::Result<()> { + fn serialize(write: &mut impl Write, fastfield_accessor: &dyn Column) -> io::Result<()> { assert!(fastfield_accessor.min_value() <= fastfield_accessor.max_value()); let first_val = fastfield_accessor.get_val(0); @@ -196,7 +193,7 @@ impl FastFieldCodec for LinearCodec { /// estimation for linear interpolation is hard because, you don't know /// where the local maxima for the deviation of the calculated value are and /// the offset to shift all values to >=0 is also unknown. - fn estimate(fastfield_accessor: &impl FastFieldDataAccess) -> Option { + fn estimate(fastfield_accessor: &impl Column) -> Option { if fastfield_accessor.num_vals() < 3 { return None; // disable compressor for this case } diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs index 6e9b0f9b4..7f60efcfa 100644 --- a/src/aggregation/bucket/histogram/histogram.rs +++ b/src/aggregation/bucket/histogram/histogram.rs @@ -1,6 +1,7 @@ use std::cmp::Ordering; use std::fmt::Display; +use fastfield_codecs::Column; use itertools::Itertools; use serde::{Deserialize, Serialize}; @@ -14,7 +15,7 @@ use crate::aggregation::intermediate_agg_result::{ IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry, }; use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader}; +use crate::fastfield::DynamicFastFieldReader; use crate::schema::Type; 
use crate::{DocId, TantivyError}; @@ -331,10 +332,10 @@ impl SegmentHistogramCollector { .expect("unexpected fast field cardinatility"); let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val0 = self.f64_from_fastfield_u64(accessor.get(docs[0])); - let val1 = self.f64_from_fastfield_u64(accessor.get(docs[1])); - let val2 = self.f64_from_fastfield_u64(accessor.get(docs[2])); - let val3 = self.f64_from_fastfield_u64(accessor.get(docs[3])); + let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0] as u64)); + let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1] as u64)); + let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2] as u64)); + let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3] as u64)); let bucket_pos0 = get_bucket_num(val0); let bucket_pos1 = get_bucket_num(val1); @@ -370,8 +371,8 @@ impl SegmentHistogramCollector { &bucket_with_accessor.sub_aggregation, )?; } - for doc in iter.remainder() { - let val = f64_from_fastfield_u64(accessor.get(*doc), &self.field_type); + for &doc in iter.remainder() { + let val = f64_from_fastfield_u64(accessor.get_val(doc as u64), &self.field_type); if !bounds.contains(val) { continue; } @@ -382,7 +383,7 @@ impl SegmentHistogramCollector { self.buckets[bucket_pos].key, get_bucket_val(val, self.interval, self.offset) as f64 ); - self.increment_bucket(bucket_pos, *doc, &bucket_with_accessor.sub_aggregation)?; + self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?; } if force_flush { if let Some(sub_aggregations) = self.sub_aggregations.as_mut() { diff --git a/src/aggregation/bucket/range.rs b/src/aggregation/bucket/range.rs index bc381c7dc..1019c4294 100644 --- a/src/aggregation/bucket/range.rs +++ b/src/aggregation/bucket/range.rs @@ -1,6 +1,7 @@ use std::fmt::Debug; use std::ops::Range; +use fastfield_codecs::Column; use fnv::FnvHashMap; use serde::{Deserialize, Serialize}; @@ -12,7 +13,6 @@ use crate::aggregation::intermediate_agg_result::{ 
}; use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector}; use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64, Key, SerializedKey}; -use crate::fastfield::FastFieldReader; use crate::schema::Type; use crate::{DocId, TantivyError}; @@ -264,10 +264,10 @@ impl SegmentRangeCollector { .as_single() .expect("unexpected fast field cardinatility"); for docs in iter.by_ref() { - let val1 = accessor.get(docs[0]); - let val2 = accessor.get(docs[1]); - let val3 = accessor.get(docs[2]); - let val4 = accessor.get(docs[3]); + let val1 = accessor.get_val(docs[0] as u64); + let val2 = accessor.get_val(docs[1] as u64); + let val3 = accessor.get_val(docs[2] as u64); + let val4 = accessor.get_val(docs[3] as u64); let bucket_pos1 = self.get_bucket_pos(val1); let bucket_pos2 = self.get_bucket_pos(val2); let bucket_pos3 = self.get_bucket_pos(val3); @@ -278,10 +278,10 @@ impl SegmentRangeCollector { self.increment_bucket(bucket_pos3, docs[2], &bucket_with_accessor.sub_aggregation)?; self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?; } - for doc in iter.remainder() { - let val = accessor.get(*doc); + for &doc in iter.remainder() { + let val = accessor.get_val(doc as u64); let bucket_pos = self.get_bucket_pos(val); - self.increment_bucket(bucket_pos, *doc, &bucket_with_accessor.sub_aggregation)?; + self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?; } if force_flush { for bucket in &mut self.buckets { diff --git a/src/aggregation/metric/average.rs b/src/aggregation/metric/average.rs index e5e5bb3f7..f10f713ee 100644 --- a/src/aggregation/metric/average.rs +++ b/src/aggregation/metric/average.rs @@ -1,9 +1,10 @@ use std::fmt::Debug; +use fastfield_codecs::Column; use serde::{Deserialize, Serialize}; use crate::aggregation::f64_from_fastfield_u64; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader}; +use crate::fastfield::DynamicFastFieldReader; use 
crate::schema::Type; use crate::DocId; @@ -60,10 +61,10 @@ impl SegmentAverageCollector { pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader) { let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val1 = field.get(docs[0]); - let val2 = field.get(docs[1]); - let val3 = field.get(docs[2]); - let val4 = field.get(docs[3]); + let val1 = field.get_val(docs[0] as u64); + let val2 = field.get_val(docs[1] as u64); + let val3 = field.get_val(docs[2] as u64); + let val4 = field.get_val(docs[3] as u64); let val1 = f64_from_fastfield_u64(val1, &self.field_type); let val2 = f64_from_fastfield_u64(val2, &self.field_type); let val3 = f64_from_fastfield_u64(val3, &self.field_type); @@ -73,8 +74,8 @@ impl SegmentAverageCollector { self.data.collect(val3); self.data.collect(val4); } - for doc in iter.remainder() { - let val = field.get(*doc); + for &doc in iter.remainder() { + let val = field.get_val(doc as u64); let val = f64_from_fastfield_u64(val, &self.field_type); self.data.collect(val); } diff --git a/src/aggregation/metric/stats.rs b/src/aggregation/metric/stats.rs index edfec4788..702c09e51 100644 --- a/src/aggregation/metric/stats.rs +++ b/src/aggregation/metric/stats.rs @@ -1,7 +1,8 @@ +use fastfield_codecs::Column; use serde::{Deserialize, Serialize}; use crate::aggregation::f64_from_fastfield_u64; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader}; +use crate::fastfield::DynamicFastFieldReader; use crate::schema::Type; use crate::{DocId, TantivyError}; @@ -166,10 +167,10 @@ impl SegmentStatsCollector { pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader) { let mut iter = doc.chunks_exact(4); for docs in iter.by_ref() { - let val1 = field.get(docs[0]); - let val2 = field.get(docs[1]); - let val3 = field.get(docs[2]); - let val4 = field.get(docs[3]); + let val1 = field.get_val(docs[0] as u64); + let val2 = field.get_val(docs[1] as u64); + let val3 = field.get_val(docs[2] as 
u64); + let val4 = field.get_val(docs[3] as u64); let val1 = f64_from_fastfield_u64(val1, &self.field_type); let val2 = f64_from_fastfield_u64(val2, &self.field_type); let val3 = f64_from_fastfield_u64(val3, &self.field_type); @@ -179,8 +180,8 @@ impl SegmentStatsCollector { self.stats.collect(val3); self.stats.collect(val4); } - for doc in iter.remainder() { - let val = field.get(*doc); + for &doc in iter.remainder() { + let val = field.get_val(doc as u64); let val = f64_from_fastfield_u64(val, &self.field_type); self.stats.collect(val); } diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs index b1dbaaa20..487385b28 100644 --- a/src/collector/filter_collector_wrapper.rs +++ b/src/collector/filter_collector_wrapper.rs @@ -11,8 +11,10 @@ // Importing tantivy... use std::marker::PhantomData; +use fastfield_codecs::Column; + use crate::collector::{Collector, SegmentCollector}; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue}; +use crate::fastfield::{DynamicFastFieldReader, FastValue}; use crate::schema::Field; use crate::{Score, SegmentReader, TantivyError}; @@ -174,7 +176,7 @@ where type Fruit = TSegmentCollector::Fruit; fn collect(&mut self, doc: u32, score: Score) { - let value = self.fast_field_reader.get(doc); + let value = self.fast_field_reader.get_val(doc as u64); if (self.predicate)(value) { self.segment_collector.collect(doc, score) } diff --git a/src/collector/histogram_collector.rs b/src/collector/histogram_collector.rs index c4dfba59a..e6d7fd41b 100644 --- a/src/collector/histogram_collector.rs +++ b/src/collector/histogram_collector.rs @@ -1,7 +1,8 @@ use fastdivide::DividerU64; +use fastfield_codecs::Column; use crate::collector::{Collector, SegmentCollector}; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue}; +use crate::fastfield::{DynamicFastFieldReader, FastValue}; use crate::schema::{Field, Type}; use crate::{DocId, Score}; @@ -91,7 +92,7 @@ impl 
SegmentCollector for SegmentHistogramCollector { type Fruit = Vec; fn collect(&mut self, doc: DocId, _score: Score) { - let value = self.ff_reader.get(doc); + let value = self.ff_reader.get_val(doc as u64); self.histogram_computer.add_value(value); } diff --git a/src/collector/tests.rs b/src/collector/tests.rs index c6b03f362..102399716 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -1,7 +1,9 @@ +use fastfield_codecs::Column; + use super::*; use crate::collector::{Count, FilterCollector, TopDocs}; use crate::core::SegmentReader; -use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader}; +use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader}; use crate::query::{AllQuery, QueryParser}; use crate::schema::{Field, Schema, FAST, TEXT}; use crate::time::format_description::well_known::Rfc3339; @@ -197,7 +199,7 @@ impl SegmentCollector for FastFieldSegmentCollector { type Fruit = Vec; fn collect(&mut self, doc: DocId, _score: Score) { - let val = self.reader.get(doc); + let val = self.reader.get_val(doc as u64); self.vals.push(val); } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 0af6b0bdb..68ba8e8f5 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -2,6 +2,8 @@ use std::collections::BinaryHeap; use std::fmt; use std::marker::PhantomData; +use fastfield_codecs::Column; + use super::Collector; use crate::collector::custom_score_top_collector::CustomScoreTopCollector; use crate::collector::top_collector::{ComparableDoc, TopCollector, TopSegmentCollector}; @@ -9,7 +11,7 @@ use crate::collector::tweak_score_top_collector::TweakedScoreTopCollector; use crate::collector::{ CustomScorer, CustomSegmentScorer, ScoreSegmentTweaker, ScoreTweaker, SegmentCollector, }; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue}; +use crate::fastfield::{DynamicFastFieldReader, FastValue}; use 
crate::query::Weight; use crate::schema::Field; use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError}; @@ -134,7 +136,7 @@ struct ScorerByFastFieldReader { impl CustomSegmentScorer for ScorerByFastFieldReader { fn score(&mut self, doc: DocId) -> u64 { - self.ff_reader.get(doc) + self.ff_reader.get_val(doc as u64) } } diff --git a/src/fastfield/bytes/reader.rs b/src/fastfield/bytes/reader.rs index 6726202d6..859184887 100644 --- a/src/fastfield/bytes/reader.rs +++ b/src/fastfield/bytes/reader.rs @@ -1,5 +1,7 @@ +use fastfield_codecs::Column; + use crate::directory::{FileSlice, OwnedBytes}; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, MultiValueLength}; +use crate::fastfield::{DynamicFastFieldReader, MultiValueLength}; use crate::DocId; /// Reader for byte array fast fields @@ -28,8 +30,9 @@ impl BytesFastFieldReader { } fn range(&self, doc: DocId) -> (usize, usize) { - let start = self.idx_reader.get(doc) as usize; - let stop = self.idx_reader.get(doc + 1) as usize; + let idx = doc as u64; + let start = self.idx_reader.get_val(idx) as usize; + let stop = self.idx_reader.get_val(idx + 1) as usize; (start, stop) } diff --git a/src/fastfield/gcd.rs b/src/fastfield/gcd.rs index 84c46a47f..1e5fa1a19 100644 --- a/src/fastfield/gcd.rs +++ b/src/fastfield/gcd.rs @@ -3,7 +3,7 @@ use std::num::NonZeroU64; use common::BinarySerializable; use fastdivide::DividerU64; -use fastfield_codecs::{FastFieldCodec, FastFieldDataAccess}; +use fastfield_codecs::{Column, FastFieldCodec}; use ownedbytes::OwnedBytes; pub const GCD_DEFAULT: u64 = 1; @@ -12,7 +12,7 @@ pub const GCD_DEFAULT: u64 = 1; /// /// Holds the data and the codec to the read the data. 
#[derive(Clone)] -pub struct GCDReader { +pub struct GCDReader { gcd_params: GCDParams, reader: CodecReader, } @@ -60,7 +60,7 @@ pub fn open_gcd_from_bytes( Ok(GCDReader { gcd_params, reader }) } -impl FastFieldDataAccess for GCDReader { +impl Column for GCDReader { #[inline] fn get_val(&self, doc: u64) -> u64 { let val = self.reader.get_val(doc); @@ -137,6 +137,7 @@ mod tests { use std::time::{Duration, SystemTime}; use common::HasLen; + use fastfield_codecs::Column; use crate::directory::{CompositeFile, RamDirectory, WritePtr}; use crate::fastfield::gcd::compute_gcd; @@ -144,7 +145,7 @@ mod tests { use crate::fastfield::tests::{FIELD, FIELDI64, SCHEMA, SCHEMAI64}; use crate::fastfield::{ find_gcd, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldCodecType, - FastFieldReader, FastFieldsWriter, ALL_CODECS, + FastFieldsWriter, ALL_CODECS, }; use crate::schema::{Cardinality, Schema}; use crate::{DateOptions, DatePrecision, DateTime, Directory}; @@ -188,9 +189,9 @@ mod tests { let file = composite_file.open_read(*FIELD).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; - assert_eq!(fast_field_reader.get(0), -4000i64); - assert_eq!(fast_field_reader.get(1), -3000i64); - assert_eq!(fast_field_reader.get(2), -2000i64); + assert_eq!(fast_field_reader.get_val(0), -4000i64); + assert_eq!(fast_field_reader.get_val(1), -3000i64); + assert_eq!(fast_field_reader.get_val(2), -2000i64); assert_eq!(fast_field_reader.max_value(), (num_vals as i64 - 5) * 1000); assert_eq!(fast_field_reader.min_value(), -4000i64); let file = directory.open_read(path).unwrap(); @@ -229,9 +230,9 @@ mod tests { let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(*FIELD).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; - assert_eq!(fast_field_reader.get(0), 1000u64); - assert_eq!(fast_field_reader.get(1), 2000u64); - assert_eq!(fast_field_reader.get(2), 3000u64); + assert_eq!(fast_field_reader.get_val(0), 
1000u64); + assert_eq!(fast_field_reader.get_val(1), 2000u64); + assert_eq!(fast_field_reader.get_val(2), 3000u64); assert_eq!(fast_field_reader.max_value(), num_vals as u64 * 1000); assert_eq!(fast_field_reader.min_value(), 1000u64); let file = directory.open_read(path).unwrap(); @@ -258,9 +259,9 @@ mod tests { #[test] pub fn test_fastfield2() { let test_fastfield = DynamicFastFieldReader::::from(vec![100, 200, 300]); - assert_eq!(test_fastfield.get(0), 100); - assert_eq!(test_fastfield.get(1), 200); - assert_eq!(test_fastfield.get(2), 300); + assert_eq!(test_fastfield.get_val(0), 100); + assert_eq!(test_fastfield.get_val(1), 200); + assert_eq!(test_fastfield.get_val(2), 300); } #[test] @@ -325,9 +326,9 @@ mod tests { let len = file.len(); let test_fastfield = DynamicFastFieldReader::::open(file)?; - assert_eq!(test_fastfield.get(0), time1.truncate(precision)); - assert_eq!(test_fastfield.get(1), time2.truncate(precision)); - assert_eq!(test_fastfield.get(2), time3.truncate(precision)); + assert_eq!(test_fastfield.get_val(0), time1.truncate(precision)); + assert_eq!(test_fastfield.get_val(1), time2.truncate(precision)); + assert_eq!(test_fastfield.get_val(2), time3.truncate(precision)); Ok(len) } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index c76cd7e4e..fd94f2404 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -28,10 +28,10 @@ pub use self::error::{FastFieldNotAvailableError, Result}; pub use self::facet_reader::FacetReader; pub(crate) use self::gcd::{find_gcd, GCDReader, GCD_DEFAULT}; pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter}; -pub use self::reader::{DynamicFastFieldReader, FastFieldReader}; +pub use self::reader::DynamicFastFieldReader; pub use self::readers::FastFieldReaders; pub(crate) use self::readers::{type_and_cardinality, FastType}; -pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats}; +pub use self::serializer::{Column, 
CompositeFastFieldSerializer, FastFieldStats}; pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; use crate::schema::{Cardinality, FieldType, Type, Value}; use crate::{DateTime, DocId}; @@ -298,9 +298,9 @@ mod tests { #[test] pub fn test_fastfield() { let test_fastfield = DynamicFastFieldReader::::from(vec![100, 200, 300]); - assert_eq!(test_fastfield.get(0), 100); - assert_eq!(test_fastfield.get(1), 200); - assert_eq!(test_fastfield.get(2), 300); + assert_eq!(test_fastfield.get_val(0u64), 100); + assert_eq!(test_fastfield.get_val(1u64), 200); + assert_eq!(test_fastfield.get_val(2u64), 300); } #[test] @@ -330,9 +330,9 @@ mod tests { let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(*FIELD).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; - assert_eq!(fast_field_reader.get(0), 13u64); - assert_eq!(fast_field_reader.get(1), 14u64); - assert_eq!(fast_field_reader.get(2), 2u64); + assert_eq!(fast_field_reader.get_val(0), 13u64); + assert_eq!(fast_field_reader.get_val(1), 14u64); + assert_eq!(fast_field_reader.get_val(2), 2u64); Ok(()) } @@ -362,15 +362,15 @@ mod tests { let fast_fields_composite = CompositeFile::open(&file)?; let data = fast_fields_composite.open_read(*FIELD).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(data)?; - assert_eq!(fast_field_reader.get(0), 4u64); - assert_eq!(fast_field_reader.get(1), 14_082_001u64); - assert_eq!(fast_field_reader.get(2), 3_052u64); - assert_eq!(fast_field_reader.get(3), 9002u64); - assert_eq!(fast_field_reader.get(4), 15_001u64); - assert_eq!(fast_field_reader.get(5), 777u64); - assert_eq!(fast_field_reader.get(6), 1_002u64); - assert_eq!(fast_field_reader.get(7), 1_501u64); - assert_eq!(fast_field_reader.get(8), 215u64); + assert_eq!(fast_field_reader.get_val(0), 4u64); + assert_eq!(fast_field_reader.get_val(1), 14_082_001u64); + assert_eq!(fast_field_reader.get_val(2), 3_052u64); + assert_eq!(fast_field_reader.get_val(3), 9002u64); + 
assert_eq!(fast_field_reader.get_val(4), 15_001u64); + assert_eq!(fast_field_reader.get_val(5), 777u64); + assert_eq!(fast_field_reader.get_val(6), 1_002u64); + assert_eq!(fast_field_reader.get_val(7), 1_501u64); + assert_eq!(fast_field_reader.get_val(8), 215u64); } Ok(()) } @@ -399,7 +399,7 @@ mod tests { let data = fast_fields_composite.open_read(*FIELD).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(data)?; for doc in 0..10_000 { - assert_eq!(fast_field_reader.get(doc), 100_000u64); + assert_eq!(fast_field_reader.get_val(doc), 100_000u64); } } Ok(()) @@ -430,10 +430,10 @@ mod tests { let fast_fields_composite = CompositeFile::open(&file)?; let data = fast_fields_composite.open_read(*FIELD).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(data)?; - assert_eq!(fast_field_reader.get(0), 0u64); + assert_eq!(fast_field_reader.get_val(0), 0u64); for doc in 1..10_001 { assert_eq!( - fast_field_reader.get(doc), + fast_field_reader.get_val(doc), 5_000_000_000_000_000_000u64 + doc as u64 - 1u64 ); } @@ -475,7 +475,7 @@ mod tests { assert_eq!(fast_field_reader.min_value(), -100i64); assert_eq!(fast_field_reader.max_value(), 9_999i64); for (doc, i) in (-100i64..10_000i64).enumerate() { - assert_eq!(fast_field_reader.get(doc as u32), i); + assert_eq!(fast_field_reader.get_val(doc as u64), i); } let mut buffer = vec![0i64; 100]; fast_field_reader.get_range(53, &mut buffer[..]); @@ -511,7 +511,7 @@ mod tests { let fast_fields_composite = CompositeFile::open(&file).unwrap(); let data = fast_fields_composite.open_read(i64_field).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(data)?; - assert_eq!(fast_field_reader.get(0u32), 0i64); + assert_eq!(fast_field_reader.get_val(0), 0i64); } Ok(()) } @@ -551,7 +551,7 @@ mod tests { let fast_field_reader = DynamicFastFieldReader::::open(data)?; for a in 0..n { - assert_eq!(fast_field_reader.get(a as u32), permutation[a as usize]); + assert_eq!(fast_field_reader.get_val(a as u64), 
permutation[a as usize]); } } Ok(()) @@ -842,19 +842,19 @@ mod tests { let dates_fast_field = fast_fields.dates(multi_date_field).unwrap(); let mut dates = vec![]; { - assert_eq!(date_fast_field.get(0u32).into_timestamp_micros(), 1i64); + assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64); dates_fast_field.get_vals(0u32, &mut dates); assert_eq!(dates.len(), 2); assert_eq!(dates[0].into_timestamp_micros(), 2i64); assert_eq!(dates[1].into_timestamp_micros(), 3i64); } { - assert_eq!(date_fast_field.get(1u32).into_timestamp_micros(), 4i64); + assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64); dates_fast_field.get_vals(1u32, &mut dates); assert!(dates.is_empty()); } { - assert_eq!(date_fast_field.get(2u32).into_timestamp_micros(), 0i64); + assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64); dates_fast_field.get_vals(2u32, &mut dates); assert_eq!(dates.len(), 2); assert_eq!(dates[0].into_timestamp_micros(), 5i64); @@ -866,10 +866,10 @@ mod tests { #[test] pub fn test_fastfield_bool() { let test_fastfield = DynamicFastFieldReader::::from(vec![true, false, true, false]); - assert_eq!(test_fastfield.get(0), true); - assert_eq!(test_fastfield.get(1), false); - assert_eq!(test_fastfield.get(2), true); - assert_eq!(test_fastfield.get(3), false); + assert_eq!(test_fastfield.get_val(0), true); + assert_eq!(test_fastfield.get_val(1), false); + assert_eq!(test_fastfield.get_val(2), true); + assert_eq!(test_fastfield.get_val(3), false); } #[test] @@ -900,10 +900,10 @@ mod tests { let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(field).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; - assert_eq!(fast_field_reader.get(0), true); - assert_eq!(fast_field_reader.get(1), false); - assert_eq!(fast_field_reader.get(2), true); - assert_eq!(fast_field_reader.get(3), false); + assert_eq!(fast_field_reader.get_val(0), true); + assert_eq!(fast_field_reader.get_val(1), false); + 
assert_eq!(fast_field_reader.get_val(2), true); + assert_eq!(fast_field_reader.get_val(3), false); Ok(()) } @@ -937,8 +937,8 @@ mod tests { let file = composite_file.open_read(field).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; for i in 0..25 { - assert_eq!(fast_field_reader.get(i * 2), true); - assert_eq!(fast_field_reader.get(i * 2 + 1), false); + assert_eq!(fast_field_reader.get_val(i * 2), true); + assert_eq!(fast_field_reader.get_val(i * 2 + 1), false); } Ok(()) @@ -970,7 +970,7 @@ mod tests { let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(field).unwrap(); let fast_field_reader = DynamicFastFieldReader::::open(file)?; - assert_eq!(fast_field_reader.get(0), false); + assert_eq!(fast_field_reader.get_val(0), false); Ok(()) } diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 70241429d..77fa9e73a 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -1,6 +1,8 @@ use std::ops::Range; -use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue, MultiValueLength}; +use fastfield_codecs::Column; + +use crate::fastfield::{DynamicFastFieldReader, FastValue, MultiValueLength}; use crate::DocId; /// Reader for a multivalued `u64` fast field. @@ -31,8 +33,9 @@ impl MultiValuedFastFieldReader { /// to the given document are `start..end`. 
#[inline] fn range(&self, doc: DocId) -> Range { - let start = self.idx_reader.get(doc); - let end = self.idx_reader.get(doc + 1); + let idx = doc as u64; + let start = self.idx_reader.get_val(idx); + let end = self.idx_reader.get_val(idx + 1); start..end } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 70da86e64..9f12d60a3 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -6,7 +6,7 @@ use common::BinarySerializable; use fastfield_codecs::bitpacked::{BitpackedCodec, BitpackedReader}; use fastfield_codecs::blockwise_linear::{BlockwiseLinearCodec, BlockwiseLinearReader}; use fastfield_codecs::linear::{LinearCodec, LinearReader}; -use fastfield_codecs::{FastFieldCodec, FastFieldCodecType, FastFieldDataAccess}; +use fastfield_codecs::{Column, FastFieldCodec, FastFieldCodecType}; use super::gcd::open_gcd_from_bytes; use super::FastValue; @@ -14,48 +14,6 @@ use crate::directory::{CompositeFile, Directory, FileSlice, OwnedBytes, RamDirec use crate::error::DataCorruption; use crate::fastfield::{CompositeFastFieldSerializer, FastFieldsWriter, GCDReader}; use crate::schema::{Schema, FAST}; -use crate::DocId; - -/// FastFieldReader is the trait to access fast field data. -pub trait FastFieldReader: Clone { - /// Return the value associated to the given document. - /// - /// This accessor should return as fast as possible. - /// - /// # Panics - /// - /// May panic if `doc` is greater than the segment - fn get(&self, doc: DocId) -> Item; - - /// Fills an output buffer with the fast field values - /// associated with the `DocId` going from - /// `start` to `start + output.len()`. - /// - /// Regardless of the type of `Item`, this method works - /// - transmuting the output array - /// - extracting the `Item`s as if they were `u64` - /// - possibly converting the `u64` value to the right type. - /// - /// # Panics - /// - /// May panic if `start + output.len()` is greater than - /// the segment's `maxdoc`. 
- fn get_range(&self, start: u64, output: &mut [Item]); - - /// Returns the minimum value for this fast field. - /// - /// The min value does not take in account of possible - /// deleted document, and should be considered as a lower bound - /// of the actual minimum value. - fn min_value(&self) -> Item; - - /// Returns the maximum value for this fast field. - /// - /// The max value does not take in account of possible - /// deleted document, and should be considered as an upper bound - /// of the actual maximum value. - fn max_value(&self) -> Item; -} #[derive(Clone)] /// DynamicFastFieldReader wraps different readers to access @@ -127,16 +85,16 @@ impl DynamicFastFieldReader { } } -impl FastFieldReader for DynamicFastFieldReader { +impl Column for DynamicFastFieldReader { #[inline] - fn get(&self, doc: DocId) -> Item { + fn get_val(&self, idx: u64) -> Item { match self { - Self::Bitpacked(reader) => reader.get(doc), - Self::Linear(reader) => reader.get(doc), - Self::BlockwiseLinear(reader) => reader.get(doc), - Self::BitpackedGCD(reader) => reader.get(doc), - Self::LinearGCD(reader) => reader.get(doc), - Self::BlockwiseLinearGCD(reader) => reader.get(doc), + Self::Bitpacked(reader) => reader.get_val(idx), + Self::Linear(reader) => reader.get_val(idx), + Self::BlockwiseLinear(reader) => reader.get_val(idx), + Self::BitpackedGCD(reader) => reader.get_val(idx), + Self::LinearGCD(reader) => reader.get_val(idx), + Self::BlockwiseLinearGCD(reader) => reader.get_val(idx), } } #[inline] @@ -170,6 +128,17 @@ impl FastFieldReader for DynamicFastFieldReader { Self::BlockwiseLinearGCD(reader) => reader.max_value(), } } + + fn num_vals(&self) -> u64 { + match self { + Self::Bitpacked(reader) => reader.num_vals(), + Self::Linear(reader) => reader.num_vals(), + Self::BlockwiseLinear(reader) => reader.num_vals(), + Self::BitpackedGCD(reader) => reader.num_vals(), + Self::LinearGCD(reader) => reader.num_vals(), + Self::BlockwiseLinearGCD(reader) => reader.num_vals(), + } + } } 
/// Wrapper for accessing a fastfield. @@ -192,10 +161,10 @@ impl From } } -impl FastFieldReaderCodecWrapper { +impl FastFieldReaderCodecWrapper { #[inline] - pub(crate) fn get_u64(&self, doc: u64) -> Item { - let data = self.reader.get_val(doc); + pub(crate) fn get_u64(&self, idx: u64) -> Item { + let data = self.reader.get_val(idx); Item::from_u64(data) } @@ -218,9 +187,7 @@ impl FastFieldReaderCodecWrapper FastFieldReader - for FastFieldReaderCodecWrapper -{ +impl Column for FastFieldReaderCodecWrapper { /// Return the value associated to the given document. /// /// This accessor should return as fast as possible. @@ -229,8 +196,8 @@ impl FastFieldReader /// /// May panic if `doc` is greater than the segment // `maxdoc`. - fn get(&self, doc: DocId) -> Item { - self.get_u64(u64::from(doc)) + fn get_val(&self, idx: u64) -> Item { + self.get_u64(idx) } /// Fills an output buffer with the fast field values @@ -267,6 +234,10 @@ impl FastFieldReader fn max_value(&self) -> Item { Item::from_u64(self.reader.max_value()) } + + fn num_vals(&self) -> u64 { + self.reader.num_vals() + } } impl From> for DynamicFastFieldReader { diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index fbda73b5a..f4efea3cd 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -7,7 +7,7 @@ pub use fastfield_codecs::bitpacked::{BitpackedCodec, BitpackedSerializerLegacy} use fastfield_codecs::blockwise_linear::BlockwiseLinearCodec; use fastfield_codecs::linear::LinearCodec; use fastfield_codecs::FastFieldCodecType; -pub use fastfield_codecs::{FastFieldCodec, FastFieldDataAccess, FastFieldStats}; +pub use fastfield_codecs::{Column, FastFieldCodec, FastFieldStats}; use super::{find_gcd, ALL_CODECS, GCD_DEFAULT}; use crate::directory::{CompositeWrite, WritePtr}; @@ -65,7 +65,7 @@ impl From for FastFieldCodecEnableCheck { // use this, when this is merged and stabilized explicit_generic_args_with_impl_trait // 
https://github.com/rust-lang/rust/pull/86176 fn codec_estimation( - fastfield_accessor: &impl FastFieldDataAccess, + fastfield_accessor: &impl Column, estimations: &mut Vec<(f32, FastFieldCodecType)>, ) { if let Some(ratio) = C::estimate(fastfield_accessor) { @@ -97,7 +97,7 @@ impl CompositeFastFieldSerializer { pub fn create_auto_detect_u64_fast_field( &mut self, field: Field, - fastfield_accessor: impl FastFieldDataAccess, + fastfield_accessor: impl Column, ) -> io::Result<()> { self.create_auto_detect_u64_fast_field_with_idx(field, fastfield_accessor, 0) } @@ -117,7 +117,7 @@ impl CompositeFastFieldSerializer { pub fn create_auto_detect_u64_fast_field_with_idx( &mut self, field: Field, - fastfield_accessor: impl FastFieldDataAccess, + fastfield_accessor: impl Column, idx: usize, ) -> io::Result<()> { let min_value = fastfield_accessor.min_value(); @@ -136,7 +136,7 @@ impl CompositeFastFieldSerializer { } Self::write_header(field_write, FastFieldCodecType::Gcd)?; - struct GCDWrappedFFAccess { + struct GCDWrappedFFAccess { fastfield_accessor: T, base_value: u64, max_value: u64, @@ -144,7 +144,7 @@ impl CompositeFastFieldSerializer { gcd: DividerU64, } - impl FastFieldDataAccess for GCDWrappedFFAccess { + impl Column for GCDWrappedFFAccess { fn get_val(&self, position: u64) -> u64 { self.gcd .divide(self.fastfield_accessor.get_val(position) - self.base_value) @@ -197,7 +197,7 @@ impl CompositeFastFieldSerializer { codec_enable_checker: FastFieldCodecEnableCheck, field: Field, field_write: &mut CountingWriter, - fastfield_accessor: impl FastFieldDataAccess, + fastfield_accessor: impl Column, ) -> io::Result<()> { let mut estimations = vec![]; diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 6d0000e5c..c5b4c1b8b 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::io; use common; -use fastfield_codecs::FastFieldDataAccess; +use fastfield_codecs::Column; use fnv::FnvHashMap; use 
tantivy_bitpacker::BlockedBitpacker; @@ -384,7 +384,7 @@ struct WriterFastFieldAccessProvider<'map, 'bitp> { vals: &'bitp BlockedBitpacker, stats: FastFieldStats, } -impl<'map, 'bitp> FastFieldDataAccess for WriterFastFieldAccessProvider<'map, 'bitp> { +impl<'map, 'bitp> Column for WriterFastFieldAccessProvider<'map, 'bitp> { /// Return the value associated to the given doc. /// /// Whenever possible use the Iterator passed to the fastfield creation instead, for performance diff --git a/src/indexer/doc_id_mapping.rs b/src/indexer/doc_id_mapping.rs index 81b137253..914a144a7 100644 --- a/src/indexer/doc_id_mapping.rs +++ b/src/indexer/doc_id_mapping.rs @@ -143,8 +143,9 @@ pub(crate) fn get_doc_id_mapping_from_field( #[cfg(test)] mod tests_indexsorting { + use fastfield_codecs::Column; + use crate::collector::TopDocs; - use crate::fastfield::FastFieldReader; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::query::QueryParser; use crate::schema::{Schema, *}; @@ -464,9 +465,9 @@ mod tests_indexsorting { let my_number = index.schema().get_field("my_number").unwrap(); let fast_field = fast_fields.u64(my_number).unwrap(); - assert_eq!(fast_field.get(0u32), 10u64); - assert_eq!(fast_field.get(1u32), 20u64); - assert_eq!(fast_field.get(2u32), 30u64); + assert_eq!(fast_field.get_val(0), 10u64); + assert_eq!(fast_field.get_val(1), 20u64); + assert_eq!(fast_field.get_val(2), 30u64); let multi_numbers = index.schema().get_field("multi_numbers").unwrap(); let multifield = fast_fields.u64s(multi_numbers).unwrap(); diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index b4d5f187f..dcc9cba9e 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -777,6 +777,7 @@ impl Drop for IndexWriter { mod tests { use std::collections::{HashMap, HashSet}; + use fastfield_codecs::Column; use proptest::prelude::*; use proptest::prop_oneof; use proptest::strategy::Strategy; @@ -785,7 +786,6 @@ mod tests { use crate::collector::TopDocs; use 
crate::directory::error::LockError; use crate::error::*; - use crate::fastfield::FastFieldReader; use crate::indexer::NoMergePolicy; use crate::query::{QueryParser, TermQuery}; use crate::schema::{ @@ -1327,7 +1327,7 @@ mod tests { let fast_field_reader = segment_reader.fast_fields().u64(id_field)?; let in_order_alive_ids: Vec = segment_reader .doc_ids_alive() - .map(|doc| fast_field_reader.get(doc)) + .map(|doc| fast_field_reader.get_val(doc as u64)) .collect(); assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]); Ok(()) @@ -1493,7 +1493,7 @@ mod tests { let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); segment_reader .doc_ids_alive() - .map(move |doc| ff_reader.get(doc)) + .map(move |doc| ff_reader.get_val(doc as u64)) }) .collect(); @@ -1504,7 +1504,7 @@ mod tests { let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); segment_reader .doc_ids_alive() - .map(move |doc| ff_reader.get(doc)) + .map(move |doc| ff_reader.get_val(doc as u64)) }) .collect(); @@ -1622,7 +1622,7 @@ mod tests { facet_reader .facet_from_ord(facet_ords[0], &mut facet) .unwrap(); - let id = ff_reader.get(doc_id); + let id = ff_reader.get_val(doc_id as u64); let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string())); assert_eq!(facet, facet_expected); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index d1c71dae7..a3d6f9408 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -4,14 +4,13 @@ use std::sync::Arc; use itertools::Itertools; use measure_time::debug_time; -use tantivy_bitpacker::minmax; use crate::core::{Segment, SegmentReader}; use crate::docset::{DocSet, TERMINATED}; use crate::error::DataCorruption; use crate::fastfield::{ - AliveBitSet, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldDataAccess, - FastFieldReader, FastFieldStats, MultiValueLength, MultiValuedFastFieldReader, + AliveBitSet, Column, CompositeFastFieldSerializer, DynamicFastFieldReader, FastFieldStats, + 
MultiValueLength, MultiValuedFastFieldReader, }; use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter}; use crate::indexer::doc_id_mapping::{expect_field_id_for_sort_field, SegmentDocIdMapping}; @@ -88,7 +87,7 @@ pub struct IndexMerger { } fn compute_min_max_val( - u64_reader: &impl FastFieldReader, + u64_reader: &impl Column, segment_reader: &SegmentReader, ) -> Option<(u64, u64)> { if segment_reader.max_doc() == 0 { @@ -102,11 +101,11 @@ fn compute_min_max_val( } // some deleted documents, // we need to recompute the max / min - minmax( - segment_reader - .doc_ids_alive() - .map(|doc_id| u64_reader.get(doc_id)), - ) + segment_reader + .doc_ids_alive() + .map(|doc_id| u64_reader.get_val(doc_id as u64)) + .minmax() + .into_option() } struct TermOrdinalMapping { @@ -376,13 +375,13 @@ impl IndexMerger { fast_field_readers: &'a Vec>, stats: FastFieldStats, } - impl<'a> FastFieldDataAccess for SortedDocIdFieldAccessProvider<'a> { + impl<'a> Column for SortedDocIdFieldAccessProvider<'a> { fn get_val(&self, doc: u64) -> u64 { let DocAddress { doc_id, segment_ord, } = self.doc_id_mapping.get_old_doc_addr(doc as u32); - self.fast_field_readers[segment_ord as usize].get(doc_id) + self.fast_field_readers[segment_ord as usize].get_val(doc_id as u64) } fn iter(&self) -> Box + '_> { @@ -392,7 +391,7 @@ impl IndexMerger { .map(|old_doc_addr| { let fast_field_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize]; - fast_field_reader.get(old_doc_addr.doc_id) + fast_field_reader.get_val(old_doc_addr.doc_id as u64) }), ) } @@ -429,7 +428,7 @@ impl IndexMerger { let everything_is_in_order = reader_ordinal_and_field_accessors .into_iter() - .map(|reader| reader.1) + .map(|(_, col)| Arc::new(col)) .tuple_windows() .all(|(field_accessor1, field_accessor2)| { if sort_by_field.order.is_asc() { @@ -444,7 +443,7 @@ impl IndexMerger { pub(crate) fn get_sort_field_accessor( reader: &SegmentReader, sort_by_field: &IndexSortByField, - ) 
-> crate::Result> { + ) -> crate::Result { let field_id = expect_field_id_for_sort_field(reader.schema(), sort_by_field)?; // for now expect fastfield, but not strictly required let value_accessor = reader.fast_fields().u64_lenient(field_id)?; Ok(value_accessor) @@ -453,7 +452,7 @@ impl IndexMerger { pub(crate) fn get_reader_with_sort_field_accessor( &self, sort_by_field: &IndexSortByField, - ) -> crate::Result + Clone)>> { + ) -> crate::Result> { let reader_ordinal_and_field_accessors = self .readers .iter() @@ -506,8 +505,8 @@ impl IndexMerger { doc_id_reader_pair .into_iter() .kmerge_by(|a, b| { - let val1 = a.2.get(a.0); - let val2 = b.2.get(b.0); + let val1 = a.2.get_val(a.0 as u64); + let val2 = b.2.get_val(b.0 as u64); if sort_by_field.order == Order::Asc { val1 < val2 } else { @@ -578,7 +577,7 @@ impl IndexMerger { offsets: &'a [u64], stats: FastFieldStats, } - impl<'a> FastFieldDataAccess for FieldIndexAccessProvider<'a> { + impl<'a> Column for FieldIndexAccessProvider<'a> { fn get_val(&self, doc: u64) -> u64 { self.offsets[doc as usize] } @@ -778,7 +777,7 @@ impl IndexMerger { offsets: Vec, stats: FastFieldStats, } - impl<'a> FastFieldDataAccess for SortedDocIdMultiValueAccessProvider<'a> { + impl<'a> Column for SortedDocIdMultiValueAccessProvider<'a> { fn get_val(&self, pos: u64) -> u64 { // use the offsets index to find the doc_id which will contain the position. // the offsets are strictly increasing so we can do a simple search on it. 
@@ -1200,6 +1199,7 @@ impl IndexMerger { #[cfg(test)] mod tests { use byteorder::{BigEndian, ReadBytesExt}; + use fastfield_codecs::Column; use schema::FAST; use crate::collector::tests::{ @@ -1207,7 +1207,6 @@ mod tests { }; use crate::collector::{Count, FacetCollector}; use crate::core::Index; - use crate::fastfield::FastFieldReader; use crate::query::{AllQuery, BooleanQuery, Scorer, TermQuery}; use crate::schema::{ Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term, diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index d8a80534c..a2805d5cc 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -1,8 +1,10 @@ #[cfg(test)] mod tests { + use fastfield_codecs::Column; + use crate::collector::TopDocs; use crate::core::Index; - use crate::fastfield::{AliveBitSet, FastFieldReader, MultiValuedFastFieldReader}; + use crate::fastfield::{AliveBitSet, MultiValuedFastFieldReader}; use crate::query::QueryParser; use crate::schema::{ self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, NumericOptions, @@ -186,17 +188,17 @@ mod tests { let fast_fields = segment_reader.fast_fields(); let fast_field = fast_fields.u64(int_field).unwrap(); - assert_eq!(fast_field.get(5u32), 1u64); - assert_eq!(fast_field.get(4u32), 2u64); - assert_eq!(fast_field.get(3u32), 3u64); + assert_eq!(fast_field.get_val(5), 1u64); + assert_eq!(fast_field.get_val(4), 2u64); + assert_eq!(fast_field.get_val(3), 3u64); if force_disjunct_segment_sort_values { - assert_eq!(fast_field.get(2u32), 20u64); - assert_eq!(fast_field.get(1u32), 100u64); + assert_eq!(fast_field.get_val(2u64), 20u64); + assert_eq!(fast_field.get_val(1u64), 100u64); } else { - assert_eq!(fast_field.get(2u32), 10u64); - assert_eq!(fast_field.get(1u32), 20u64); + assert_eq!(fast_field.get_val(2u64), 10u64); + assert_eq!(fast_field.get_val(1u64), 20u64); } - assert_eq!(fast_field.get(0u32), 
1_000u64); + assert_eq!(fast_field.get_val(0u64), 1_000u64); // test new field norm mapping { @@ -373,12 +375,12 @@ mod tests { let fast_fields = segment_reader.fast_fields(); let fast_field = fast_fields.u64(int_field).unwrap(); - assert_eq!(fast_field.get(0u32), 1u64); - assert_eq!(fast_field.get(1u32), 2u64); - assert_eq!(fast_field.get(2u32), 3u64); - assert_eq!(fast_field.get(3u32), 10u64); - assert_eq!(fast_field.get(4u32), 20u64); - assert_eq!(fast_field.get(5u32), 1_000u64); + assert_eq!(fast_field.get_val(0), 1u64); + assert_eq!(fast_field.get_val(1), 2u64); + assert_eq!(fast_field.get_val(2), 3u64); + assert_eq!(fast_field.get_val(3), 10u64); + assert_eq!(fast_field.get_val(4), 20u64); + assert_eq!(fast_field.get_val(5), 1_000u64); let get_vals = |fast_field: &MultiValuedFastFieldReader, doc_id: u32| -> Vec { let mut vals = vec![]; diff --git a/src/lib.rs b/src/lib.rs index dbcd94f84..40ff11028 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -421,6 +421,7 @@ pub struct DocAddress { #[cfg(test)] pub mod tests { use common::{BinarySerializable, FixedSize}; + use fastfield_codecs::Column; use rand::distributions::{Bernoulli, Uniform}; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -429,7 +430,6 @@ pub mod tests { use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE; use crate::core::SegmentReader; use crate::docset::{DocSet, TERMINATED}; - use crate::fastfield::FastFieldReader; use crate::merge_policy::NoMergePolicy; use crate::query::BooleanQuery; use crate::schema::*; @@ -1036,21 +1036,21 @@ pub mod tests { let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned); assert!(fast_field_reader_opt.is_ok()); let fast_field_reader = fast_field_reader_opt.unwrap(); - assert_eq!(fast_field_reader.get(0), 4u64) + assert_eq!(fast_field_reader.get_val(0), 4u64) } { let fast_field_reader_res = segment_reader.fast_fields().i64(fast_field_signed); assert!(fast_field_reader_res.is_ok()); let fast_field_reader = 
fast_field_reader_res.unwrap(); - assert_eq!(fast_field_reader.get(0), 4i64) + assert_eq!(fast_field_reader.get_val(0), 4i64) } { let fast_field_reader_res = segment_reader.fast_fields().f64(fast_field_float); assert!(fast_field_reader_res.is_ok()); let fast_field_reader = fast_field_reader_res.unwrap(); - assert_eq!(fast_field_reader.get(0), 4f64) + assert_eq!(fast_field_reader.get_val(0), 4f64) } Ok(()) }