diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs index a162839c0..9276f167e 100644 --- a/examples/custom_collector.rs +++ b/examples/custom_collector.rs @@ -9,7 +9,7 @@ use std::sync::Arc; -use fastfield_codecs::Column; +use fastfield_codecs::OptionalColumn; // --- // Importing tantivy... use tantivy::collector::{Collector, SegmentCollector}; @@ -97,7 +97,7 @@ impl Collector for StatsCollector { } struct StatsSegmentCollector { - fast_field_reader: Arc>, + fast_field_reader: Arc>, stats: Stats, } @@ -105,10 +105,12 @@ impl SegmentCollector for StatsSegmentCollector { type Fruit = Option; fn collect(&mut self, doc: u32, _score: Score) { - let value = self.fast_field_reader.get_val(doc) as f64; - self.stats.count += 1; - self.stats.sum += value; - self.stats.squared_sum += value * value; + if let Some(value) = self.fast_field_reader.get_val(doc) { + let value = value as f64; + self.stats.count += 1; + self.stats.sum += value; + self.stats.squared_sum += value * value; + } } fn harvest(self) -> ::Fruit { diff --git a/examples/warmer.rs b/examples/warmer.rs index c9dc699f2..a68b1d9f7 100644 --- a/examples/warmer.rs +++ b/examples/warmer.rs @@ -51,7 +51,7 @@ impl Warmer for DynamicPriceColumn { let product_id_reader = segment.fast_fields().u64(self.field)?; let product_ids: Vec = segment .doc_ids_alive() - .map(|doc| product_id_reader.get_val(doc)) + .flat_map(|doc| product_id_reader.get_val(doc)) .collect(); let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter(); let mut price_vals: Vec = Vec::new(); diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index bc7a25359..d6985e2b4 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -103,7 +103,7 @@ mod tests { let iter_gen = || data.iter().cloned(); serialize_u128(iter_gen, data.len() as u32, &mut out).unwrap(); let out = OwnedBytes::new(out); - open_u128::(out).unwrap() + open_u128::(out).unwrap().to_full().unwrap() } #[bench] diff --git a/fastfield_codecs/src/compact_space/mod.rs b/fastfield_codecs/src/compact_space/mod.rs index 3e744562c..a6c6d9d6a 100644 --- a/fastfield_codecs/src/compact_space/mod.rs +++ b/fastfield_codecs/src/compact_space/mod.rs @@ -731,7 +731,10 @@ mod tests { ]; let mut out = Vec::new(); serialize_u128(|| vals.iter().cloned(), vals.len() as u32, &mut out).unwrap(); - let decomp = open_u128::(OwnedBytes::new(out)).unwrap(); + let decomp = open_u128::(OwnedBytes::new(out)) + .unwrap() + .to_full() + .unwrap(); let complete_range = 0..vals.len() as u32; assert_eq!( diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 8e2415cb2..bb5c8898a 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -133,13 +133,16 @@ impl U128FastFieldCodecType { /// Returns the correct codec reader wrapped in the `Arc` for the data. pub fn open_u128( mut bytes: OwnedBytes, -) -> io::Result>> { +) -> io::Result>> { let header = U128Header::deserialize(&mut bytes)?; assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace); let reader = CompactSpaceDecompressor::open(bytes)?; let inverted: StrictlyMonotonicMappingInverter> = StrictlyMonotonicMappingToInternal::::new().into(); - Ok(Arc::new(monotonic_map_column(reader, inverted))) + + Ok(Arc::new(ToOptionalColumn::new(Arc::new( + monotonic_map_column(reader, inverted), + )))) } /// Returns the correct codec reader wrapped in the `Arc` for the data. diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index 1c26bbe02..4127b30a6 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -113,7 +113,10 @@ fn bench_ip() { (data.len() * 8) as f32 / dataset.len() as f32 ); - let decompressor = open_u128::(OwnedBytes::new(data)).unwrap(); + let decompressor = open_u128::(OwnedBytes::new(data)) + .unwrap() + .to_full() + .unwrap(); // Sample some ranges let mut doc_values = Vec::new(); for value in dataset.iter().take(1110).skip(1100).cloned() { diff --git a/src/collector/tests.rs b/src/collector/tests.rs index 2dd194245..46e3ab2f3 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use fastfield_codecs::Column; +use fastfield_codecs::OptionalColumn; use super::*; use crate::collector::{Count, FilterCollector, TopDocs}; @@ -160,7 +160,7 @@ pub struct FastFieldTestCollector { pub struct FastFieldSegmentCollector { vals: Vec, - reader: Arc>, + reader: Arc>, } impl FastFieldTestCollector { @@ -202,7 +202,9 @@ impl SegmentCollector for FastFieldSegmentCollector { fn collect(&mut self, doc: DocId, _score: Score) { let val = self.reader.get_val(doc); - self.vals.push(val); + if let Some(val) = val { + self.vals.push(val); + } } fn harvest(self) -> Vec { diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 57a82c691..8ac589e3d 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -460,7 +460,7 @@ impl TopDocs { /// /// // We can now define our actual scoring function /// move |doc: DocId, original_score: Score| { - /// let popularity: u64 = popularity_reader.get_val(doc); + /// let popularity: u64 = popularity_reader.get_val(doc).unwrap(); /// // Well.. For the sake of the example we use a simple logarithm /// // function. /// let popularity_boost_score = ((2u64 + popularity) as Score).log2(); @@ -569,8 +569,8 @@ impl TopDocs { /// /// // We can now define our actual scoring function /// move |doc: DocId| { - /// let popularity: u64 = popularity_reader.get_val(doc); - /// let boosted: u64 = boosted_reader.get_val(doc); + /// let popularity: u64 = popularity_reader.get_val(doc).unwrap(); + /// let boosted: u64 = boosted_reader.get_val(doc).unwrap(); /// // Score do not have to be `f64` in tantivy. /// // Here we return a couple to get lexicographical order /// // for free. diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index 7114fdbe9..5739b7b58 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -151,18 +151,15 @@ impl FastFieldReaders { /// Returns the `u64` fast field reader reader associated with `field`. /// /// If `field` is not a u64 fast field, this method returns an Error. - pub fn u64(&self, field: Field) -> crate::Result>> { + pub fn u64(&self, field: Field) -> crate::Result>> { self.check_type(field, FastType::U64, Cardinality::SingleValue)?; - Ok(self - .typed_fast_field_reader(field)? - .to_full() - .expect("temp migration solution")) + self.typed_fast_field_reader(field) } /// Returns the `ip` fast field reader reader associated to `field`. /// /// If `field` is not a u128 fast field, this method returns an Error. - pub fn ip_addr(&self, field: Field) -> crate::Result>> { + pub fn ip_addr(&self, field: Field) -> crate::Result>> { self.check_type(field, FastType::U128, Cardinality::SingleValue)?; let bytes = self.fast_field_data(field, 0)?.read_bytes()?; Ok(open_u128::(bytes)?) @@ -182,7 +179,9 @@ impl FastFieldReaders { .expect("multivalue fast fields are always full"); let bytes = self.fast_field_data(field, 1)?.read_bytes()?; - let vals_reader = open_u128::(bytes)?; + let vals_reader = open_u128::(bytes)? + .to_full() + .expect("multivalue fields are always full"); Ok(MultiValuedU128FastFieldReader::open( idx_reader, @@ -192,8 +191,9 @@ impl FastFieldReaders { /// Returns the `u128` fast field reader reader associated to `field`. /// - /// If `field` is not a u128 fast field, this method returns an Error. - pub(crate) fn u128(&self, field: Field) -> crate::Result>> { + /// If `field` is not a u128 base type fast field, this method returns an Error. + /// Ip addresses use u128 as base type. + pub(crate) fn u128(&self, field: Field) -> crate::Result>> { self.check_type(field, FastType::U128, Cardinality::SingleValue)?; let bytes = self.fast_field_data(field, 0)?.read_bytes()?; Ok(open_u128::(bytes)?) @@ -210,7 +210,9 @@ impl FastFieldReaders { .expect("multivalue fast fields are always full"); let bytes = self.fast_field_data(field, 1)?.read_bytes()?; - let vals_reader = open_u128::(bytes)?; + let vals_reader = open_u128::(bytes)? + .to_full() + .expect("multivalue fast fields are always full"); Ok(MultiValuedU128FastFieldReader::open( idx_reader, @@ -246,12 +248,9 @@ impl FastFieldReaders { /// Returns the `f64` fast field reader reader associated with `field`. /// /// If `field` is not a f64 fast field, this method returns an Error. - pub fn f64(&self, field: Field) -> crate::Result>> { + pub fn f64(&self, field: Field) -> crate::Result>> { self.check_type(field, FastType::F64, Cardinality::SingleValue)?; - Ok(self - .typed_fast_field_reader(field)? - .to_full() - .expect("temp migration solution")) + Ok(self.typed_fast_field_reader(field)?) } /// Returns the `bool` fast field reader reader associated with `field`. diff --git a/src/indexer/doc_id_mapping.rs b/src/indexer/doc_id_mapping.rs index 1f25af1ad..14a49f8f0 100644 --- a/src/indexer/doc_id_mapping.rs +++ b/src/indexer/doc_id_mapping.rs @@ -465,9 +465,9 @@ mod tests_indexsorting { let my_number = index.schema().get_field("my_number").unwrap(); let fast_field = fast_fields.u64(my_number).unwrap(); - assert_eq!(fast_field.get_val(0), 10u64); - assert_eq!(fast_field.get_val(1), 20u64); - assert_eq!(fast_field.get_val(2), 30u64); + assert_eq!(fast_field.get_val(0), Some(10u64)); + assert_eq!(fast_field.get_val(1), Some(20u64)); + assert_eq!(fast_field.get_val(2), Some(30u64)); let multi_numbers = index.schema().get_field("multi_numbers").unwrap(); let multifield = fast_fields.u64s(multi_numbers).unwrap(); diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 8786ab6cd..8bac71a83 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1467,7 +1467,7 @@ mod tests { let fast_field_reader = segment_reader.fast_fields().u64(id_field)?; let in_order_alive_ids: Vec = segment_reader .doc_ids_alive() - .map(|doc| fast_field_reader.get_val(doc)) + .map(|doc| fast_field_reader.get_val(doc).unwrap()) .collect(); assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 1, 0]); Ok(()) @@ -1528,7 +1528,7 @@ mod tests { let fast_field_reader = segment_reader.fast_fields().u64(id_field)?; let in_order_alive_ids: Vec = segment_reader .doc_ids_alive() - .map(|doc| fast_field_reader.get_val(doc)) + .map(|doc| fast_field_reader.get_val(doc).unwrap()) .collect(); assert_eq!(&in_order_alive_ids[..], &[9, 8, 7, 6, 5, 4, 2, 0]); Ok(()) @@ -1777,7 +1777,12 @@ mod tests { .segment_readers() .iter() .flat_map(|segment_reader| { - let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); + let ff_reader = segment_reader + .fast_fields() + .u64(id_field) + .unwrap() + .to_full() + .unwrap(); segment_reader .doc_ids_alive() .map(move |doc| ff_reader.get_val(doc)) @@ -1788,7 +1793,12 @@ mod tests { .segment_readers() .iter() .flat_map(|segment_reader| { - let ff_reader = segment_reader.fast_fields().u64(id_field).unwrap(); + let ff_reader = segment_reader + .fast_fields() + .u64(id_field) + .unwrap() + .to_full() + .unwrap(); segment_reader .doc_ids_alive() .map(move |doc| ff_reader.get_val(doc)) @@ -1864,7 +1874,7 @@ mod tests { .flat_map(|segment_reader| { let ff_reader = segment_reader.fast_fields().ip_addr(ip_field).unwrap(); segment_reader.doc_ids_alive().flat_map(move |doc| { - let val = ff_reader.get_val(doc); + let val = ff_reader.get_val(doc).unwrap(); // TODO handle null if val == Ipv6Addr::from_u128(0) { // TODO Fix null handling None @@ -1921,7 +1931,7 @@ mod tests { ff_reader.get_vals(doc, &mut vals); assert_eq!(vals.len(), 2); assert_eq!(vals[0], vals[1]); - assert_eq!(id_reader.get_val(doc), vals[0]); + assert_eq!(id_reader.get_val(doc).unwrap(), vals[0]); let mut bool_vals = vec![]; bool_ff_reader.get_vals(doc, &mut bool_vals); @@ -2117,7 +2127,7 @@ mod tests { facet_reader .facet_from_ord(facet_ords[0], &mut facet) .unwrap(); - let id = ff_reader.get_val(doc_id); + let id = ff_reader.get_val(doc_id).unwrap(); let facet_expected = Facet::from(&("/cola/".to_string() + &id.to_string())); assert_eq!(facet, facet_expected); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index de2340600..30121e1e4 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -401,10 +401,15 @@ impl IndexMerger { .readers .iter() .map(|reader| { - let u128_reader: Arc> = reader.fast_fields().u128(field).expect( - "Failed to find a reader for single fast field. This is a tantivy bug and it \ - should never happen.", - ); + let u128_reader: Arc> = reader + .fast_fields() + .u128(field) + .expect( + "Failed to find a reader for single fast field. This is a tantivy bug and \ + it should never happen.", + ) + .to_full() + .expect("temp migration solution"); u128_reader }) .collect::>(); @@ -1372,16 +1377,16 @@ mod tests { .fast_fields() .u64(score_field) .unwrap(); - assert_eq!(score_field_reader.min_value(), 4000); - assert_eq!(score_field_reader.max_value(), 7000); + assert_eq!(score_field_reader.min_value(), Some(4000)); + assert_eq!(score_field_reader.max_value(), Some(7000)); let score_field_reader = searcher .segment_reader(1) .fast_fields() .u64(score_field) .unwrap(); - assert_eq!(score_field_reader.min_value(), 1); - assert_eq!(score_field_reader.max_value(), 3); + assert_eq!(score_field_reader.min_value(), Some(1)); + assert_eq!(score_field_reader.max_value(), Some(3)); } { // merging the segments @@ -1426,8 +1431,8 @@ mod tests { .fast_fields() .u64(score_field) .unwrap(); - assert_eq!(score_field_reader.min_value(), 3); - assert_eq!(score_field_reader.max_value(), 7000); + assert_eq!(score_field_reader.min_value(), Some(3)); + assert_eq!(score_field_reader.max_value(), Some(7000)); } { // test a commit with only deletes @@ -1473,8 +1478,8 @@ mod tests { .fast_fields() .u64(score_field) .unwrap(); - assert_eq!(score_field_reader.min_value(), 3); - assert_eq!(score_field_reader.max_value(), 7000); + assert_eq!(score_field_reader.min_value(), Some(3)); + assert_eq!(score_field_reader.max_value(), Some(7000)); } { // Test merging a single segment in order to remove deletes. @@ -1520,8 +1525,8 @@ mod tests { .fast_fields() .u64(score_field) .unwrap(); - assert_eq!(score_field_reader.min_value(), 6000); - assert_eq!(score_field_reader.max_value(), 7000); + assert_eq!(score_field_reader.min_value(), Some(6000)); + assert_eq!(score_field_reader.max_value(), Some(7000)); } { diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 700047480..95014cea2 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -186,17 +186,17 @@ mod tests { let fast_fields = segment_reader.fast_fields(); let fast_field = fast_fields.u64(int_field).unwrap(); - assert_eq!(fast_field.get_val(5), 1u64); - assert_eq!(fast_field.get_val(4), 2u64); - assert_eq!(fast_field.get_val(3), 3u64); + assert_eq!(fast_field.get_val(5), Some(1u64)); + assert_eq!(fast_field.get_val(4), Some(2u64)); + assert_eq!(fast_field.get_val(3), Some(3u64)); if force_disjunct_segment_sort_values { - assert_eq!(fast_field.get_val(2), 20u64); - assert_eq!(fast_field.get_val(1), 100u64); + assert_eq!(fast_field.get_val(2), Some(20u64)); + assert_eq!(fast_field.get_val(1), Some(100u64)); } else { - assert_eq!(fast_field.get_val(2), 10u64); - assert_eq!(fast_field.get_val(1), 20u64); + assert_eq!(fast_field.get_val(2), Some(10u64)); + assert_eq!(fast_field.get_val(1), Some(20u64)); } - assert_eq!(fast_field.get_val(0), 1_000u64); + assert_eq!(fast_field.get_val(0), Some(1_000u64)); // test new field norm mapping { @@ -373,12 +373,12 @@ mod tests { let fast_fields = segment_reader.fast_fields(); let fast_field = fast_fields.u64(int_field).unwrap(); - assert_eq!(fast_field.get_val(0), 1u64); - assert_eq!(fast_field.get_val(1), 2u64); - assert_eq!(fast_field.get_val(2), 3u64); - assert_eq!(fast_field.get_val(3), 10u64); - assert_eq!(fast_field.get_val(4), 20u64); - assert_eq!(fast_field.get_val(5), 1_000u64); + assert_eq!(fast_field.get_val(0), Some(1u64)); + assert_eq!(fast_field.get_val(1), Some(2u64)); + assert_eq!(fast_field.get_val(2), Some(3u64)); + assert_eq!(fast_field.get_val(3), Some(10u64)); + assert_eq!(fast_field.get_val(4), Some(20u64)); + assert_eq!(fast_field.get_val(5), Some(1_000u64)); let get_vals = |fast_field: &MultiValuedFastFieldReader, doc_id: u32| -> Vec { let mut vals = vec![]; diff --git a/src/lib.rs b/src/lib.rs index 8144a0719..459a52fae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1037,7 +1037,7 @@ pub mod tests { let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned); assert!(fast_field_reader_opt.is_ok()); let fast_field_reader = fast_field_reader_opt.unwrap(); - assert_eq!(fast_field_reader.get_val(0), 4u64) + assert_eq!(fast_field_reader.get_val(0), Some(4u64)) } { @@ -1051,7 +1051,7 @@ pub mod tests { let fast_field_reader_res = segment_reader.fast_fields().f64(fast_field_float); assert!(fast_field_reader_res.is_ok()); let fast_field_reader = fast_field_reader_res.unwrap(); - assert_eq!(fast_field_reader.get_val(0), 4f64) + assert_eq!(fast_field_reader.get_val(0), Some(4f64)) } Ok(()) } diff --git a/src/query/range_query_ip_fastfield.rs b/src/query/range_query_ip_fastfield.rs index f73e0cf83..f5d1050a1 100644 --- a/src/query/range_query_ip_fastfield.rs +++ b/src/query/range_query_ip_fastfield.rs @@ -7,7 +7,7 @@ use std::ops::{Bound, RangeInclusive}; use std::sync::Arc; use common::BinarySerializable; -use fastfield_codecs::{Column, MonotonicallyMappableToU128}; +use fastfield_codecs::{MonotonicallyMappableToU128, OptionalColumn}; use super::range_query::map_bound; use super::{ConstScorer, Explanation, Scorer, Weight}; @@ -45,12 +45,10 @@ impl Weight for IPFastFieldRangeWeight { match field_type.fastfield_cardinality().unwrap() { Cardinality::SingleValue => { let ip_addr_fast_field = reader.fast_fields().ip_addr(self.field)?; - let value_range = bound_to_value_range( - &self.left_bound, - &self.right_bound, - ip_addr_fast_field.min_value(), - ip_addr_fast_field.max_value(), - ); + let minmax = ip_addr_fast_field + .min_value() + .zip(ip_addr_fast_field.max_value()); + let value_range = bound_to_value_range(&self.left_bound, &self.right_bound, minmax); let docset = IpRangeDocSet::new( value_range, IpFastFieldCardinality::SingleValue(ip_addr_fast_field), @@ -62,8 +60,10 @@ impl Weight for IPFastFieldRangeWeight { let value_range = bound_to_value_range( &self.left_bound, &self.right_bound, - ip_addr_fast_field.min_value(), - ip_addr_fast_field.max_value(), + Some(( + ip_addr_fast_field.min_value(), + ip_addr_fast_field.max_value(), + )), ); let docset = IpRangeDocSet::new( value_range, @@ -91,9 +91,10 @@ impl Weight for IPFastFieldRangeWeight { fn bound_to_value_range( left_bound: &Bound, right_bound: &Bound, - min_value: Ipv6Addr, - max_value: Ipv6Addr, + min_max: Option<(Ipv6Addr, Ipv6Addr)>, ) -> RangeInclusive { + let (min_value, max_value) = + min_max.unwrap_or((Ipv6Addr::from(u128::MIN), Ipv6Addr::from(u128::MAX))); let start_value = match left_bound { Bound::Included(ip_addr) => *ip_addr, Bound::Excluded(ip_addr) => Ipv6Addr::from(ip_addr.to_u128() + 1), @@ -142,7 +143,7 @@ impl VecCursor { } pub(crate) enum IpFastFieldCardinality { - SingleValue(Arc>), + SingleValue(Arc>), MultiValue(MultiValuedU128FastFieldReader), }