From c8c80d21cbb5e3f752f8ea71b23e45c07125b52b Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Mon, 14 Nov 2022 11:42:49 +0900 Subject: [PATCH] Removing the need for column on Multivalued blabla --- src/fastfield/multivalued/reader.rs | 54 ++++--------- src/query/range_query_ip_fastfield.rs | 111 ++++++++++++++++---------- 2 files changed, 83 insertions(+), 82 deletions(-) diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 1137b7be5..7cf454738 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -108,6 +108,22 @@ impl MultiValuedU128FastFieldReader { } } + #[inline] + fn get_docids_for_value_range( + &self, + value_range: RangeInclusive, + doc_id_range: Range, + positions: &mut Vec, + ) { + let position_range = self + .get_index_reader() + .docid_range_to_position_range(doc_id_range.clone()); + self.vals_reader + .get_docids_for_value_range(value_range, position_range, positions); + + self.idx_reader.positions_to_docids(doc_id_range, positions); + } + /// Returns the array of values associated to the given `doc`. #[inline] pub fn get_first_val(&self, doc: DocId) -> Option { @@ -179,44 +195,6 @@ impl MultiValuedU128FastFieldReader { } } -impl Column for MultiValuedU128FastFieldReader { - fn get_val(&self, _idx: u32) -> T { - panic!("calling get_val on a multivalue field indicates a bug") - } - - fn min_value(&self) -> T { - (self as &MultiValuedU128FastFieldReader).min_value() - } - - fn max_value(&self) -> T { - (self as &MultiValuedU128FastFieldReader).max_value() - } - - fn num_vals(&self) -> u32 { - self.total_num_vals() as u32 - } - - fn num_docs(&self) -> u32 { - self.get_index_reader().num_docs() - } - - #[inline] - fn get_docids_for_value_range( - &self, - value_range: RangeInclusive, - doc_id_range: Range, - positions: &mut Vec, - ) { - let position_range = self - .get_index_reader() - .docid_range_to_position_range(doc_id_range.clone()); - self.vals_reader - .get_docids_for_value_range(value_range, position_range, positions); - - self.idx_reader.positions_to_docids(doc_id_range, positions); - } -} - #[cfg(test)] mod tests { diff --git a/src/query/range_query_ip_fastfield.rs b/src/query/range_query_ip_fastfield.rs index 3cf406828..2868eb6e8 100644 --- a/src/query/range_query_ip_fastfield.rs +++ b/src/query/range_query_ip_fastfield.rs @@ -11,6 +11,7 @@ use fastfield_codecs::{Column, MonotonicallyMappableToU128}; use super::range_query::map_bound; use super::{ConstScorer, Explanation, Scorer, Weight}; +use crate::fastfield::MultiValuedU128FastFieldReader; use crate::schema::{Cardinality, Field}; use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, TERMINATED}; @@ -43,25 +44,27 @@ impl Weight for IPFastFieldRangeWeight { let field_type = reader.schema().get_field_entry(self.field).field_type(); match field_type.fastfield_cardinality().unwrap() { Cardinality::SingleValue => { - let ip_addr_fast_field = reader.fast_fields().ip_addr(self.field)?; + let ip_addr_fast_field: Arc> = + reader.fast_fields().ip_addr(self.field)?; let value_range = bound_to_value_range( &self.left_bound, &self.right_bound, ip_addr_fast_field.min_value(), ip_addr_fast_field.max_value(), ); - let docset = IpRangeDocSet::new(value_range, ip_addr_fast_field, false); + let docset = IpRangeDocSet::new(value_range, ip_addr_fast_field); Ok(Box::new(ConstScorer::new(docset, boost))) } Cardinality::MultiValues => { - let ip_addr_fast_field = reader.fast_fields().ip_addrs(self.field)?; + let ip_addr_fast_field: MultiValuedU128FastFieldReader = + reader.fast_fields().ip_addrs(self.field)?; let value_range = bound_to_value_range( &self.left_bound, &self.right_bound, ip_addr_fast_field.min_value(), ip_addr_fast_field.max_value(), ); - let docset = IpRangeDocSet::new(value_range, Arc::new(ip_addr_fast_field), true); + let docset = IpRangeDocSet::new(value_range, Arc::new(ip_addr_fast_field)); Ok(Box::new(ConstScorer::new(docset, boost))) } } @@ -134,10 +137,10 @@ impl VecCursor { } } -struct IpRangeDocSet { +struct IpRangeDocSet { /// The range filter on the values. value_range: RangeInclusive, - ip_addr_fast_field: Arc>, + ip_addrs: T, /// The next docid start range to fetch (inclusive). next_fetch_start: u32, /// Number of docs range checked in a batch. @@ -152,25 +155,20 @@ struct IpRangeDocSet { /// Current batch of loaded docs. loaded_docs: VecCursor, last_seek_pos_opt: Option, - /// If fast field is multivalue. - is_multivalue: bool, } const DEFAULT_FETCH_HORIZON: u32 = 128; -impl IpRangeDocSet { - fn new( - value_range: RangeInclusive, - ip_addr_fast_field: Arc>, - is_multivalue: bool, - ) -> Self { +impl IpRangeDocSet +where Self: SingleOrMultivalued +{ + fn new(value_range: RangeInclusive, ip_addrs: T) -> Self { let mut ip_range_docset = Self { value_range, - ip_addr_fast_field, + ip_addrs, loaded_docs: VecCursor::new(), next_fetch_start: 0, fetch_horizon: DEFAULT_FETCH_HORIZON, last_seek_pos_opt: None, - is_multivalue, }; ip_range_docset.reset_fetch_range(); ip_range_docset.fetch_block(); @@ -202,47 +200,72 @@ impl IpRangeDocSet { true } } +} - /// Fetches a block for docid range [next_fetch_start .. next_fetch_start + HORIZON] +trait SingleOrMultivalued { + fn num_docs(&self) -> u32; fn fetch_horizon(&mut self, horizon: u32) -> bool { - let mut finished_to_end = false; + // Have different implem for single value and multivalue + todo!(); + // let mut finished_to_end = false; - let limit = self.ip_addr_fast_field.num_docs(); - let mut end = self.next_fetch_start + horizon; - if end >= limit { - end = limit; - finished_to_end = true; - } + // let limit = self.num_docs(); + // let mut end = self.next_fetch_start + horizon; + // if end >= limit { + // end = limit; + // finished_to_end = true; + // } - let last_loaded_docs_val = self - .is_multivalue - .then(|| self.loaded_docs.last_value()) - .flatten(); + // let last_loaded_docs_val = self + // .is_multivalue + // .then(|| self.loaded_docs.last_value()) + // .flatten(); - let loaded_docs_data = self.loaded_docs.get_cleared_data(); - self.ip_addr_fast_field.get_docids_for_value_range( - self.value_range.clone(), - self.next_fetch_start..end, - loaded_docs_data, - ); - // In case of multivalues, we may have an overlap of the same docid between fetching blocks - if let Some(last_value) = last_loaded_docs_val { - while self.loaded_docs.current() == Some(last_value) { - self.loaded_docs.next(); - } - } - self.next_fetch_start = end; - finished_to_end + // let last_loaded_docs_val = + // if self.is_multivalue { + // self.loaded_docs.last_value() + // } else { + // None + // }; + + // let loaded_docs_data = self.loaded_docs.get_cleared_data(); + // self.ip_addr_fast_field.get_docids_for_value_range( + // self.value_range.clone(), + // self.next_fetch_start..end, + // loaded_docs_data, + // ); + // // In case of multivalues, we may have an overlap of the same docid between fetching + // blocks if let Some(last_value) = last_loaded_docs_val { + // while self.loaded_docs.current() == Some(last_value) { + // self.loaded_docs.next(); + // } + // } + // self.next_fetch_start = end; + // finished_to_end } } -impl DocSet for IpRangeDocSet { +impl SingleOrMultivalued for IpRangeDocSet>> { + fn num_docs(&self) -> u32 { + self.ip_addrs.num_docs() + } +} + +impl SingleOrMultivalued for IpRangeDocSet>> { + fn num_docs(&self) -> u32 { + self.ip_addrs.get_index_reader().num_docs() + } +} + +impl DocSet for IpRangeDocSet +where Self: SingleOrMultivalued +{ #[inline] fn advance(&mut self) -> DocId { if let Some(docid) = self.loaded_docs.next() { docid as u32 } else { - if self.next_fetch_start >= self.ip_addr_fast_field.num_docs() as u32 { + if self.next_fetch_start >= self.num_docs() as u32 { return TERMINATED; } self.fetch_block();