Remove the need for a `Column` implementation on the multivalued u128 fast field reader

This commit is contained in:
Paul Masurel
2022-11-14 11:42:49 +09:00
parent a1c1f6a764
commit c8c80d21cb
2 changed files with 83 additions and 82 deletions

View File

@@ -108,6 +108,22 @@ impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
}
}
/// Collects into `positions` the doc ids from `doc_id_range` that have at least one value
/// inside `value_range`.
///
/// The doc id range is first translated into a range of value positions, the value reader
/// then fills `positions` with the matching positions, and the index reader finally
/// rewrites those positions in place as doc ids.
#[inline]
fn get_docids_for_value_range(
    &self,
    value_range: RangeInclusive<T>,
    doc_id_range: Range<u32>,
    positions: &mut Vec<u32>,
) {
    // `doc_id_range` is needed again for the final mapping, hence the clone.
    let index_reader = self.get_index_reader();
    let position_range = index_reader.docid_range_to_position_range(doc_id_range.clone());

    let values = &self.vals_reader;
    values.get_docids_for_value_range(value_range, position_range, positions);

    self.idx_reader.positions_to_docids(doc_id_range, positions);
}
/// Returns the array of values associated to the given `doc`.
#[inline]
pub fn get_first_val(&self, doc: DocId) -> Option<T> {
@@ -179,44 +195,6 @@ impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
}
}
/// `Column` view over a multi-valued u128 fast field reader.
///
/// Per-index value access is not supported (see `get_val`); the remaining methods forward
/// to the reader's own accessors.
impl<T: MonotonicallyMappableToU128> Column<T> for MultiValuedU128FastFieldReader<T> {
    /// Always panics: a multi-valued field has no single value per index.
    fn get_val(&self, _idx: u32) -> T {
        panic!("calling get_val on a multivalue field indicates a bug")
    }

    /// Forwards to the reader's inherent `min_value`.
    fn min_value(&self) -> T {
        let reader: &MultiValuedU128FastFieldReader<T> = self;
        reader.min_value()
    }

    /// Forwards to the reader's inherent `max_value`.
    fn max_value(&self) -> T {
        let reader: &MultiValuedU128FastFieldReader<T> = self;
        reader.max_value()
    }

    /// Total number of values across all documents, cast to `u32`.
    fn num_vals(&self) -> u32 {
        let total = self.total_num_vals();
        total as u32
    }

    /// Number of documents, as reported by the index reader.
    fn num_docs(&self) -> u32 {
        let index_reader = self.get_index_reader();
        index_reader.num_docs()
    }

    /// Collects into `positions` the doc ids from `doc_id_range` that have at least one
    /// value inside `value_range`.
    #[inline]
    fn get_docids_for_value_range(
        &self,
        value_range: RangeInclusive<T>,
        doc_id_range: Range<u32>,
        positions: &mut Vec<u32>,
    ) {
        // Doc ids -> value positions; `doc_id_range` is reused below, hence the clone.
        let index_reader = self.get_index_reader();
        let position_range = index_reader.docid_range_to_position_range(doc_id_range.clone());

        // Fill `positions` with the positions whose value matches ...
        let values = &self.vals_reader;
        values.get_docids_for_value_range(value_range, position_range, positions);

        // ... and rewrite them in place as doc ids.
        self.idx_reader.positions_to_docids(doc_id_range, positions);
    }
}
#[cfg(test)]
mod tests {

View File

@@ -11,6 +11,7 @@ use fastfield_codecs::{Column, MonotonicallyMappableToU128};
use super::range_query::map_bound;
use super::{ConstScorer, Explanation, Scorer, Weight};
use crate::fastfield::MultiValuedU128FastFieldReader;
use crate::schema::{Cardinality, Field};
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, TERMINATED};
@@ -43,25 +44,27 @@ impl Weight for IPFastFieldRangeWeight {
let field_type = reader.schema().get_field_entry(self.field).field_type();
match field_type.fastfield_cardinality().unwrap() {
Cardinality::SingleValue => {
let ip_addr_fast_field = reader.fast_fields().ip_addr(self.field)?;
let ip_addr_fast_field: Arc<dyn Column<Ipv6Addr>> =
reader.fast_fields().ip_addr(self.field)?;
let value_range = bound_to_value_range(
&self.left_bound,
&self.right_bound,
ip_addr_fast_field.min_value(),
ip_addr_fast_field.max_value(),
);
let docset = IpRangeDocSet::new(value_range, ip_addr_fast_field, false);
let docset = IpRangeDocSet::new(value_range, ip_addr_fast_field);
Ok(Box::new(ConstScorer::new(docset, boost)))
}
Cardinality::MultiValues => {
let ip_addr_fast_field = reader.fast_fields().ip_addrs(self.field)?;
let ip_addr_fast_field: MultiValuedU128FastFieldReader<Ipv6Addr> =
reader.fast_fields().ip_addrs(self.field)?;
let value_range = bound_to_value_range(
&self.left_bound,
&self.right_bound,
ip_addr_fast_field.min_value(),
ip_addr_fast_field.max_value(),
);
let docset = IpRangeDocSet::new(value_range, Arc::new(ip_addr_fast_field), true);
let docset = IpRangeDocSet::new(value_range, Arc::new(ip_addr_fast_field));
Ok(Box::new(ConstScorer::new(docset, boost)))
}
}
@@ -134,10 +137,10 @@ impl VecCursor {
}
}
struct IpRangeDocSet {
struct IpRangeDocSet<T> {
/// The range filter on the values.
value_range: RangeInclusive<Ipv6Addr>,
ip_addr_fast_field: Arc<dyn Column<Ipv6Addr>>,
ip_addrs: T,
/// The next docid start range to fetch (inclusive).
next_fetch_start: u32,
/// Number of docs range checked in a batch.
@@ -152,25 +155,20 @@ struct IpRangeDocSet {
/// Current batch of loaded docs.
loaded_docs: VecCursor,
last_seek_pos_opt: Option<u32>,
/// If fast field is multivalue.
is_multivalue: bool,
}
/// Initial value of `IpRangeDocSet::fetch_horizon`, i.e. the number of docs whose range is
/// checked in one batch when a doc set is created.
const DEFAULT_FETCH_HORIZON: u32 = 128;
impl IpRangeDocSet {
fn new(
value_range: RangeInclusive<Ipv6Addr>,
ip_addr_fast_field: Arc<dyn Column<Ipv6Addr>>,
is_multivalue: bool,
) -> Self {
impl<T> IpRangeDocSet<T>
where Self: SingleOrMultivalued
{
fn new(value_range: RangeInclusive<Ipv6Addr>, ip_addrs: T) -> Self {
let mut ip_range_docset = Self {
value_range,
ip_addr_fast_field,
ip_addrs,
loaded_docs: VecCursor::new(),
next_fetch_start: 0,
fetch_horizon: DEFAULT_FETCH_HORIZON,
last_seek_pos_opt: None,
is_multivalue,
};
ip_range_docset.reset_fetch_range();
ip_range_docset.fetch_block();
@@ -202,47 +200,72 @@ impl IpRangeDocSet {
true
}
}
}
/// Behaviour an `IpRangeDocSet` needs from its underlying ip fast field, so that the doc
/// set can be backed either by a single-valued column or by a multi-valued reader.
trait SingleOrMultivalued {
/// Number of documents of the underlying fast field. `advance` uses it as the exclusive
/// upper bound for `next_fetch_start`.
fn num_docs(&self) -> u32;
/// Fetches a block for docid range [next_fetch_start .. next_fetch_start + HORIZON]
///
/// Meant to return `true` when the fetched range was clamped to the last document
/// (`finished_to_end`).
///
/// NOTE(review): the body reaches `todo!()` right after its first statement, so calling
/// this default method currently panics and every statement after the `todo!()` is
/// unreachable. The live statements below also refer to fields (`ip_addr_fast_field`,
/// `is_multivalue`, ...) that a trait default method has no access to; they look like
/// leftovers of the previous inherent implementation — confirm before relying on them.
fn fetch_horizon(&mut self, horizon: u32) -> bool {
let mut finished_to_end = false;
// Have different implem for single value and multivalue
todo!();
// let mut finished_to_end = false;
let limit = self.ip_addr_fast_field.num_docs();
let mut end = self.next_fetch_start + horizon;
if end >= limit {
end = limit;
finished_to_end = true;
}
// let limit = self.num_docs();
// let mut end = self.next_fetch_start + horizon;
// if end >= limit {
// end = limit;
// finished_to_end = true;
// }
let last_loaded_docs_val = self
.is_multivalue
.then(|| self.loaded_docs.last_value())
.flatten();
// let last_loaded_docs_val = self
// .is_multivalue
// .then(|| self.loaded_docs.last_value())
// .flatten();
let loaded_docs_data = self.loaded_docs.get_cleared_data();
self.ip_addr_fast_field.get_docids_for_value_range(
self.value_range.clone(),
self.next_fetch_start..end,
loaded_docs_data,
);
// In case of multivalues, we may have an overlap of the same docid between fetching blocks
if let Some(last_value) = last_loaded_docs_val {
while self.loaded_docs.current() == Some(last_value) {
self.loaded_docs.next();
}
}
self.next_fetch_start = end;
finished_to_end
// let last_loaded_docs_val =
// if self.is_multivalue {
// self.loaded_docs.last_value()
// } else {
// None
// };
// let loaded_docs_data = self.loaded_docs.get_cleared_data();
// self.ip_addr_fast_field.get_docids_for_value_range(
// self.value_range.clone(),
// self.next_fetch_start..end,
// loaded_docs_data,
// );
// // In case of multivalues, we may have an overlap of the same docid between fetching
// // blocks.
// if let Some(last_value) = last_loaded_docs_val {
// while self.loaded_docs.current() == Some(last_value) {
// self.loaded_docs.next();
// }
// }
// self.next_fetch_start = end;
// finished_to_end
}
}
impl DocSet for IpRangeDocSet {
/// Doc set backed by a type-erased `Column<Ipv6Addr>`.
impl SingleOrMultivalued for IpRangeDocSet<Arc<dyn Column<Ipv6Addr>>> {
    /// Number of documents, as reported by the wrapped column.
    fn num_docs(&self) -> u32 {
        let column: &dyn Column<Ipv6Addr> = &*self.ip_addrs;
        column.num_docs()
    }
}
/// Doc set backed by a `MultiValuedU128FastFieldReader<Ipv6Addr>`.
impl SingleOrMultivalued for IpRangeDocSet<Arc<MultiValuedU128FastFieldReader<Ipv6Addr>>> {
    /// Number of documents, as reported by the reader's index reader.
    fn num_docs(&self) -> u32 {
        let index_reader = self.ip_addrs.get_index_reader();
        index_reader.num_docs()
    }
}
impl<T: Send> DocSet for IpRangeDocSet<T>
where Self: SingleOrMultivalued
{
#[inline]
fn advance(&mut self) -> DocId {
if let Some(docid) = self.loaded_docs.next() {
docid as u32
} else {
if self.next_fetch_start >= self.ip_addr_fast_field.num_docs() as u32 {
if self.next_fetch_start >= self.num_docs() as u32 {
return TERMINATED;
}
self.fetch_block();