mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-26 21:20:40 +00:00
Removing the need for column on Multivalued blabla
This commit is contained in:
@@ -108,6 +108,22 @@ impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_docids_for_value_range(
|
||||
&self,
|
||||
value_range: RangeInclusive<T>,
|
||||
doc_id_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let position_range = self
|
||||
.get_index_reader()
|
||||
.docid_range_to_position_range(doc_id_range.clone());
|
||||
self.vals_reader
|
||||
.get_docids_for_value_range(value_range, position_range, positions);
|
||||
|
||||
self.idx_reader.positions_to_docids(doc_id_range, positions);
|
||||
}
|
||||
|
||||
/// Returns the array of values associated to the given `doc`.
|
||||
#[inline]
|
||||
pub fn get_first_val(&self, doc: DocId) -> Option<T> {
|
||||
@@ -179,44 +195,6 @@ impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: MonotonicallyMappableToU128> Column<T> for MultiValuedU128FastFieldReader<T> {
|
||||
fn get_val(&self, _idx: u32) -> T {
|
||||
panic!("calling get_val on a multivalue field indicates a bug")
|
||||
}
|
||||
|
||||
fn min_value(&self) -> T {
|
||||
(self as &MultiValuedU128FastFieldReader<T>).min_value()
|
||||
}
|
||||
|
||||
fn max_value(&self) -> T {
|
||||
(self as &MultiValuedU128FastFieldReader<T>).max_value()
|
||||
}
|
||||
|
||||
fn num_vals(&self) -> u32 {
|
||||
self.total_num_vals() as u32
|
||||
}
|
||||
|
||||
fn num_docs(&self) -> u32 {
|
||||
self.get_index_reader().num_docs()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_docids_for_value_range(
|
||||
&self,
|
||||
value_range: RangeInclusive<T>,
|
||||
doc_id_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let position_range = self
|
||||
.get_index_reader()
|
||||
.docid_range_to_position_range(doc_id_range.clone());
|
||||
self.vals_reader
|
||||
.get_docids_for_value_range(value_range, position_range, positions);
|
||||
|
||||
self.idx_reader.positions_to_docids(doc_id_range, positions);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ use fastfield_codecs::{Column, MonotonicallyMappableToU128};
|
||||
|
||||
use super::range_query::map_bound;
|
||||
use super::{ConstScorer, Explanation, Scorer, Weight};
|
||||
use crate::fastfield::MultiValuedU128FastFieldReader;
|
||||
use crate::schema::{Cardinality, Field};
|
||||
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, TERMINATED};
|
||||
|
||||
@@ -43,25 +44,27 @@ impl Weight for IPFastFieldRangeWeight {
|
||||
let field_type = reader.schema().get_field_entry(self.field).field_type();
|
||||
match field_type.fastfield_cardinality().unwrap() {
|
||||
Cardinality::SingleValue => {
|
||||
let ip_addr_fast_field = reader.fast_fields().ip_addr(self.field)?;
|
||||
let ip_addr_fast_field: Arc<dyn Column<Ipv6Addr>> =
|
||||
reader.fast_fields().ip_addr(self.field)?;
|
||||
let value_range = bound_to_value_range(
|
||||
&self.left_bound,
|
||||
&self.right_bound,
|
||||
ip_addr_fast_field.min_value(),
|
||||
ip_addr_fast_field.max_value(),
|
||||
);
|
||||
let docset = IpRangeDocSet::new(value_range, ip_addr_fast_field, false);
|
||||
let docset = IpRangeDocSet::new(value_range, ip_addr_fast_field);
|
||||
Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
}
|
||||
Cardinality::MultiValues => {
|
||||
let ip_addr_fast_field = reader.fast_fields().ip_addrs(self.field)?;
|
||||
let ip_addr_fast_field: MultiValuedU128FastFieldReader<Ipv6Addr> =
|
||||
reader.fast_fields().ip_addrs(self.field)?;
|
||||
let value_range = bound_to_value_range(
|
||||
&self.left_bound,
|
||||
&self.right_bound,
|
||||
ip_addr_fast_field.min_value(),
|
||||
ip_addr_fast_field.max_value(),
|
||||
);
|
||||
let docset = IpRangeDocSet::new(value_range, Arc::new(ip_addr_fast_field), true);
|
||||
let docset = IpRangeDocSet::new(value_range, Arc::new(ip_addr_fast_field));
|
||||
Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
}
|
||||
}
|
||||
@@ -134,10 +137,10 @@ impl VecCursor {
|
||||
}
|
||||
}
|
||||
|
||||
struct IpRangeDocSet {
|
||||
struct IpRangeDocSet<T> {
|
||||
/// The range filter on the values.
|
||||
value_range: RangeInclusive<Ipv6Addr>,
|
||||
ip_addr_fast_field: Arc<dyn Column<Ipv6Addr>>,
|
||||
ip_addrs: T,
|
||||
/// The next docid start range to fetch (inclusive).
|
||||
next_fetch_start: u32,
|
||||
/// Number of docs range checked in a batch.
|
||||
@@ -152,25 +155,20 @@ struct IpRangeDocSet {
|
||||
/// Current batch of loaded docs.
|
||||
loaded_docs: VecCursor,
|
||||
last_seek_pos_opt: Option<u32>,
|
||||
/// If fast field is multivalue.
|
||||
is_multivalue: bool,
|
||||
}
|
||||
|
||||
const DEFAULT_FETCH_HORIZON: u32 = 128;
|
||||
impl IpRangeDocSet {
|
||||
fn new(
|
||||
value_range: RangeInclusive<Ipv6Addr>,
|
||||
ip_addr_fast_field: Arc<dyn Column<Ipv6Addr>>,
|
||||
is_multivalue: bool,
|
||||
) -> Self {
|
||||
impl<T> IpRangeDocSet<T>
|
||||
where Self: SingleOrMultivalued
|
||||
{
|
||||
fn new(value_range: RangeInclusive<Ipv6Addr>, ip_addrs: T) -> Self {
|
||||
let mut ip_range_docset = Self {
|
||||
value_range,
|
||||
ip_addr_fast_field,
|
||||
ip_addrs,
|
||||
loaded_docs: VecCursor::new(),
|
||||
next_fetch_start: 0,
|
||||
fetch_horizon: DEFAULT_FETCH_HORIZON,
|
||||
last_seek_pos_opt: None,
|
||||
is_multivalue,
|
||||
};
|
||||
ip_range_docset.reset_fetch_range();
|
||||
ip_range_docset.fetch_block();
|
||||
@@ -202,47 +200,72 @@ impl IpRangeDocSet {
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches a block for docid range [next_fetch_start .. next_fetch_start + HORIZON]
|
||||
trait SingleOrMultivalued {
|
||||
fn num_docs(&self) -> u32;
|
||||
fn fetch_horizon(&mut self, horizon: u32) -> bool {
|
||||
let mut finished_to_end = false;
|
||||
// Have different implem for single value and multivalue
|
||||
todo!();
|
||||
// let mut finished_to_end = false;
|
||||
|
||||
let limit = self.ip_addr_fast_field.num_docs();
|
||||
let mut end = self.next_fetch_start + horizon;
|
||||
if end >= limit {
|
||||
end = limit;
|
||||
finished_to_end = true;
|
||||
}
|
||||
// let limit = self.num_docs();
|
||||
// let mut end = self.next_fetch_start + horizon;
|
||||
// if end >= limit {
|
||||
// end = limit;
|
||||
// finished_to_end = true;
|
||||
// }
|
||||
|
||||
let last_loaded_docs_val = self
|
||||
.is_multivalue
|
||||
.then(|| self.loaded_docs.last_value())
|
||||
.flatten();
|
||||
// let last_loaded_docs_val = self
|
||||
// .is_multivalue
|
||||
// .then(|| self.loaded_docs.last_value())
|
||||
// .flatten();
|
||||
|
||||
let loaded_docs_data = self.loaded_docs.get_cleared_data();
|
||||
self.ip_addr_fast_field.get_docids_for_value_range(
|
||||
self.value_range.clone(),
|
||||
self.next_fetch_start..end,
|
||||
loaded_docs_data,
|
||||
);
|
||||
// In case of multivalues, we may have an overlap of the same docid between fetching blocks
|
||||
if let Some(last_value) = last_loaded_docs_val {
|
||||
while self.loaded_docs.current() == Some(last_value) {
|
||||
self.loaded_docs.next();
|
||||
}
|
||||
}
|
||||
self.next_fetch_start = end;
|
||||
finished_to_end
|
||||
// let last_loaded_docs_val =
|
||||
// if self.is_multivalue {
|
||||
// self.loaded_docs.last_value()
|
||||
// } else {
|
||||
// None
|
||||
// };
|
||||
|
||||
// let loaded_docs_data = self.loaded_docs.get_cleared_data();
|
||||
// self.ip_addr_fast_field.get_docids_for_value_range(
|
||||
// self.value_range.clone(),
|
||||
// self.next_fetch_start..end,
|
||||
// loaded_docs_data,
|
||||
// );
|
||||
// // In case of multivalues, we may have an overlap of the same docid between fetching
|
||||
// blocks if let Some(last_value) = last_loaded_docs_val {
|
||||
// while self.loaded_docs.current() == Some(last_value) {
|
||||
// self.loaded_docs.next();
|
||||
// }
|
||||
// }
|
||||
// self.next_fetch_start = end;
|
||||
// finished_to_end
|
||||
}
|
||||
}
|
||||
|
||||
impl DocSet for IpRangeDocSet {
|
||||
impl SingleOrMultivalued for IpRangeDocSet<Arc<dyn Column<Ipv6Addr>>> {
|
||||
fn num_docs(&self) -> u32 {
|
||||
self.ip_addrs.num_docs()
|
||||
}
|
||||
}
|
||||
|
||||
impl SingleOrMultivalued for IpRangeDocSet<Arc<MultiValuedU128FastFieldReader<Ipv6Addr>>> {
|
||||
fn num_docs(&self) -> u32 {
|
||||
self.ip_addrs.get_index_reader().num_docs()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Send> DocSet for IpRangeDocSet<T>
|
||||
where Self: SingleOrMultivalued
|
||||
{
|
||||
#[inline]
|
||||
fn advance(&mut self) -> DocId {
|
||||
if let Some(docid) = self.loaded_docs.next() {
|
||||
docid as u32
|
||||
} else {
|
||||
if self.next_fetch_start >= self.ip_addr_fast_field.num_docs() as u32 {
|
||||
if self.next_fetch_start >= self.num_docs() as u32 {
|
||||
return TERMINATED;
|
||||
}
|
||||
self.fetch_block();
|
||||
|
||||
Reference in New Issue
Block a user