rename, add position to docid function

This commit is contained in:
Pascal Seitz
2022-09-06 12:47:49 +08:00
parent ed85ba62b3
commit c62ddb61b7
4 changed files with 52 additions and 27 deletions

View File

@@ -52,6 +52,11 @@ impl BytesFastFieldReader {
}
impl MultiValueLength for BytesFastFieldReader {
/// Returns the positions reported by `self.range(doc_id)`, converted to a `u64` range.
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64> {
    let (first, end) = self.range(doc_id);
    std::ops::Range {
        start: first as u64,
        end: end as u64,
    }
}
/// Returns `self.num_bytes(doc_id)` converted to `u64`.
fn get_len(&self, doc_id: DocId) -> u64 {
self.num_bytes(doc_id) as u64
}

View File

@@ -16,7 +16,6 @@ pub trait FastValueU128: Clone + Copy + Send + Sync + PartialOrd + 'static {
/// Converts a value from u128
///
/// Internally all fast field values are encoded as u128.
/// **Note: To be used for converting encoded Term, Posting values.**
fn from_u128(val: u128) -> Self;
/// Converts a value to u128.

View File

@@ -20,6 +20,8 @@
//!
//! Read access performance is comparable to that of an array lookup.
use std::collections::btree_map::Range;
pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::error::{FastFieldNotAvailableError, Result};
@@ -75,6 +77,8 @@ fn value_to_u64(value: &Value) -> u64 {
/// Trait for `BytesFastFieldReader` and `MultiValuedFastFieldReader` to return the length of data
/// for a doc_id
pub trait MultiValueLength {
/// returns the positions of values associated to a doc_id
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64>;
/// returns the num of values associated to a doc_id
fn get_len(&self, doc_id: DocId) -> u64;
/// returns the sum of num values for all doc_ids
@@ -510,8 +514,8 @@ mod tests {
// multi value
let ip_addr_fast_field = fast_fields.ip_addrs(ips_field).unwrap();
assert_eq!(ip_addr_fast_field.get_val(0), None);
assert_eq!(ip_addr_fast_field.get_val(1), Some(ip2));
assert_eq!(ip_addr_fast_field.get_first_val(0), None);
assert_eq!(ip_addr_fast_field.get_first_val(1), Some(ip2));
let mut out = vec![];
ip_addr_fast_field.get_vals(0, &mut out);

View File

@@ -89,10 +89,12 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
}
impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
/// Returns the positions of the values associated to `doc_id`, as reported by `self.range`.
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64> {
self.range(doc_id)
}
/// Returns the number of values associated to `doc_id`, i.e. `self.num_vals(doc_id)` as `u64`.
fn get_len(&self, doc_id: DocId) -> u64 {
self.num_vals(doc_id) as u64
}
/// Returns the sum of the number of values over all doc ids, i.e. `self.total_num_vals()` as
/// `u64`.
fn get_total_len(&self) -> u64 {
self.total_num_vals() as u64
}
@@ -133,7 +135,7 @@ impl<Item: FastValueU128> MultiValuedU128FastFieldReader<Item> {
/// Returns the first value associated to the given `doc`, or `None` if the doc has no values.
#[inline]
pub fn get_val(&self, doc: DocId) -> Option<Item> {
pub fn get_first_val(&self, doc: DocId) -> Option<Item> {
let range = self.range(doc);
if range.is_empty() {
return None;
@@ -157,30 +159,10 @@ impl<Item: FastValueU128> MultiValuedU128FastFieldReader<Item> {
}
/// Returns all docids which are in the provided value range
pub fn get_between_vals(&self, range: RangeInclusive<Item>) -> Vec<usize> {
pub fn get_between_vals(&self, range: RangeInclusive<Item>) -> Vec<DocId> {
let positions = self.vals_reader.get_between_vals(range);
// Now we need to convert the positions to docids
let mut docs = vec![];
let mut cursor = 0usize;
let mut last_doc = None;
for pos in positions {
loop {
let range = self.range(cursor as u32);
if range.contains(&(pos as u64)) {
// avoid duplicates
if Some(cursor) == last_doc {
break;
}
docs.push(cursor);
last_doc = Some(cursor);
break;
}
cursor += 1;
}
}
docs
positions_to_docids(&positions, self)
}
/// Iterates over all elements in the fast field
@@ -220,7 +202,42 @@ impl<Item: FastValueU128> MultiValuedU128FastFieldReader<Item> {
}
}
/// Maps value positions of a 1:n (doc -> values) index back to the docs that own them.
///
/// Only the `docid -> value position range` direction is available (through
/// `MultiValueLength::get_range`), so the docs are walked front to back until the range holding
/// each position is found.
///
/// `positions` must be sorted in ascending order: the doc cursor only ever moves forward, and the
/// scan for a position ends only once a doc range containing it is reached.
///
/// A doc is emitted only once, even if several consecutive positions fall into its range.
///
/// TODO: Instead of a linear scan we can employ a binary search to match a docid to its value
/// position.
fn positions_to_docids<T: MultiValueLength>(positions: &[usize], multival_idx: &T) -> Vec<DocId> {
    let mut doc_ids: Vec<DocId> = Vec::new();
    let mut doc: DocId = 0;
    for &position in positions {
        let position = position as u64;
        // Advance to the doc whose value range holds this position.
        while !multival_idx.get_range(doc).contains(&position) {
            doc += 1;
        }
        // Consecutive positions of the same doc must not produce duplicates.
        if doc_ids.last() != Some(&doc) {
            doc_ids.push(doc);
        }
    }
    doc_ids
}
impl<Item: FastValueU128> MultiValueLength for MultiValuedU128FastFieldReader<Item> {
/// Returns the positions of the values associated to `doc_id`, as reported by `self.range`.
fn get_range(&self, doc_id: DocId) -> std::ops::Range<u64> {
self.range(doc_id)
}
/// Returns the number of values associated to `doc_id`, i.e. `self.num_vals(doc_id)` as `u64`.
fn get_len(&self, doc_id: DocId) -> u64 {
self.num_vals(doc_id) as u64
}