Compare commits

...

3 Commits

Author SHA1 Message Date
Pascal Seitz
242f554cc6 add impls 2022-09-07 12:16:29 +08:00
Pascal Seitz
ae7b72ad6f remove dynamic dispatch 2022-08-31 11:21:02 +02:00
Pascal Seitz
b977f763d7 fastfield return iterator instead fill vec
return iterator from get_vals method. This will allow to save on unnecessary vec allocations.
2022-08-29 11:09:48 +02:00
15 changed files with 234 additions and 252 deletions

View File

@@ -300,13 +300,9 @@ impl FastFieldCodec for BlockwiseLinearCodec {
// If this doesn't overflow the algorithm should be fine
let theorethical_maximum_offset =
fastfield_accessor.max_value() - fastfield_accessor.min_value();
if fastfield_accessor
fastfield_accessor
.max_value()
.checked_add(theorethical_maximum_offset)
.is_none()
{
return None;
}
.checked_add(theorethical_maximum_offset)?;
let first_val_in_first_block = fastfield_accessor.get_val(0);
let last_elem_in_first_chunk = CHUNK_SIZE.min(fastfield_accessor.num_vals());

View File

@@ -1,3 +1,7 @@
use std::ops::Range;
use crate::ColumnIter;
pub trait Column<T = u64> {
/// Return the value associated to the given idx.
///
@@ -8,23 +12,18 @@ pub trait Column<T = u64> {
/// May panic if `idx` is greater than the column length.
fn get_val(&self, idx: u64) -> T;
/// Fills an output buffer with the fast field values
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
///
/// Regardless of the type of `Item`, this method works
/// - transmuting the output array
/// - extracting the `Item`s as if they were `u64`
/// - possibly converting the `u64` value to the right type.
/// Returns an iterator over the values in the given doc range.
///
/// # Panics
///
/// May panic if `start + output.len()` is greater than
/// May panic if `range.end` is greater than
/// the segment's `maxdoc`.
fn get_range(&self, start: u64, output: &mut [T]) {
for (out, idx) in output.iter_mut().zip(start..) {
*out = self.get_val(idx);
}
#[inline]
fn get_range(&self, range: Range<u64>) -> ColumnIter<'_, Self, T>
where
Self: Sized,
{
ColumnIter::new(self, range)
}
/// Returns the minimum value for this fast field.

View File

@@ -4,6 +4,9 @@ extern crate more_asserts;
use std::io;
use std::io::Write;
use std::iter::FusedIterator;
use std::marker::PhantomData;
use std::ops::Range;
use common::BinarySerializable;
use ownedbytes::OwnedBytes;
@@ -112,6 +115,56 @@ impl<'a> Column for &'a [u64] {
}
}
/// Iterator over the values of a [`Column`] for a range of indices.
///
/// Every index produced by `range` is passed to [`Column::get_val`] and the
/// returned value is yielded, in increasing index order.
pub struct ColumnIter<'a, C: Column<I>, I> {
    column: &'a C,
    range: Range<u64>,
    _phantom: PhantomData<I>,
}

impl<'a, C: Column<I>, I> ColumnIter<'a, C, I> {
    /// Creates an iterator yielding `col.get_val(idx)` for each `idx` in `range`.
    ///
    /// An empty or reversed range (`start >= end`) yields nothing.
    #[inline]
    pub fn new(col: &'a C, range: Range<u64>) -> Self {
        Self {
            column: col,
            range,
            _phantom: PhantomData,
        }
    }
}

impl<'a, C: Column<I>, I> Iterator for ColumnIter<'a, C, I> {
    type Item = I;

    /// Returns the value at the next index, or `None` once the range is exhausted.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        Some(self.column.get_val(self.range.next()?))
    }

    /// Folds over the remaining values by folding over the index range directly.
    #[inline]
    fn fold<Acc, G>(self, init: Acc, mut g: G) -> Acc
    where
        G: FnMut(Acc, Self::Item) -> Acc,
    {
        // Copy the shared reference out first so the closure does not have to
        // capture a field of the partially moved `self`.
        let column = self.column;
        self.range
            .fold(init, move |acc, idx| g(acc, column.get_val(idx)))
    }

    /// Number of values left to yield.
    ///
    /// Delegates to the inner range: a reversed range reports `(0, Some(0))`
    /// instead of overflowing on `end - start`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.range.size_hint()
    }
}

impl<'a, C: Column<I>, I> ExactSizeIterator for ColumnIter<'a, C, I> {
    /// Number of values left to yield (`0` for an empty or reversed range).
    #[inline]
    fn len(&self) -> usize {
        self.range.size_hint().0
    }
}

impl<'a, C: Column<I>, I> FusedIterator for ColumnIter<'a, C, I> {}
impl Column for Vec<u64> {
fn get_val(&self, position: u64) -> u64 {
self[position as usize]

View File

@@ -204,13 +204,9 @@ impl FastFieldCodec for LinearCodec {
// If this doesn't overflow the algorithm should be fine
let theorethical_maximum_offset =
fastfield_accessor.max_value() - fastfield_accessor.min_value();
if fastfield_accessor
fastfield_accessor
.max_value()
.checked_add(theorethical_maximum_offset)
.is_none()
{
return None;
}
.checked_add(theorethical_maximum_offset)?;
let first_val = fastfield_accessor.get_val(0);
let last_val = fastfield_accessor.get_val(fastfield_accessor.num_vals() as u64 - 1);

View File

@@ -242,13 +242,13 @@ impl TermBuckets {
fn increment_bucket(
&mut self,
term_ids: &[u64],
term_ids: impl Iterator<Item = u64>,
doc: DocId,
sub_aggregation: &AggregationsWithAccessor,
bucket_count: &BucketCount,
blueprint: &Option<SegmentAggregationResultsCollector>,
) -> crate::Result<()> {
for &term_id in term_ids {
for term_id in term_ids {
let entry = self.entries.entry(term_id as u32).or_insert_with(|| {
bucket_count.add_count(1);
@@ -432,39 +432,30 @@ impl SegmentTermCollector {
.as_multi()
.expect("unexpected fast field cardinatility");
let mut iter = doc.chunks_exact(4);
let mut vals1 = vec![];
let mut vals2 = vec![];
let mut vals3 = vec![];
let mut vals4 = vec![];
for docs in iter.by_ref() {
accessor.get_vals(docs[0], &mut vals1);
accessor.get_vals(docs[1], &mut vals2);
accessor.get_vals(docs[2], &mut vals3);
accessor.get_vals(docs[3], &mut vals4);
self.term_buckets.increment_bucket(
&vals1,
accessor.get_vals(docs[0]),
docs[0],
&bucket_with_accessor.sub_aggregation,
&bucket_with_accessor.bucket_count,
&self.blueprint,
)?;
self.term_buckets.increment_bucket(
&vals2,
accessor.get_vals(docs[1]),
docs[1],
&bucket_with_accessor.sub_aggregation,
&bucket_with_accessor.bucket_count,
&self.blueprint,
)?;
self.term_buckets.increment_bucket(
&vals3,
accessor.get_vals(docs[2]),
docs[2],
&bucket_with_accessor.sub_aggregation,
&bucket_with_accessor.bucket_count,
&self.blueprint,
)?;
self.term_buckets.increment_bucket(
&vals4,
accessor.get_vals(docs[3]),
docs[3],
&bucket_with_accessor.sub_aggregation,
&bucket_with_accessor.bucket_count,
@@ -472,10 +463,8 @@ impl SegmentTermCollector {
)?;
}
for &doc in iter.remainder() {
accessor.get_vals(doc, &mut vals1);
self.term_buckets.increment_bucket(
&vals1,
accessor.get_vals(doc),
doc,
&bucket_with_accessor.sub_aggregation,
&bucket_with_accessor.bucket_count,
@@ -1334,11 +1323,15 @@ mod bench {
max_bucket_count: 1_000_001u32,
};
b.iter(|| {
for &val in &vals {
collector
.increment_bucket(&[val], 0, &aggregations_with_accessor, &bucket_count, &None)
.unwrap();
}
collector
.increment_bucket(
vals.iter().cloned(),
0,
&aggregations_with_accessor,
&bucket_count,
&None,
)
.unwrap();
})
}

View File

@@ -76,7 +76,8 @@ impl FacetReader {
/// Return the list of facet ordinals associated to a document.
pub fn facet_ords(&self, doc: DocId, output: &mut Vec<u64>) {
self.term_ords.get_vals(doc, output);
output.clear();
output.extend(self.term_ords.get_vals(doc))
}
}

View File

@@ -477,8 +477,7 @@ mod tests {
for (doc, i) in (-100i64..10_000i64).enumerate() {
assert_eq!(fast_field_reader.get_val(doc as u64), i);
}
let mut buffer = vec![0i64; 100];
fast_field_reader.get_range(53, &mut buffer[..]);
let buffer: Vec<i64> = fast_field_reader.get_range(53..154).collect();
for i in 0..100 {
assert_eq!(buffer[i], -100i64 + 53i64 + i as i64);
}
@@ -607,9 +606,7 @@ mod tests {
let mut all = vec![];
for doc in docs {
let mut out = vec![];
ff.get_vals(doc, &mut out);
all.extend(out);
all.extend(ff.get_vals(doc));
}
all
}
@@ -654,8 +651,7 @@ mod tests {
vec![1, 0, 0, 0, 1, 2]
);
let mut out = vec![];
text_fast_field.get_vals(3, &mut out);
let out = text_fast_field.get_vals(3u32).collect::<Vec<_>>();
assert_eq!(out, vec![0, 1]);
let inverted_index = segment_reader.inverted_index(text_field)?;
@@ -840,22 +836,20 @@ mod tests {
let fast_fields = segment_reader.fast_fields();
let date_fast_field = fast_fields.date(date_field).unwrap();
let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
let mut dates = vec![];
{
assert_eq!(date_fast_field.get_val(0).into_timestamp_micros(), 1i64);
dates_fast_field.get_vals(0u32, &mut dates);
let dates = dates_fast_field.get_vals(0u32).collect::<Vec<_>>();
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_micros(), 2i64);
assert_eq!(dates[1].into_timestamp_micros(), 3i64);
}
{
assert_eq!(date_fast_field.get_val(1).into_timestamp_micros(), 4i64);
dates_fast_field.get_vals(1u32, &mut dates);
assert!(dates.is_empty());
assert!(dates_fast_field.get_vals(1u32).next().is_none());
}
{
assert_eq!(date_fast_field.get_val(2).into_timestamp_micros(), 0i64);
dates_fast_field.get_vals(2u32, &mut dates);
let dates = dates_fast_field.get_vals(2u32).collect::<Vec<_>>();
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_timestamp_micros(), 5i64);
assert_eq!(dates[1].into_timestamp_micros(), 6i64);
@@ -988,6 +982,67 @@ mod bench {
use super::*;
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::fastfield::tests::generate_permutation_gcd;
use crate::schema::{NumericOptions, Schema};
use crate::Document;
/// Builds `num_docs` rows, each containing the values `0..vals_per_doc` as `u64`.
fn multi_values(num_docs: usize, vals_per_doc: usize) -> Vec<Vec<u64>> {
    (0..num_docs)
        .map(|_| (0..vals_per_doc).map(|val| val as u64).collect())
        .collect()
}
/// Benchmarks reading a multi-valued `u64` fast field: writes 100_000 docs
/// with three values each into a `RamDirectory`, reopens the field, and sums
/// every value of every doc through `get_vals`.
#[bench]
fn bench_multi_value_fflookup(b: &mut Bencher) {
    let num_docs = 100_000;
    let path = Path::new("test");
    let directory: RamDirectory = RamDirectory::create();

    // Write phase: serialize the multi-valued fast field into `directory`.
    {
        let ff_options = NumericOptions::default().set_fast(Cardinality::MultiValues);
        let mut schema_builder = Schema::builder();
        let field = schema_builder.add_u64_field("field", ff_options);
        let schema = schema_builder.build();

        let wrt: WritePtr = directory.open_write(path).unwrap();
        let mut serializer = CompositeFastFieldSerializer::from_write(wrt).unwrap();
        let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
        for doc_vals in multi_values(num_docs, 3) {
            let mut doc = Document::new();
            for val in doc_vals {
                doc.add_u64(field, val);
            }
            fast_field_writers.add_document(&doc);
        }
        fast_field_writers
            .serialize(&mut serializer, &HashMap::new(), None)
            .unwrap();
        serializer.close().unwrap();
    }

    // Read phase: open the index column (idx 0) and the values column (idx 1).
    let file = directory.open_read(&path).unwrap();
    {
        let composite = CompositeFile::open(&file).unwrap();
        let idx_bytes = composite.open_read_with_idx(*FIELD, 0).unwrap();
        let idx_reader = DynamicFastFieldReader::<u64>::open(idx_bytes).unwrap();
        let vals_bytes = composite.open_read_with_idx(*FIELD, 1).unwrap();
        let vals_reader = DynamicFastFieldReader::<u64>::open(vals_bytes).unwrap();
        let multi_reader = MultiValuedFastFieldReader::open(idx_reader, vals_reader);

        // The measured loop keeps its original shape so timings stay comparable.
        b.iter(|| {
            let mut total = 0u64;
            for doc in 0u32..num_docs as u32 {
                total += multi_reader.get_vals(doc).sum::<u64>();
            }
            total
        });
    }
}
#[bench]
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {

View File

@@ -36,19 +36,17 @@ mod tests {
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
let multi_value_reader = segment_reader.fast_fields().u64s(field)?;
{
multi_value_reader.get_vals(2, &mut vals);
let vals = multi_value_reader.get_vals(2u32).collect::<Vec<_>>();
assert_eq!(&vals, &[4u64]);
}
{
multi_value_reader.get_vals(0, &mut vals);
let vals = multi_value_reader.get_vals(0u32).collect::<Vec<_>>();
assert_eq!(&vals, &[1u64, 3u64]);
}
{
multi_value_reader.get_vals(1, &mut vals);
assert!(vals.is_empty());
assert!(multi_value_reader.get_vals(1u32).next().is_none());
}
Ok(())
}
@@ -213,15 +211,13 @@ mod tests {
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
let multi_value_reader = segment_reader.fast_fields().i64s(field).unwrap();
multi_value_reader.get_vals(2, &mut vals);
let vals = multi_value_reader.get_vals(2u32).collect::<Vec<_>>();
assert_eq!(&vals, &[-4i64]);
multi_value_reader.get_vals(0, &mut vals);
let vals = multi_value_reader.get_vals(0u32).collect::<Vec<_>>();
assert_eq!(&vals, &[1i64, 3i64]);
multi_value_reader.get_vals(1, &mut vals);
assert!(vals.is_empty());
multi_value_reader.get_vals(3, &mut vals);
assert!(multi_value_reader.get_vals(1u32).next().is_none());
let vals = multi_value_reader.get_vals(3u32).collect::<Vec<_>>();
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
Ok(())
}
@@ -245,15 +241,13 @@ mod tests {
let searcher = index.reader()?.searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
let multi_value_reader = segment_reader.fast_fields().bools(bool_field).unwrap();
multi_value_reader.get_vals(2, &mut vals);
let vals = multi_value_reader.get_vals(2u32).collect::<Vec<_>>();
assert_eq!(&vals, &[false]);
multi_value_reader.get_vals(0, &mut vals);
let vals = multi_value_reader.get_vals(0u32).collect::<Vec<_>>();
assert_eq!(&vals, &[true, false]);
multi_value_reader.get_vals(1, &mut vals);
assert!(vals.is_empty());
multi_value_reader.get_vals(3, &mut vals);
assert!(multi_value_reader.get_vals(1u32).next().is_none());
let vals = multi_value_reader.get_vals(3u32).collect::<Vec<_>>();
assert_eq!(&vals, &[true, true, false]);
Ok(())
}

View File

@@ -18,7 +18,9 @@ pub struct MultiValuedFastFieldReader<Item: FastValue> {
vals_reader: DynamicFastFieldReader<Item>,
}
impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
impl<Item: FastValue> MultiValuedFastFieldReader<Item>
where DynamicFastFieldReader<Item>: Column<Item>
{
pub(crate) fn open(
idx_reader: DynamicFastFieldReader<u64>,
vals_reader: DynamicFastFieldReader<Item>,
@@ -41,17 +43,9 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
/// Returns the array of values associated to the given `doc`.
#[inline]
fn get_vals_for_range(&self, range: Range<u64>, vals: &mut Vec<Item>) {
let len = (range.end - range.start) as usize;
vals.resize(len, Item::make_zero());
self.vals_reader.get_range(range.start, &mut vals[..]);
}
/// Returns the array of values associated to the given `doc`.
#[inline]
pub fn get_vals(&self, doc: DocId, vals: &mut Vec<Item>) {
pub fn get_vals(&self, doc: DocId) -> impl Iterator<Item = Item> + '_ {
let range = self.range(doc);
self.get_vals_for_range(range, vals);
self.vals_reader.get_range(range)
}
/// Returns the minimum value for this fast field.

View File

@@ -40,40 +40,39 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
mut bytes: OwnedBytes,
codec_type: FastFieldCodecType,
) -> crate::Result<DynamicFastFieldReader<Item>> {
let reader = match codec_type {
FastFieldCodecType::Bitpacked => {
DynamicFastFieldReader::Bitpacked(BitpackedCodec::open_from_bytes(bytes)?.into())
}
FastFieldCodecType::Linear => {
DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into())
}
FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear(
BlockwiseLinearCodec::open_from_bytes(bytes)?.into(),
),
FastFieldCodecType::Gcd => {
let codec_type = FastFieldCodecType::deserialize(&mut bytes)?;
match codec_type {
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD(
open_gcd_from_bytes::<BitpackedCodec>(bytes)?.into(),
),
FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD(
open_gcd_from_bytes::<LinearCodec>(bytes)?.into(),
),
FastFieldCodecType::BlockwiseLinear => {
DynamicFastFieldReader::BlockwiseLinearGCD(
open_gcd_from_bytes::<BlockwiseLinearCodec>(bytes)?.into(),
)
}
FastFieldCodecType::Gcd => {
return Err(DataCorruption::comment_only(
let reader =
match codec_type {
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::Bitpacked(
BitpackedCodec::open_from_bytes(bytes)?.into(),
),
FastFieldCodecType::Linear => {
DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into())
}
FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear(
BlockwiseLinearCodec::open_from_bytes(bytes)?.into(),
),
FastFieldCodecType::Gcd => {
let codec_type = FastFieldCodecType::deserialize(&mut bytes)?;
match codec_type {
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD(
open_gcd_from_bytes::<BitpackedCodec>(bytes)?.into(),
),
FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD(
open_gcd_from_bytes::<LinearCodec>(bytes)?.into(),
),
FastFieldCodecType::BlockwiseLinear => {
DynamicFastFieldReader::BlockwiseLinearGCD(
open_gcd_from_bytes::<BlockwiseLinearCodec>(bytes)?.into(),
)
}
FastFieldCodecType::Gcd => return Err(DataCorruption::comment_only(
"Gcd codec wrapped into another gcd codec. This combination is not \
allowed.",
)
.into())
.into()),
}
}
}
};
};
Ok(reader)
}
@@ -97,17 +96,6 @@ impl<Item: FastValue> Column<Item> for DynamicFastFieldReader<Item> {
Self::BlockwiseLinearGCD(reader) => reader.get_val(idx),
}
}
#[inline]
fn get_range(&self, start: u64, output: &mut [Item]) {
match self {
Self::Bitpacked(reader) => reader.get_range(start, output),
Self::Linear(reader) => reader.get_range(start, output),
Self::BlockwiseLinear(reader) => reader.get_range(start, output),
Self::BitpackedGCD(reader) => reader.get_range(start, output),
Self::LinearGCD(reader) => reader.get_range(start, output),
Self::BlockwiseLinearGCD(reader) => reader.get_range(start, output),
}
}
fn min_value(&self) -> Item {
match self {
Self::Bitpacked(reader) => reader.min_value(),
@@ -167,24 +155,6 @@ impl<Item: FastValue, D: Column> FastFieldReaderCodecWrapper<Item, D> {
let data = self.reader.get_val(idx);
Item::from_u64(data)
}
/// Internally `multivalued` also use SingleValue Fast fields.
/// It works as follows... A first column contains the list of start index
/// for each document, a second column contains the actual values.
///
/// The values associated to a given doc, are then
/// `second_column[first_column.get(doc)..first_column.get(doc+1)]`.
///
/// Which means single value fast field reader can be indexed internally with
/// something different from a `DocId`. For this use case, we want to use `u64`
/// values.
///
/// See `get_range` for an actual documentation about this method.
pub(crate) fn get_range_u64(&self, start: u64, output: &mut [Item]) {
for (i, out) in output.iter_mut().enumerate() {
*out = self.get_u64(start + (i as u64));
}
}
}
impl<Item: FastValue, C: Column + Clone> Column<Item> for FastFieldReaderCodecWrapper<Item, C> {
@@ -200,23 +170,6 @@ impl<Item: FastValue, C: Column + Clone> Column<Item> for FastFieldReaderCodecWr
self.get_u64(idx)
}
/// Fills an output buffer with the fast field values
/// associated with the `DocId` going from
/// `start` to `start + output.len()`.
///
/// Regardless of the type of `Item`, this method works
/// - transmuting the output array
/// - extracting the `Item`s as if they were `u64`
/// - possibly converting the `u64` value to the right type.
///
/// # Panics
///
/// May panic if `start + output.len()` is greater than
/// the segment's `maxdoc`.
fn get_range(&self, start: u64, output: &mut [Item]) {
self.get_range_u64(start, output);
}
/// Returns the minimum value for this fast field.
///
/// The max value does not take in account of possible

View File

@@ -471,15 +471,13 @@ mod tests_indexsorting {
let multi_numbers = index.schema().get_field("multi_numbers").unwrap();
let multifield = fast_fields.u64s(multi_numbers).unwrap();
let mut vals = vec![];
multifield.get_vals(0u32, &mut vals);
let vals = multifield.get_vals(0u32).collect::<Vec<_>>();
assert_eq!(vals, &[] as &[u64]);
let mut vals = vec![];
multifield.get_vals(1u32, &mut vals);
let vals = multifield.get_vals(1u32).collect::<Vec<_>>();
assert_eq!(vals, &[5, 6]);
let mut vals = vec![];
multifield.get_vals(2u32, &mut vals);
let vals = multifield.get_vals(2u32).collect::<Vec<_>>();
assert_eq!(vals, &[3]);
Ok(())
}

View File

@@ -1535,13 +1535,11 @@ mod tests {
let ff_reader = segment_reader.fast_fields().u64s(multi_numbers).unwrap();
let bool_ff_reader = segment_reader.fast_fields().bools(multi_bools).unwrap();
for doc in segment_reader.doc_ids_alive() {
let mut vals = vec![];
ff_reader.get_vals(doc, &mut vals);
let vals = ff_reader.get_vals(doc).collect::<Vec<_>>();
assert_eq!(vals.len(), 2);
assert_eq!(vals[0], vals[1]);
let mut bool_vals = vec![];
bool_ff_reader.get_vals(doc, &mut bool_vals);
let bool_vals = bool_ff_reader.get_vals(doc).collect::<Vec<_>>();
assert_eq!(bool_vals.len(), 2);
assert_ne!(bool_vals[0], bool_vals[1]);

View File

@@ -578,6 +578,7 @@ impl IndexMerger {
stats: FastFieldStats,
}
impl<'a> Column for FieldIndexAccessProvider<'a> {
/// Returns the entry of `offsets` at position `doc`.
///
/// NOTE(review): the index is cast with `as usize` and used unchecked, so an
/// out-of-range `doc` presumably panics — confirm against the type of `offsets`.
#[inline]
fn get_val(&self, doc: u64) -> u64 {
self.offsets[doc as usize]
}
@@ -668,15 +669,13 @@ impl IndexMerger {
{
let mut serialize_vals =
fast_field_serializer.new_u64_fast_field_with_idx(field, 0u64, max_term_ord, 1)?;
let mut vals = Vec::with_capacity(100);
for old_doc_addr in doc_id_mapping.iter_old_doc_addrs() {
let term_ordinal_mapping: &[TermOrdinal] =
term_ordinal_mappings.get_segment(old_doc_addr.segment_ord as usize);
let ff_reader = &fast_field_reader[old_doc_addr.segment_ord as usize];
ff_reader.get_vals(old_doc_addr.doc_id, &mut vals);
for &prev_term_ord in &vals {
for prev_term_ord in ff_reader.get_vals(old_doc_addr.doc_id) {
let new_term_ord = term_ordinal_mapping[prev_term_ord as usize];
serialize_vals.add_val(new_term_ord)?;
}
@@ -729,8 +728,6 @@ impl IndexMerger {
let mut max_value = u64::MIN;
let mut num_vals = 0;
let mut vals = Vec::with_capacity(100);
let mut ff_readers = Vec::new();
// Our values are bitpacked and we need to know what should be
@@ -748,12 +745,11 @@ impl IndexMerger {
Please report.",
);
for doc in reader.doc_ids_alive() {
ff_reader.get_vals(doc, &mut vals);
for &val in &vals {
for val in ff_reader.get_vals(doc) {
min_value = cmp::min(val, min_value);
max_value = cmp::max(val, max_value);
num_vals += 1;
}
num_vals += vals.len();
}
ff_readers.push(ff_reader);
// TODO optimize when no deletes
@@ -796,11 +792,10 @@ impl IndexMerger {
let num_vals = self.fast_field_readers[old_doc_addr.segment_ord as usize]
.get_len(old_doc_addr.doc_id);
assert!(num_vals >= pos_in_values);
let mut vals = Vec::new();
self.fast_field_readers[old_doc_addr.segment_ord as usize]
.get_vals(old_doc_addr.doc_id, &mut vals);
vals[pos_in_values as usize]
.get_vals(old_doc_addr.doc_id)
.nth(pos_in_values as usize)
.expect("computation error in SortedDocIdMultiValueAccessProvider")
}
fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
@@ -810,9 +805,7 @@ impl IndexMerger {
.flat_map(|old_doc_addr| {
let ff_reader =
&self.fast_field_readers[old_doc_addr.segment_ord as usize];
let mut vals = Vec::new();
ff_reader.get_vals(old_doc_addr.doc_id, &mut vals);
vals.into_iter()
ff_reader.get_vals(old_doc_addr.doc_id)
}),
)
}
@@ -1975,49 +1968,32 @@ mod tests {
}
let reader = index.reader()?;
let searcher = reader.searcher();
let mut vals: Vec<u64> = Vec::new();
{
let segment = searcher.segment_reader(0u32);
let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
ff_reader.get_vals(0, &mut vals);
assert_eq!(&vals, &[1, 2]);
ff_reader.get_vals(1, &mut vals);
assert_eq!(&vals, &[1, 2, 3]);
ff_reader.get_vals(2, &mut vals);
assert_eq!(&vals, &[4, 5]);
ff_reader.get_vals(3, &mut vals);
assert_eq!(&vals, &[1, 2]);
ff_reader.get_vals(4, &mut vals);
assert_eq!(&vals, &[1, 5]);
ff_reader.get_vals(5, &mut vals);
assert_eq!(&vals, &[3]);
ff_reader.get_vals(6, &mut vals);
assert_eq!(&vals, &[17]);
assert_eq!(&ff_reader.get_vals(0).collect::<Vec<_>>(), &[1, 2]);
assert_eq!(&ff_reader.get_vals(1).collect::<Vec<_>>(), &[1, 2, 3]);
assert_eq!(&ff_reader.get_vals(2).collect::<Vec<_>>(), &[4, 5]);
assert_eq!(&ff_reader.get_vals(3).collect::<Vec<_>>(), &[1, 2]);
assert_eq!(&ff_reader.get_vals(4).collect::<Vec<_>>(), &[1, 5]);
assert_eq!(&ff_reader.get_vals(5).collect::<Vec<_>>(), &[3]);
assert_eq!(&ff_reader.get_vals(6).collect::<Vec<_>>(), &[17]);
}
{
let segment = searcher.segment_reader(1u32);
let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
ff_reader.get_vals(0, &mut vals);
assert_eq!(&vals, &[28, 27]);
assert_eq!(&ff_reader.get_vals(0).collect::<Vec<_>>(), &[28, 27]);
ff_reader.get_vals(1, &mut vals);
assert_eq!(&vals, &[1_000]);
assert_eq!(&ff_reader.get_vals(1).collect::<Vec<_>>(), &[1000]);
}
{
let segment = searcher.segment_reader(2u32);
let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
ff_reader.get_vals(0, &mut vals);
assert_eq!(&vals, &[20]);
assert_eq!(&ff_reader.get_vals(0).collect::<Vec<_>>(), &[20]);
}
// Merging the segments
@@ -2034,35 +2010,16 @@ mod tests {
let segment = searcher.segment_reader(0u32);
let ff_reader = segment.fast_fields().u64s(int_field).unwrap();
ff_reader.get_vals(0, &mut vals);
assert_eq!(&vals, &[1, 2]);
ff_reader.get_vals(1, &mut vals);
assert_eq!(&vals, &[1, 2, 3]);
ff_reader.get_vals(2, &mut vals);
assert_eq!(&vals, &[4, 5]);
ff_reader.get_vals(3, &mut vals);
assert_eq!(&vals, &[1, 2]);
ff_reader.get_vals(4, &mut vals);
assert_eq!(&vals, &[1, 5]);
ff_reader.get_vals(5, &mut vals);
assert_eq!(&vals, &[3]);
ff_reader.get_vals(6, &mut vals);
assert_eq!(&vals, &[17]);
ff_reader.get_vals(7, &mut vals);
assert_eq!(&vals, &[28, 27]);
ff_reader.get_vals(8, &mut vals);
assert_eq!(&vals, &[1_000]);
ff_reader.get_vals(9, &mut vals);
assert_eq!(&vals, &[20]);
assert_eq!(&ff_reader.get_vals(0).collect::<Vec<_>>(), &[1, 2]);
assert_eq!(&ff_reader.get_vals(1).collect::<Vec<_>>(), &[1, 2, 3]);
assert_eq!(&ff_reader.get_vals(2).collect::<Vec<_>>(), &[4, 5]);
assert_eq!(&ff_reader.get_vals(3).collect::<Vec<_>>(), &[1, 2]);
assert_eq!(&ff_reader.get_vals(4).collect::<Vec<_>>(), &[1, 5]);
assert_eq!(&ff_reader.get_vals(5).collect::<Vec<_>>(), &[3]);
assert_eq!(&ff_reader.get_vals(6).collect::<Vec<_>>(), &[17]);
assert_eq!(&ff_reader.get_vals(7).collect::<Vec<_>>(), &[28, 27]);
assert_eq!(&ff_reader.get_vals(8).collect::<Vec<_>>(), &[1_000]);
assert_eq!(&ff_reader.get_vals(9).collect::<Vec<_>>(), &[20]);
}
Ok(())
}

View File

@@ -383,9 +383,7 @@ mod tests {
assert_eq!(fast_field.get_val(5), 1_000u64);
let get_vals = |fast_field: &MultiValuedFastFieldReader<u64>, doc_id: u32| -> Vec<u64> {
let mut vals = vec![];
fast_field.get_vals(doc_id, &mut vals);
vals
fast_field.get_vals(doc_id).collect()
};
let fast_fields = segment_reader.fast_fields();
let fast_field = fast_fields.u64s(multi_numbers).unwrap();

View File

@@ -639,10 +639,7 @@ Survey in 2016, 2017, and 2018."#;
#[test]
fn test_collapse_overlapped_ranges() {
assert_eq!(&collapse_overlapped_ranges(&[0..1, 2..3,]), &[0..1, 2..3]);
assert_eq!(
collapse_overlapped_ranges(&vec![0..1, 1..2,]),
vec![0..1, 1..2]
);
assert_eq!(collapse_overlapped_ranges(&[0..1, 1..2,]), &[0..1, 1..2]);
assert_eq!(collapse_overlapped_ranges(&[0..2, 1..2,]), vec![0..2]);
assert_eq!(collapse_overlapped_ranges(&[0..2, 1..3,]), vec![0..3]);
assert_eq!(collapse_overlapped_ranges(&[0..3, 1..2,]), vec![0..3]);