remove dynamic dispatch

This commit is contained in:
Pascal Seitz
2022-08-31 11:17:42 +02:00
parent b977f763d7
commit ae7b72ad6f
6 changed files with 139 additions and 33 deletions

View File

@@ -1,5 +1,7 @@
use std::ops::Range;
use crate::ColumnIter;
pub trait Column<T = u64> {
/// Return the value associated to the given idx.
///
@@ -16,8 +18,12 @@ pub trait Column<T = u64> {
///
/// May panic if `range.end` is greater than
/// the segment's `maxdoc`.
fn get_range(&self, range: Range<u64>) -> Box<dyn Iterator<Item = T> + '_> {
Box::new(range.map(|idx| self.get_val(idx)))
#[inline]
fn get_range(&self, range: Range<u64>) -> ColumnIter<'_, Self, T>
where
Self: Sized,
{
ColumnIter::new(self, range)
}
/// Returns the minimum value for this fast field.

View File

@@ -4,6 +4,8 @@ extern crate more_asserts;
use std::io;
use std::io::Write;
use std::marker::PhantomData;
use std::ops::Range;
use common::BinarySerializable;
use ownedbytes::OwnedBytes;
@@ -112,6 +114,41 @@ impl<'a> Column for &'a [u64] {
}
}
/// Iterator over the values a [`Column`] stores at a half-open range of positions.
///
/// The concrete column type `C` is a generic parameter, so every value lookup
/// is a statically dispatched call to `C::get_val`.
pub struct ColumnIter<'a, C, I>
where
    C: Column<I>,
{
    /// Column the values are read from.
    column: &'a C,
    /// Exclusive upper bound: iteration stops once `current_pos` reaches it.
    end_pos: u64,
    /// Position of the next value to read.
    current_pos: u64,
    /// Records the item type `I` without storing a value of that type.
    _phantom: PhantomData<I>,
}
impl<'a, C: Column<I>, I> ColumnIter<'a, C, I> {
#[inline]
pub fn new(col: &'a C, range: Range<u64>) -> Self {
let current_pos = range.start;
Self {
column: col,
end_pos: range.end,
current_pos,
_phantom: PhantomData,
}
}
}
impl<'a, C: Column<I>, I> Iterator for ColumnIter<'a, C, I> {
    type Item = I;

    /// Returns the value at the current position and advances by one, or
    /// `None` once the current position has reached the end position.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.current_pos >= self.end_pos {
            return None;
        }
        let val = self.column.get_val(self.current_pos);
        self.current_pos += 1;
        Some(val)
    }

    /// Reports how many values are still to be yielded.
    ///
    /// Without this override the default `(0, None)` is reported, which keeps
    /// consumers such as `collect` and `extend` from reserving capacity up
    /// front.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Saturate so a range whose start lies past its end reports zero.
        let remaining = self.end_pos.saturating_sub(self.current_pos);
        if remaining <= usize::MAX as u64 {
            let len = remaining as usize;
            (len, Some(len))
        } else {
            // More items than `usize` can express (only possible where `usize`
            // is narrower than 64 bits): follow the convention for overflowing
            // bounds.
            (usize::MAX, None)
        }
    }
}
impl Column for Vec<u64> {
fn get_val(&self, position: u64) -> u64 {
self[position as usize]

View File

@@ -982,6 +982,67 @@ mod bench {
use super::*;
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::fastfield::tests::generate_permutation_gcd;
use crate::schema::{NumericOptions, Schema};
use crate::Document;
/// Builds `num_docs` rows, each holding the values `0..vals_per_doc` as `u64`.
fn multi_values(num_docs: usize, vals_per_doc: usize) -> Vec<Vec<u64>> {
    (0..num_docs)
        .map(|_| (0..vals_per_doc).map(|val| val as u64).collect())
        .collect()
}
/// Benchmarks summing every value of a multi-valued `u64` fast field.
///
/// The setup serializes 100_000 documents, each carrying the values `0`, `1`
/// and `2`, into a `RamDirectory`, then reopens the written file. The closure
/// handed to `b.iter` walks all documents and sums the values returned by
/// `get_vals`.
#[bench]
fn bench_multi_value_fflookup(b: &mut Bencher) {
    let num_docs = 100_000;
    // Single source of truth for the file name used by both the writer and the reader.
    let path = Path::new("test");
    let directory: RamDirectory = RamDirectory::create();
    {
        let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
        let mut schema_builder = Schema::builder();
        let field = schema_builder.add_u64_field("field", options);
        let schema = schema_builder.build();
        let write: WritePtr = directory.open_write(path).unwrap();
        let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
        let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
        for block in &multi_values(num_docs, 3) {
            let mut doc = Document::new();
            for val in block {
                doc.add_u64(field, *val);
            }
            fast_field_writers.add_document(&doc);
        }
        fast_field_writers
            .serialize(&mut serializer, &HashMap::new(), None)
            .unwrap();
        serializer.close().unwrap();
    }
    let file = directory.open_read(path).unwrap();
    {
        // NOTE(review): `FIELD` is defined outside this function; presumably it
        // designates the same field as the "field" entry added to the schema
        // above — confirm.
        let fast_fields_composite = CompositeFile::open(&file).unwrap();
        // Index 0 is opened as the index reader and index 1 as the values reader.
        let data_idx = fast_fields_composite.open_read_with_idx(*FIELD, 0).unwrap();
        let idx_reader = DynamicFastFieldReader::<u64>::open(data_idx).unwrap();
        let data_vals = fast_fields_composite.open_read_with_idx(*FIELD, 1).unwrap();
        let vals_reader = DynamicFastFieldReader::<u64>::open(data_vals).unwrap();
        let fast_field_reader = MultiValuedFastFieldReader::open(idx_reader, vals_reader);
        b.iter(|| {
            let mut sum = 0u64;
            for i in 0u32..num_docs as u32 {
                sum += fast_field_reader.get_vals(i).sum::<u64>();
            }
            // Returning the sum keeps the result observable to the bencher.
            sum
        });
    }
}
#[bench]
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {

View File

@@ -18,7 +18,9 @@ pub struct MultiValuedFastFieldReader<Item: FastValue> {
vals_reader: DynamicFastFieldReader<Item>,
}
impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
impl<Item: FastValue> MultiValuedFastFieldReader<Item>
where DynamicFastFieldReader<Item>: Column<Item>
{
pub(crate) fn open(
idx_reader: DynamicFastFieldReader<u64>,
vals_reader: DynamicFastFieldReader<Item>,
@@ -41,7 +43,7 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
/// Returns the array of values associated to the given `doc`.
#[inline]
pub fn get_vals(&self, doc: DocId) -> Box<dyn Iterator<Item = Item> + '_> {
pub fn get_vals(&self, doc: DocId) -> impl Iterator<Item = Item> + '_ {
let range = self.range(doc);
self.vals_reader.get_range(range)
}

View File

@@ -40,40 +40,39 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
mut bytes: OwnedBytes,
codec_type: FastFieldCodecType,
) -> crate::Result<DynamicFastFieldReader<Item>> {
let reader = match codec_type {
FastFieldCodecType::Bitpacked => {
DynamicFastFieldReader::Bitpacked(BitpackedCodec::open_from_bytes(bytes)?.into())
}
FastFieldCodecType::Linear => {
DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into())
}
FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear(
BlockwiseLinearCodec::open_from_bytes(bytes)?.into(),
),
FastFieldCodecType::Gcd => {
let codec_type = FastFieldCodecType::deserialize(&mut bytes)?;
match codec_type {
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD(
open_gcd_from_bytes::<BitpackedCodec>(bytes)?.into(),
),
FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD(
open_gcd_from_bytes::<LinearCodec>(bytes)?.into(),
),
FastFieldCodecType::BlockwiseLinear => {
DynamicFastFieldReader::BlockwiseLinearGCD(
open_gcd_from_bytes::<BlockwiseLinearCodec>(bytes)?.into(),
)
}
FastFieldCodecType::Gcd => {
return Err(DataCorruption::comment_only(
let reader =
match codec_type {
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::Bitpacked(
BitpackedCodec::open_from_bytes(bytes)?.into(),
),
FastFieldCodecType::Linear => {
DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into())
}
FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear(
BlockwiseLinearCodec::open_from_bytes(bytes)?.into(),
),
FastFieldCodecType::Gcd => {
let codec_type = FastFieldCodecType::deserialize(&mut bytes)?;
match codec_type {
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD(
open_gcd_from_bytes::<BitpackedCodec>(bytes)?.into(),
),
FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD(
open_gcd_from_bytes::<LinearCodec>(bytes)?.into(),
),
FastFieldCodecType::BlockwiseLinear => {
DynamicFastFieldReader::BlockwiseLinearGCD(
open_gcd_from_bytes::<BlockwiseLinearCodec>(bytes)?.into(),
)
}
FastFieldCodecType::Gcd => return Err(DataCorruption::comment_only(
"Gcd codec wrapped into another gcd codec. This combination is not \
allowed.",
)
.into())
.into()),
}
}
}
};
};
Ok(reader)
}

View File

@@ -578,6 +578,7 @@ impl IndexMerger {
stats: FastFieldStats,
}
impl<'a> Column for FieldIndexAccessProvider<'a> {
#[inline]
fn get_val(&self, doc: u64) -> u64 {
self.offsets[doc as usize]
}