mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-26 21:20:40 +00:00
remove dynamic dispatch
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::ColumnIter;
|
||||
|
||||
pub trait Column<T = u64> {
|
||||
/// Return the value associated to the given idx.
|
||||
///
|
||||
@@ -16,8 +18,12 @@ pub trait Column<T = u64> {
|
||||
///
|
||||
/// May panic if `range.end()` is greater than
|
||||
/// the segment's `maxdoc`.
|
||||
fn get_range(&self, range: Range<u64>) -> Box<dyn Iterator<Item = T> + '_> {
|
||||
Box::new(range.map(|idx| self.get_val(idx)))
|
||||
#[inline]
|
||||
fn get_range(&self, range: Range<u64>) -> ColumnIter<'_, Self, T>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
ColumnIter::new(self, range)
|
||||
}
|
||||
|
||||
/// Returns the minimum value for this fast field.
|
||||
|
||||
@@ -4,6 +4,8 @@ extern crate more_asserts;
|
||||
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Range;
|
||||
|
||||
use common::BinarySerializable;
|
||||
use ownedbytes::OwnedBytes;
|
||||
@@ -112,6 +114,41 @@ impl<'a> Column for &'a [u64] {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ColumnIter<'a, C: Column<I>, I> {
|
||||
column: &'a C,
|
||||
end_pos: u64,
|
||||
current_pos: u64,
|
||||
_phantom: PhantomData<I>,
|
||||
}
|
||||
|
||||
impl<'a, C: Column<I>, I> ColumnIter<'a, C, I> {
|
||||
#[inline]
|
||||
pub fn new(col: &'a C, range: Range<u64>) -> Self {
|
||||
let current_pos = range.start;
|
||||
Self {
|
||||
column: col,
|
||||
end_pos: range.end,
|
||||
current_pos,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C: Column<I>, I> Iterator for ColumnIter<'a, C, I> {
    type Item = I;

    /// Returns the value at the current position and advances by one, or
    /// `None` once the current position has reached the end of the range.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.current_pos >= self.end_pos {
            return None;
        }
        let val = self.column.get_val(self.current_pos);
        self.current_pos += 1;
        Some(val)
    }

    /// Reports how many values are left to yield.
    ///
    /// The count is exact, which lets consumers such as `collect` reserve the
    /// right capacity up front instead of relying on the default `(0, None)`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Saturate so a range whose start lies past its end counts as empty,
        // which is also how `next` treats it.
        let remaining = self.end_pos.saturating_sub(self.current_pos);
        if remaining <= usize::MAX as u64 {
            let len = remaining as usize;
            (len, Some(len))
        } else {
            // More positions than `usize` can express: only a lower bound is known.
            (usize::MAX, None)
        }
    }
}
|
||||
|
||||
impl Column for Vec<u64> {
|
||||
fn get_val(&self, position: u64) -> u64 {
|
||||
self[position as usize]
|
||||
|
||||
@@ -982,6 +982,67 @@ mod bench {
|
||||
use super::*;
|
||||
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
|
||||
use crate::fastfield::tests::generate_permutation_gcd;
|
||||
use crate::schema::{NumericOptions, Schema};
|
||||
use crate::Document;
|
||||
|
||||
/// Builds `num_docs` value lists, each holding `0, 1, .., vals_per_doc - 1`
/// as `u64`.
fn multi_values(num_docs: usize, vals_per_doc: usize) -> Vec<Vec<u64>> {
    (0..num_docs)
        .map(|_doc| (0..vals_per_doc).map(|val| val as u64).collect())
        .collect()
}
|
||||
|
||||
#[bench]
|
||||
fn bench_multi_value_fflookup(b: &mut Bencher) {
|
||||
let num_docs = 100_000;
|
||||
|
||||
let path = Path::new("test");
|
||||
let directory: RamDirectory = RamDirectory::create();
|
||||
{
|
||||
let options = NumericOptions::default().set_fast(Cardinality::MultiValues);
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_u64_field("field", options);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
for block in &multi_values(num_docs, 3) {
|
||||
let mut doc = Document::new();
|
||||
for val in block {
|
||||
doc.add_u64(field, *val);
|
||||
}
|
||||
fast_field_writers.add_document(&doc);
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.unwrap();
|
||||
serializer.close().unwrap();
|
||||
}
|
||||
let file = directory.open_read(&path).unwrap();
|
||||
{
|
||||
let fast_fields_composite = CompositeFile::open(&file).unwrap();
|
||||
let data_idx = fast_fields_composite.open_read_with_idx(*FIELD, 0).unwrap();
|
||||
let idx_reader = DynamicFastFieldReader::<u64>::open(data_idx).unwrap();
|
||||
|
||||
let data_vals = fast_fields_composite.open_read_with_idx(*FIELD, 1).unwrap();
|
||||
let vals_reader = DynamicFastFieldReader::<u64>::open(data_vals).unwrap();
|
||||
let fast_field_reader = MultiValuedFastFieldReader::open(idx_reader, vals_reader);
|
||||
b.iter(|| {
|
||||
let mut sum = 0u64;
|
||||
for i in 0u32..num_docs as u32 {
|
||||
sum += fast_field_reader.get_vals(i).sum::<u64>();
|
||||
}
|
||||
sum
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_intfastfield_linear_veclookup(b: &mut Bencher) {
|
||||
|
||||
@@ -18,7 +18,9 @@ pub struct MultiValuedFastFieldReader<Item: FastValue> {
|
||||
vals_reader: DynamicFastFieldReader<Item>,
|
||||
}
|
||||
|
||||
impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
|
||||
impl<Item: FastValue> MultiValuedFastFieldReader<Item>
|
||||
where DynamicFastFieldReader<Item>: Column<Item>
|
||||
{
|
||||
pub(crate) fn open(
|
||||
idx_reader: DynamicFastFieldReader<u64>,
|
||||
vals_reader: DynamicFastFieldReader<Item>,
|
||||
@@ -41,7 +43,7 @@ impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
|
||||
|
||||
/// Returns the array of values associated to the given `doc`.
|
||||
#[inline]
|
||||
pub fn get_vals(&self, doc: DocId) -> Box<dyn Iterator<Item = Item> + '_> {
|
||||
pub fn get_vals(&self, doc: DocId) -> impl Iterator<Item = Item> + '_ {
|
||||
let range = self.range(doc);
|
||||
self.vals_reader.get_range(range)
|
||||
}
|
||||
|
||||
@@ -40,40 +40,39 @@ impl<Item: FastValue> DynamicFastFieldReader<Item> {
|
||||
mut bytes: OwnedBytes,
|
||||
codec_type: FastFieldCodecType,
|
||||
) -> crate::Result<DynamicFastFieldReader<Item>> {
|
||||
let reader = match codec_type {
|
||||
FastFieldCodecType::Bitpacked => {
|
||||
DynamicFastFieldReader::Bitpacked(BitpackedCodec::open_from_bytes(bytes)?.into())
|
||||
}
|
||||
FastFieldCodecType::Linear => {
|
||||
DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into())
|
||||
}
|
||||
FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear(
|
||||
BlockwiseLinearCodec::open_from_bytes(bytes)?.into(),
|
||||
),
|
||||
FastFieldCodecType::Gcd => {
|
||||
let codec_type = FastFieldCodecType::deserialize(&mut bytes)?;
|
||||
match codec_type {
|
||||
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD(
|
||||
open_gcd_from_bytes::<BitpackedCodec>(bytes)?.into(),
|
||||
),
|
||||
FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD(
|
||||
open_gcd_from_bytes::<LinearCodec>(bytes)?.into(),
|
||||
),
|
||||
FastFieldCodecType::BlockwiseLinear => {
|
||||
DynamicFastFieldReader::BlockwiseLinearGCD(
|
||||
open_gcd_from_bytes::<BlockwiseLinearCodec>(bytes)?.into(),
|
||||
)
|
||||
}
|
||||
FastFieldCodecType::Gcd => {
|
||||
return Err(DataCorruption::comment_only(
|
||||
let reader =
|
||||
match codec_type {
|
||||
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::Bitpacked(
|
||||
BitpackedCodec::open_from_bytes(bytes)?.into(),
|
||||
),
|
||||
FastFieldCodecType::Linear => {
|
||||
DynamicFastFieldReader::Linear(LinearCodec::open_from_bytes(bytes)?.into())
|
||||
}
|
||||
FastFieldCodecType::BlockwiseLinear => DynamicFastFieldReader::BlockwiseLinear(
|
||||
BlockwiseLinearCodec::open_from_bytes(bytes)?.into(),
|
||||
),
|
||||
FastFieldCodecType::Gcd => {
|
||||
let codec_type = FastFieldCodecType::deserialize(&mut bytes)?;
|
||||
match codec_type {
|
||||
FastFieldCodecType::Bitpacked => DynamicFastFieldReader::BitpackedGCD(
|
||||
open_gcd_from_bytes::<BitpackedCodec>(bytes)?.into(),
|
||||
),
|
||||
FastFieldCodecType::Linear => DynamicFastFieldReader::LinearGCD(
|
||||
open_gcd_from_bytes::<LinearCodec>(bytes)?.into(),
|
||||
),
|
||||
FastFieldCodecType::BlockwiseLinear => {
|
||||
DynamicFastFieldReader::BlockwiseLinearGCD(
|
||||
open_gcd_from_bytes::<BlockwiseLinearCodec>(bytes)?.into(),
|
||||
)
|
||||
}
|
||||
FastFieldCodecType::Gcd => return Err(DataCorruption::comment_only(
|
||||
"Gcd codec wrapped into another gcd codec. This combination is not \
|
||||
allowed.",
|
||||
)
|
||||
.into())
|
||||
.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
Ok(reader)
|
||||
}
|
||||
|
||||
|
||||
@@ -578,6 +578,7 @@ impl IndexMerger {
|
||||
stats: FastFieldStats,
|
||||
}
|
||||
impl<'a> Column for FieldIndexAccessProvider<'a> {
|
||||
#[inline]
|
||||
fn get_val(&self, doc: u64) -> u64 {
|
||||
self.offsets[doc as usize]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user