mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
Probably a waste of time
This commit is contained in:
@@ -29,14 +29,15 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64 + Debug>(
|
||||
pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64 + Debug, I>(
|
||||
column_index: SerializableColumnIndex<'_>,
|
||||
column_values: &impl Iterable<T>,
|
||||
column_values: &dyn Fn() -> I,
|
||||
output: &mut impl Write,
|
||||
) -> io::Result<()> {
|
||||
) -> io::Result<()>
|
||||
where I: Iterator<Item=T> {
|
||||
let column_index_num_bytes = serialize_column_index(column_index, output)?;
|
||||
serialize_u64_based_column_values(
|
||||
|| column_values.boxed_iter(),
|
||||
column_values,
|
||||
&[CodecType::Bitpacked, CodecType::BlockwiseLinear],
|
||||
output,
|
||||
)?;
|
||||
|
||||
@@ -25,19 +25,22 @@ pub fn stack_column_index<'a>(
|
||||
let cardinality = detect_cardinality(columns);
|
||||
match cardinality {
|
||||
Cardinality::Full => SerializableColumnIndex::Full,
|
||||
Cardinality::Optional => SerializableColumnIndex::Optional {
|
||||
non_null_row_ids: Box::new(StackedOptionalIndex {
|
||||
Cardinality::Optional => {
|
||||
let stacked_optional_index: StackedOptionalIndex<'a> = StackedOptionalIndex {
|
||||
columns,
|
||||
stack_merge_order,
|
||||
}),
|
||||
num_rows: stack_merge_order.num_rows(),
|
||||
};
|
||||
SerializableColumnIndex::Optional {
|
||||
non_null_row_ids: Box::new(move || Box::new(stacked_optional_index.iter())),
|
||||
num_rows: stack_merge_order.num_rows(),
|
||||
}
|
||||
},
|
||||
Cardinality::Multivalued => {
|
||||
let stacked_multivalued_index = StackedMultivaluedIndex {
|
||||
columns,
|
||||
stack_merge_order,
|
||||
};
|
||||
SerializableColumnIndex::Multivalued(Box::new(stacked_multivalued_index))
|
||||
SerializableColumnIndex::Multivalued(Box::new(move || stacked_multivalued_index.boxed_iter()))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -47,8 +50,8 @@ struct StackedOptionalIndex<'a> {
|
||||
stack_merge_order: &'a StackMergeOrder,
|
||||
}
|
||||
|
||||
impl<'a> Iterable<RowId> for StackedOptionalIndex<'a> {
|
||||
fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
|
||||
impl<'a> StackedOptionalIndex<'a> {
|
||||
fn iter(&self) -> impl Iterator<Item=RowId> + 'a {
|
||||
Box::new(
|
||||
self.columns
|
||||
.iter()
|
||||
@@ -100,8 +103,8 @@ fn convert_column_opt_to_multivalued_index<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterable<RowId> for StackedMultivaluedIndex<'a> {
|
||||
fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + '_> {
|
||||
impl<'a> StackedMultivaluedIndex<'a> {
|
||||
fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
|
||||
let multivalued_indexes =
|
||||
self.columns
|
||||
.iter()
|
||||
|
||||
@@ -7,15 +7,14 @@ use common::OwnedBytes;
|
||||
|
||||
use crate::column_values::u64_based::CodecType;
|
||||
use crate::column_values::ColumnValues;
|
||||
use crate::iterable::Iterable;
|
||||
use crate::RowId;
|
||||
|
||||
pub fn serialize_multivalued_index(
|
||||
multivalued_index: &dyn Iterable<RowId>,
|
||||
pub fn serialize_multivalued_index<'a>(
|
||||
multivalued_index: &'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>,
|
||||
output: &mut impl Write,
|
||||
) -> io::Result<()> {
|
||||
crate::column_values::u64_based::serialize_u64_based_column_values(
|
||||
|| multivalued_index.boxed_iter(),
|
||||
multivalued_index,
|
||||
&[CodecType::Bitpacked, CodecType::Linear],
|
||||
output,
|
||||
)?;
|
||||
|
||||
@@ -343,13 +343,13 @@ fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -
|
||||
}
|
||||
|
||||
pub fn serialize_optional_index<'a, W: io::Write>(
|
||||
non_null_rows: &dyn Iterable<RowId>,
|
||||
non_null_rows: &dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>,
|
||||
num_rows: RowId,
|
||||
output: &mut W,
|
||||
) -> io::Result<()> {
|
||||
VInt(num_rows as u64).serialize(output)?;
|
||||
|
||||
let mut rows_it = non_null_rows.boxed_iter();
|
||||
let mut rows_it = non_null_rows();
|
||||
let mut block_metadata: Vec<SerializedBlockMeta> = Vec::new();
|
||||
let mut current_block = Vec::new();
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::io::Write;
|
||||
|
||||
use common::{CountingWriter, OwnedBytes};
|
||||
|
||||
use crate::column_index::multivalued_index::serialize_multivalued_index;
|
||||
use crate::column_index::multivalued_index::{serialize_multivalued_index, self};
|
||||
use crate::column_index::optional_index::serialize_optional_index;
|
||||
use crate::column_index::ColumnIndex;
|
||||
use crate::iterable::Iterable;
|
||||
@@ -12,12 +12,12 @@ use crate::{Cardinality, RowId};
|
||||
pub enum SerializableColumnIndex<'a> {
|
||||
Full,
|
||||
Optional {
|
||||
non_null_row_ids: Box<dyn Iterable<RowId> + 'a>,
|
||||
non_null_row_ids: Box<dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a> + 'a>,
|
||||
num_rows: RowId,
|
||||
},
|
||||
// TODO remove the Arc<dyn> apart from serialization this is not
|
||||
// dynamic at all.
|
||||
Multivalued(Box<dyn Iterable<RowId> + 'a>),
|
||||
Multivalued(&'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>),
|
||||
}
|
||||
|
||||
impl<'a> SerializableColumnIndex<'a> {
|
||||
@@ -30,8 +30,8 @@ impl<'a> SerializableColumnIndex<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serialize_column_index(
|
||||
column_index: SerializableColumnIndex,
|
||||
pub fn serialize_column_index<'a>(
|
||||
column_index: SerializableColumnIndex<'a>,
|
||||
output: &mut impl Write,
|
||||
) -> io::Result<u32> {
|
||||
let mut output = CountingWriter::wrap(output);
|
||||
@@ -44,7 +44,8 @@ pub fn serialize_column_index(
|
||||
num_rows,
|
||||
} => serialize_optional_index(non_null_row_ids.as_ref(), num_rows, &mut output)?,
|
||||
SerializableColumnIndex::Multivalued(multivalued_index) => {
|
||||
serialize_multivalued_index(&*multivalued_index, &mut output)?
|
||||
let multivalued_index_ref: &'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a> = multivalued_index.as_ref();
|
||||
serialize_multivalued_index(multivalued_index_ref, &mut output)?
|
||||
}
|
||||
}
|
||||
let column_index_num_bytes = output.written_bytes() as u32;
|
||||
|
||||
@@ -80,12 +80,6 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Ord> Iterable<T> for &'a [Arc<dyn ColumnValues<T>>] {
|
||||
fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
|
||||
Box::new(self.iter().flat_map(|column_value| column_value.iter()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
|
||||
#[inline(always)]
|
||||
fn get_val(&self, idx: u32) -> T {
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::columnar::writer::CompatibleNumericalTypes;
|
||||
use crate::columnar::ColumnarReader;
|
||||
use crate::dynamic_column::DynamicColumn;
|
||||
use crate::{
|
||||
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, MonotonicallyMappableToU128,
|
||||
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues,
|
||||
NumericalType, NumericalValue,
|
||||
};
|
||||
|
||||
@@ -78,7 +78,10 @@ pub fn merge_column(
|
||||
}
|
||||
let merged_column_index =
|
||||
crate::column_index::stack_column_index(&column_indexes[..], merge_row_order);
|
||||
serialize_column_mappable_to_u64(merged_column_index, &&column_values[..], wrt)?;
|
||||
let stacked_columns_iterable = || column_values
|
||||
.iter()
|
||||
.flat_map(|column| column.iter());
|
||||
serialize_column_mappable_to_u64(merged_column_index, &stacked_columns_iterable, wrt)?;
|
||||
}
|
||||
ColumnType::IpAddr => {
|
||||
let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());
|
||||
|
||||
@@ -20,7 +20,7 @@ use crate::columnar::column_type::{ColumnType, ColumnTypeCategory};
|
||||
use crate::columnar::writer::column_writers::{
|
||||
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
|
||||
};
|
||||
use crate::columnar::writer::value_index::{IndexBuilder, PreallocatedIndexBuilders};
|
||||
use crate::columnar::writer::value_index::{IndexBuilder, PreallocatedIndexBuilders, OptionalIndexBuilder};
|
||||
use crate::dictionary::{DictionaryBuilder, TermIdMapping, UnorderedId};
|
||||
use crate::value::{Coerce, NumericalType, NumericalValue};
|
||||
use crate::{Cardinality, RowId};
|
||||
@@ -572,17 +572,17 @@ where
|
||||
Cardinality::Optional => {
|
||||
let optional_index_builder = value_index_builders.borrow_optional_index_builder();
|
||||
consume_operation_iterator(op_iterator, optional_index_builder, values);
|
||||
let optional_index = optional_index_builder.finish(num_rows);
|
||||
let non_null_rows: &[u32] = optional_index_builder.finish(num_rows);
|
||||
SerializableColumnIndex::Optional {
|
||||
num_rows,
|
||||
non_null_row_ids: Box::new(optional_index),
|
||||
non_null_row_ids: Box::new(|| Box::new(non_null_rows.iter().copied())),
|
||||
}
|
||||
}
|
||||
Cardinality::Multivalued => {
|
||||
let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder();
|
||||
consume_operation_iterator(op_iterator, multivalued_index_builder, values);
|
||||
let multivalued_index = multivalued_index_builder.finish(num_rows);
|
||||
SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
|
||||
SerializableColumnIndex::Multivalued(Box::new(|| Box::new(multivalued_index.iter().copied())))
|
||||
}
|
||||
};
|
||||
crate::column::serialize_column_mappable_to_u128(
|
||||
@@ -603,17 +603,17 @@ fn sort_values_within_row_in_place(multivalued_index: &[RowId], values: &mut Vec
|
||||
}
|
||||
}
|
||||
|
||||
fn send_to_serialize_column_mappable_to_u64(
|
||||
fn send_to_serialize_column_mappable_to_u64<'a>(
|
||||
op_iterator: impl Iterator<Item = ColumnOperation<u64>>,
|
||||
cardinality: Cardinality,
|
||||
num_rows: RowId,
|
||||
sort_values_within_row: bool,
|
||||
value_index_builders: &mut PreallocatedIndexBuilders,
|
||||
value_index_builders: &'a mut PreallocatedIndexBuilders,
|
||||
values: &mut Vec<u64>,
|
||||
mut wrt: impl io::Write,
|
||||
) -> io::Result<()>
|
||||
where
|
||||
for<'a> VecColumn<'a, u64>: ColumnValues<u64>,
|
||||
for<'b> VecColumn<'b, u64>: ColumnValues<u64>,
|
||||
{
|
||||
values.clear();
|
||||
let serializable_column_index = match cardinality {
|
||||
@@ -626,11 +626,11 @@ where
|
||||
SerializableColumnIndex::Full
|
||||
}
|
||||
Cardinality::Optional => {
|
||||
let optional_index_builder = value_index_builders.borrow_optional_index_builder();
|
||||
let optional_index_builder: &'a mut OptionalIndexBuilder = value_index_builders.borrow_optional_index_builder();
|
||||
consume_operation_iterator(op_iterator, optional_index_builder, values);
|
||||
let optional_index = optional_index_builder.finish(num_rows);
|
||||
let optional_index: &'a [u32] = optional_index_builder.finish(num_rows);
|
||||
SerializableColumnIndex::Optional {
|
||||
non_null_row_ids: Box::new(optional_index),
|
||||
non_null_row_ids: Box::new(move || Box::new(optional_index.iter().copied())),
|
||||
num_rows,
|
||||
}
|
||||
}
|
||||
@@ -641,12 +641,12 @@ where
|
||||
if sort_values_within_row {
|
||||
sort_values_within_row_in_place(multivalued_index, values);
|
||||
}
|
||||
SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
|
||||
SerializableColumnIndex::Multivalued(Box::new(|| Box::new(multivalued_index.iter().copied())))
|
||||
}
|
||||
};
|
||||
crate::column::serialize_column_mappable_to_u64(
|
||||
serializable_column_index,
|
||||
&&values[..],
|
||||
&|| values.iter().copied(),
|
||||
&mut wrt,
|
||||
)?;
|
||||
Ok(())
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::iterable::Iterable;
|
||||
use crate::RowId;
|
||||
|
||||
/// The `IndexBuilder` interprets a sequence of
|
||||
@@ -29,7 +28,7 @@ pub struct OptionalIndexBuilder {
|
||||
}
|
||||
|
||||
impl OptionalIndexBuilder {
|
||||
pub fn finish<'a>(&'a mut self, num_rows: RowId) -> impl Iterable<RowId> + 'a {
|
||||
pub fn finish<'a>(&'a mut self, num_rows: RowId) -> &'a [RowId] {
|
||||
debug_assert!(self
|
||||
.docs
|
||||
.last()
|
||||
@@ -123,20 +122,14 @@ mod tests {
|
||||
opt_value_index_builder.record_row(0u32);
|
||||
opt_value_index_builder.record_value();
|
||||
assert_eq!(
|
||||
&opt_value_index_builder
|
||||
.finish(1u32)
|
||||
.boxed_iter()
|
||||
.collect::<Vec<u32>>(),
|
||||
&opt_value_index_builder.finish(1u32),
|
||||
&[0]
|
||||
);
|
||||
opt_value_index_builder.reset();
|
||||
opt_value_index_builder.record_row(1u32);
|
||||
opt_value_index_builder.record_value();
|
||||
assert_eq!(
|
||||
&opt_value_index_builder
|
||||
.finish(2u32)
|
||||
.boxed_iter()
|
||||
.collect::<Vec<u32>>(),
|
||||
&opt_value_index_builder.finish(2u32),
|
||||
&[1]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::iter::Map;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Range;
|
||||
|
||||
@@ -36,15 +35,6 @@ where F: Fn() -> Box<dyn Iterator<Item = T>>
|
||||
}
|
||||
}
|
||||
|
||||
// impl<F, I, T> Iterable<T> for F
|
||||
// where
|
||||
// I: Iterator<Item = T>,
|
||||
// F: Fn() -> I,
|
||||
//{
|
||||
// fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
|
||||
// Box::new(self())
|
||||
//}
|
||||
|
||||
pub fn map_iterable<U, V, F, I>(
|
||||
original_iterable: impl Fn() -> I,
|
||||
transform: F,
|
||||
|
||||
Reference in New Issue
Block a user