Probably a waste of time

This commit is contained in:
Paul Masurel
2023-02-02 11:32:56 +01:00
parent aa8408a979
commit e70acee748
10 changed files with 49 additions and 65 deletions

View File

@@ -29,14 +29,15 @@ where
Ok(())
}
pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64 + Debug>(
pub fn serialize_column_mappable_to_u64<T: MonotonicallyMappableToU64 + Debug, I>(
column_index: SerializableColumnIndex<'_>,
column_values: &impl Iterable<T>,
column_values: &dyn Fn() -> I,
output: &mut impl Write,
) -> io::Result<()> {
) -> io::Result<()>
where I: Iterator<Item=T> {
let column_index_num_bytes = serialize_column_index(column_index, output)?;
serialize_u64_based_column_values(
|| column_values.boxed_iter(),
column_values,
&[CodecType::Bitpacked, CodecType::BlockwiseLinear],
output,
)?;

View File

@@ -25,19 +25,22 @@ pub fn stack_column_index<'a>(
let cardinality = detect_cardinality(columns);
match cardinality {
Cardinality::Full => SerializableColumnIndex::Full,
Cardinality::Optional => SerializableColumnIndex::Optional {
non_null_row_ids: Box::new(StackedOptionalIndex {
Cardinality::Optional => {
let stacked_optional_index: StackedOptionalIndex<'a> = StackedOptionalIndex {
columns,
stack_merge_order,
}),
num_rows: stack_merge_order.num_rows(),
};
SerializableColumnIndex::Optional {
non_null_row_ids: Box::new(move || Box::new(stacked_optional_index.iter())),
num_rows: stack_merge_order.num_rows(),
}
},
Cardinality::Multivalued => {
let stacked_multivalued_index = StackedMultivaluedIndex {
columns,
stack_merge_order,
};
SerializableColumnIndex::Multivalued(Box::new(stacked_multivalued_index))
SerializableColumnIndex::Multivalued(Box::new(move || stacked_multivalued_index.boxed_iter()))
}
}
}
@@ -47,8 +50,8 @@ struct StackedOptionalIndex<'a> {
stack_merge_order: &'a StackMergeOrder,
}
impl<'a> Iterable<RowId> for StackedOptionalIndex<'a> {
fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
impl<'a> StackedOptionalIndex<'a> {
fn iter(&self) -> impl Iterator<Item=RowId> + 'a {
Box::new(
self.columns
.iter()
@@ -100,8 +103,8 @@ fn convert_column_opt_to_multivalued_index<'a>(
}
}
impl<'a> Iterable<RowId> for StackedMultivaluedIndex<'a> {
fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + '_> {
impl<'a> StackedMultivaluedIndex<'a> {
fn boxed_iter(&self) -> Box<dyn Iterator<Item = RowId> + 'a> {
let multivalued_indexes =
self.columns
.iter()

View File

@@ -7,15 +7,14 @@ use common::OwnedBytes;
use crate::column_values::u64_based::CodecType;
use crate::column_values::ColumnValues;
use crate::iterable::Iterable;
use crate::RowId;
pub fn serialize_multivalued_index(
multivalued_index: &dyn Iterable<RowId>,
pub fn serialize_multivalued_index<'a>(
multivalued_index: &'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>,
output: &mut impl Write,
) -> io::Result<()> {
crate::column_values::u64_based::serialize_u64_based_column_values(
|| multivalued_index.boxed_iter(),
multivalued_index,
&[CodecType::Bitpacked, CodecType::Linear],
output,
)?;

View File

@@ -343,13 +343,13 @@ fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -
}
pub fn serialize_optional_index<'a, W: io::Write>(
non_null_rows: &dyn Iterable<RowId>,
non_null_rows: &dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>,
num_rows: RowId,
output: &mut W,
) -> io::Result<()> {
VInt(num_rows as u64).serialize(output)?;
let mut rows_it = non_null_rows.boxed_iter();
let mut rows_it = non_null_rows();
let mut block_metadata: Vec<SerializedBlockMeta> = Vec::new();
let mut current_block = Vec::new();

View File

@@ -3,7 +3,7 @@ use std::io::Write;
use common::{CountingWriter, OwnedBytes};
use crate::column_index::multivalued_index::serialize_multivalued_index;
use crate::column_index::multivalued_index::{serialize_multivalued_index, self};
use crate::column_index::optional_index::serialize_optional_index;
use crate::column_index::ColumnIndex;
use crate::iterable::Iterable;
@@ -12,12 +12,12 @@ use crate::{Cardinality, RowId};
pub enum SerializableColumnIndex<'a> {
Full,
Optional {
non_null_row_ids: Box<dyn Iterable<RowId> + 'a>,
non_null_row_ids: Box<dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a> + 'a>,
num_rows: RowId,
},
// TODO: remove the Arc<dyn>; apart from serialization, this is not
// dynamic at all.
Multivalued(Box<dyn Iterable<RowId> + 'a>),
Multivalued(&'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a>),
}
impl<'a> SerializableColumnIndex<'a> {
@@ -30,8 +30,8 @@ impl<'a> SerializableColumnIndex<'a> {
}
}
pub fn serialize_column_index(
column_index: SerializableColumnIndex,
pub fn serialize_column_index<'a>(
column_index: SerializableColumnIndex<'a>,
output: &mut impl Write,
) -> io::Result<u32> {
let mut output = CountingWriter::wrap(output);
@@ -44,7 +44,8 @@ pub fn serialize_column_index(
num_rows,
} => serialize_optional_index(non_null_row_ids.as_ref(), num_rows, &mut output)?,
SerializableColumnIndex::Multivalued(multivalued_index) => {
serialize_multivalued_index(&*multivalued_index, &mut output)?
let multivalued_index_ref: &'a dyn Fn() -> Box<dyn Iterator<Item=RowId> + 'a> = multivalued_index.as_ref();
serialize_multivalued_index(multivalued_index_ref, &mut output)?
}
}
let column_index_num_bytes = output.written_bytes() as u32;

View File

@@ -80,12 +80,6 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync {
}
}
impl<'a, T: Ord> Iterable<T> for &'a [Arc<dyn ColumnValues<T>>] {
fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
Box::new(self.iter().flat_map(|column_value| column_value.iter()))
}
}
impl<T: Copy + PartialOrd + Debug> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
#[inline(always)]
fn get_val(&self, idx: u32) -> T {

View File

@@ -19,7 +19,7 @@ use crate::columnar::writer::CompatibleNumericalTypes;
use crate::columnar::ColumnarReader;
use crate::dynamic_column::DynamicColumn;
use crate::{
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, MonotonicallyMappableToU128,
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues,
NumericalType, NumericalValue,
};
@@ -78,7 +78,10 @@ pub fn merge_column(
}
let merged_column_index =
crate::column_index::stack_column_index(&column_indexes[..], merge_row_order);
serialize_column_mappable_to_u64(merged_column_index, &&column_values[..], wrt)?;
let stacked_columns_iterable = || column_values
.iter()
.flat_map(|column| column.iter());
serialize_column_mappable_to_u64(merged_column_index, &stacked_columns_iterable, wrt)?;
}
ColumnType::IpAddr => {
let mut column_indexes: Vec<Option<ColumnIndex>> = Vec::with_capacity(columns.len());

View File

@@ -20,7 +20,7 @@ use crate::columnar::column_type::{ColumnType, ColumnTypeCategory};
use crate::columnar::writer::column_writers::{
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
};
use crate::columnar::writer::value_index::{IndexBuilder, PreallocatedIndexBuilders};
use crate::columnar::writer::value_index::{IndexBuilder, PreallocatedIndexBuilders, OptionalIndexBuilder};
use crate::dictionary::{DictionaryBuilder, TermIdMapping, UnorderedId};
use crate::value::{Coerce, NumericalType, NumericalValue};
use crate::{Cardinality, RowId};
@@ -572,17 +572,17 @@ where
Cardinality::Optional => {
let optional_index_builder = value_index_builders.borrow_optional_index_builder();
consume_operation_iterator(op_iterator, optional_index_builder, values);
let optional_index = optional_index_builder.finish(num_rows);
let non_null_rows: &[u32] = optional_index_builder.finish(num_rows);
SerializableColumnIndex::Optional {
num_rows,
non_null_row_ids: Box::new(optional_index),
non_null_row_ids: Box::new(|| Box::new(non_null_rows.iter().copied())),
}
}
Cardinality::Multivalued => {
let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder();
consume_operation_iterator(op_iterator, multivalued_index_builder, values);
let multivalued_index = multivalued_index_builder.finish(num_rows);
SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
SerializableColumnIndex::Multivalued(Box::new(|| Box::new(multivalued_index.iter().copied())))
}
};
crate::column::serialize_column_mappable_to_u128(
@@ -603,17 +603,17 @@ fn sort_values_within_row_in_place(multivalued_index: &[RowId], values: &mut Vec
}
}
fn send_to_serialize_column_mappable_to_u64(
fn send_to_serialize_column_mappable_to_u64<'a>(
op_iterator: impl Iterator<Item = ColumnOperation<u64>>,
cardinality: Cardinality,
num_rows: RowId,
sort_values_within_row: bool,
value_index_builders: &mut PreallocatedIndexBuilders,
value_index_builders: &'a mut PreallocatedIndexBuilders,
values: &mut Vec<u64>,
mut wrt: impl io::Write,
) -> io::Result<()>
where
for<'a> VecColumn<'a, u64>: ColumnValues<u64>,
for<'b> VecColumn<'b, u64>: ColumnValues<u64>,
{
values.clear();
let serializable_column_index = match cardinality {
@@ -626,11 +626,11 @@ where
SerializableColumnIndex::Full
}
Cardinality::Optional => {
let optional_index_builder = value_index_builders.borrow_optional_index_builder();
let optional_index_builder: &'a mut OptionalIndexBuilder = value_index_builders.borrow_optional_index_builder();
consume_operation_iterator(op_iterator, optional_index_builder, values);
let optional_index = optional_index_builder.finish(num_rows);
let optional_index: &'a [u32] = optional_index_builder.finish(num_rows);
SerializableColumnIndex::Optional {
non_null_row_ids: Box::new(optional_index),
non_null_row_ids: Box::new(move || Box::new(optional_index.iter().copied())),
num_rows,
}
}
@@ -641,12 +641,12 @@ where
if sort_values_within_row {
sort_values_within_row_in_place(multivalued_index, values);
}
SerializableColumnIndex::Multivalued(Box::new(multivalued_index))
SerializableColumnIndex::Multivalued(Box::new(|| Box::new(multivalued_index.iter().copied())))
}
};
crate::column::serialize_column_mappable_to_u64(
serializable_column_index,
&&values[..],
&|| values.iter().copied(),
&mut wrt,
)?;
Ok(())

View File

@@ -1,4 +1,3 @@
use crate::iterable::Iterable;
use crate::RowId;
/// The `IndexBuilder` interprets a sequence of
@@ -29,7 +28,7 @@ pub struct OptionalIndexBuilder {
}
impl OptionalIndexBuilder {
pub fn finish<'a>(&'a mut self, num_rows: RowId) -> impl Iterable<RowId> + 'a {
pub fn finish<'a>(&'a mut self, num_rows: RowId) -> &'a [RowId] {
debug_assert!(self
.docs
.last()
@@ -123,20 +122,14 @@ mod tests {
opt_value_index_builder.record_row(0u32);
opt_value_index_builder.record_value();
assert_eq!(
&opt_value_index_builder
.finish(1u32)
.boxed_iter()
.collect::<Vec<u32>>(),
&opt_value_index_builder.finish(1u32),
&[0]
);
opt_value_index_builder.reset();
opt_value_index_builder.record_row(1u32);
opt_value_index_builder.record_value();
assert_eq!(
&opt_value_index_builder
.finish(2u32)
.boxed_iter()
.collect::<Vec<u32>>(),
&opt_value_index_builder.finish(2u32),
&[1]
);
}

View File

@@ -1,4 +1,3 @@
use std::iter::Map;
use std::marker::PhantomData;
use std::ops::Range;
@@ -36,15 +35,6 @@ where F: Fn() -> Box<dyn Iterator<Item = T>>
}
}
// impl<F, I, T> Iterable<T> for F
// where
// I: Iterator<Item = T>,
// F: Fn() -> I,
//{
// fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
// Box::new(self())
//}
pub fn map_iterable<U, V, F, I>(
original_iterable: impl Fn() -> I,
transform: F,