mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
update edition to 2024 (#2620)
* update common to edition 2024
* update bitpacker to edition 2024
* update stacker to edition 2024
* update query-grammar to edition 2024
* update sstable to edition 2024 + fmt
* fmt
* update columnar to edition 2024
* cargo fmt
* use None instead of _
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "tantivy-columnar"
|
||||
version = "0.3.0"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
repository = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use binggan::{black_box, InputGroup};
|
||||
use binggan::{InputGroup, black_box};
|
||||
use common::*;
|
||||
use tantivy_columnar::Column;
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@ extern crate test;
|
||||
use std::sync::Arc;
|
||||
|
||||
use rand::prelude::*;
|
||||
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
|
||||
use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
|
||||
use tantivy_columnar::*;
|
||||
use test::{black_box, Bencher};
|
||||
use test::{Bencher, black_box};
|
||||
|
||||
struct Columns {
|
||||
pub optional: Column,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
pub mod common;
|
||||
|
||||
use binggan::BenchRunner;
|
||||
use common::{generate_columnar_with_name, Card};
|
||||
use common::{Card, generate_columnar_with_name};
|
||||
use tantivy_columnar::*;
|
||||
|
||||
const NUM_DOCS: u32 = 100_000;
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
||||
use common::OwnedBytes;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::{random, Rng, SeedableRng};
|
||||
use rand::{Rng, SeedableRng, random};
|
||||
use tantivy_columnar::ColumnValues;
|
||||
use test::Bencher;
|
||||
extern crate test;
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::ops::RangeInclusive;
|
||||
use std::sync::Arc;
|
||||
|
||||
use rand::prelude::*;
|
||||
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
|
||||
use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
|
||||
use tantivy_columnar::*;
|
||||
use test::Bencher;
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
|
||||
&'a self,
|
||||
docs: &'a [u32],
|
||||
accessor: &Column<T>,
|
||||
) -> impl Iterator<Item = (DocId, T)> + 'a {
|
||||
) -> impl Iterator<Item = (DocId, T)> + 'a + use<'a, T> {
|
||||
if accessor.index.get_cardinality().is_full() {
|
||||
docs.iter().cloned().zip(self.val_cache.iter().cloned())
|
||||
} else {
|
||||
|
||||
@@ -4,8 +4,8 @@ use std::{fmt, io};
|
||||
|
||||
use sstable::{Dictionary, VoidSSTable};
|
||||
|
||||
use crate::column::Column;
|
||||
use crate::RowId;
|
||||
use crate::column::Column;
|
||||
|
||||
/// Dictionary encoded column.
|
||||
///
|
||||
|
||||
@@ -9,13 +9,14 @@ use std::sync::Arc;
|
||||
use common::BinarySerializable;
|
||||
pub use dictionary_encoded::{BytesColumn, StrColumn};
|
||||
pub use serialize::{
|
||||
open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64,
|
||||
open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64,
|
||||
open_column_bytes, open_column_str, open_column_u64, open_column_u128,
|
||||
open_column_u128_as_compact_u64, serialize_column_mappable_to_u64,
|
||||
serialize_column_mappable_to_u128,
|
||||
};
|
||||
|
||||
use crate::column_index::{ColumnIndex, Set};
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
|
||||
use crate::column_values::{monotonic_map_column, ColumnValues};
|
||||
use crate::column_values::{ColumnValues, monotonic_map_column};
|
||||
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
|
||||
|
||||
#[derive(Clone)]
|
||||
|
||||
@@ -6,10 +6,10 @@ use common::OwnedBytes;
|
||||
use sstable::Dictionary;
|
||||
|
||||
use crate::column::{BytesColumn, Column};
|
||||
use crate::column_index::{serialize_column_index, SerializableColumnIndex};
|
||||
use crate::column_index::{SerializableColumnIndex, serialize_column_index};
|
||||
use crate::column_values::{
|
||||
CodecType, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
|
||||
load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
|
||||
CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
||||
};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{StrColumn, Version};
|
||||
|
||||
@@ -99,9 +99,9 @@ mod tests {
|
||||
|
||||
use crate::column_index::merge::detect_cardinality;
|
||||
use crate::column_index::multivalued_index::{
|
||||
open_multivalued_index, serialize_multivalued_index, MultiValueIndex,
|
||||
MultiValueIndex, open_multivalued_index, serialize_multivalued_index,
|
||||
};
|
||||
use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex};
|
||||
use crate::column_index::{OptionalIndex, SerializableColumnIndex, merge_column_index};
|
||||
use crate::{
|
||||
Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
|
||||
};
|
||||
|
||||
@@ -137,8 +137,8 @@ impl Iterable<u32> for ShuffledMultivaluedIndex<'_> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::column_index::OptionalIndex;
|
||||
use crate::RowAddr;
|
||||
use crate::column_index::OptionalIndex;
|
||||
|
||||
#[test]
|
||||
fn test_integrate_num_vals_empty() {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::column_index::SerializableColumnIndex;
|
||||
use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex};
|
||||
use crate::column_index::serialize::SerializableOptionalIndex;
|
||||
use crate::column_index::SerializableColumnIndex;
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ pub use merge::merge_column_index;
|
||||
pub(crate) use multivalued_index::SerializableMultivalueIndex;
|
||||
pub use optional_index::{OptionalIndex, Set};
|
||||
pub use serialize::{
|
||||
open_column_index, serialize_column_index, SerializableColumnIndex, SerializableOptionalIndex,
|
||||
SerializableColumnIndex, SerializableOptionalIndex, open_column_index, serialize_column_index,
|
||||
};
|
||||
|
||||
use crate::column_index::multivalued_index::MultiValueIndex;
|
||||
|
||||
@@ -8,7 +8,7 @@ use common::{CountingWriter, OwnedBytes};
|
||||
use super::optional_index::{open_optional_index, serialize_optional_index};
|
||||
use super::{OptionalIndex, SerializableOptionalIndex, Set};
|
||||
use crate::column_values::{
|
||||
load_u64_based_column_values, serialize_u64_based_column_values, CodecType, ColumnValues,
|
||||
CodecType, ColumnValues, load_u64_based_column_values, serialize_u64_based_column_values,
|
||||
};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{DocId, RowId, Version};
|
||||
|
||||
@@ -7,7 +7,7 @@ mod set_block;
|
||||
use common::{BinarySerializable, OwnedBytes, VInt};
|
||||
pub use set::{SelectCursor, Set, SetCodec};
|
||||
use set_block::{
|
||||
DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES,
|
||||
DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec,
|
||||
};
|
||||
|
||||
use crate::iterable::Iterable;
|
||||
@@ -259,11 +259,13 @@ impl Set<RowId> for OptionalIndex {
|
||||
|
||||
impl OptionalIndex {
|
||||
pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
|
||||
assert!(row_ids
|
||||
.last()
|
||||
.copied()
|
||||
.map(|last_row_id| last_row_id < num_rows)
|
||||
.unwrap_or(true));
|
||||
assert!(
|
||||
row_ids
|
||||
.last()
|
||||
.copied()
|
||||
.map(|last_row_id| last_row_id < num_rows)
|
||||
.unwrap_or(true)
|
||||
);
|
||||
let mut buffer = Vec::new();
|
||||
serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
|
||||
let bytes = OwnedBytes::new(buffer);
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::io::{self, Write};
|
||||
|
||||
use common::BinarySerializable;
|
||||
|
||||
use crate::column_index::optional_index::{SelectCursor, Set, SetCodec, ELEMENTS_PER_BLOCK};
|
||||
use crate::column_index::optional_index::{ELEMENTS_PER_BLOCK, SelectCursor, Set, SetCodec};
|
||||
|
||||
#[inline(always)]
|
||||
fn get_bit_at(input: u64, n: u16) -> bool {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
mod dense;
|
||||
mod sparse;
|
||||
|
||||
pub use dense::{DenseBlock, DenseBlockCodec, DENSE_BLOCK_NUM_BYTES};
|
||||
pub use dense::{DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec};
|
||||
pub use sparse::{SparseBlock, SparseBlockCodec};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -254,11 +254,7 @@ mod bench {
|
||||
let mut current = start;
|
||||
std::iter::from_fn(move || {
|
||||
current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation);
|
||||
if current >= end {
|
||||
None
|
||||
} else {
|
||||
Some(current)
|
||||
}
|
||||
if current >= end { None } else { Some(current) }
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -3,11 +3,11 @@ use std::io::Write;
|
||||
|
||||
use common::{CountingWriter, OwnedBytes};
|
||||
|
||||
use super::multivalued_index::SerializableMultivalueIndex;
|
||||
use super::OptionalIndex;
|
||||
use super::multivalued_index::SerializableMultivalueIndex;
|
||||
use crate::column_index::ColumnIndex;
|
||||
use crate::column_index::multivalued_index::serialize_multivalued_index;
|
||||
use crate::column_index::optional_index::serialize_optional_index;
|
||||
use crate::column_index::ColumnIndex;
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{Cardinality, RowId, Version};
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ use crate::column_values::u64_based::*;
|
||||
fn get_data() -> Vec<u64> {
|
||||
let mut rng = StdRng::seed_from_u64(2u64);
|
||||
let mut data: Vec<_> = (100..55000_u64)
|
||||
.map(|num| num + rng.gen::<u8>() as u64)
|
||||
.map(|num| num + rng.r#gen::<u8>() as u64)
|
||||
.collect();
|
||||
data.push(99_000);
|
||||
data.insert(1000, 2000);
|
||||
|
||||
@@ -26,13 +26,13 @@ mod monotonic_column;
|
||||
|
||||
pub(crate) use merge::MergedColumnValues;
|
||||
pub use stats::ColumnStats;
|
||||
pub use u128_based::{
|
||||
open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128,
|
||||
CompactSpaceU64Accessor,
|
||||
};
|
||||
pub use u64_based::{
|
||||
load_u64_based_column_values, serialize_and_load_u64_based_column_values,
|
||||
serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
|
||||
ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values,
|
||||
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
|
||||
};
|
||||
pub use u128_based::{
|
||||
CompactSpaceU64Accessor, open_u128_as_compact_u64, open_u128_mapped,
|
||||
serialize_column_values_u128,
|
||||
};
|
||||
pub use vec_column::VecColumn;
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
|
||||
use crate::ColumnValues;
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
|
||||
|
||||
struct MonotonicMappingColumn<C, T, Input> {
|
||||
from_column: C,
|
||||
@@ -99,10 +99,10 @@ where
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::column_values::VecColumn;
|
||||
use crate::column_values::monotonic_mapping::{
|
||||
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
|
||||
};
|
||||
use crate::column_values::VecColumn;
|
||||
|
||||
#[test]
|
||||
fn test_monotonic_mapping_iter() {
|
||||
|
||||
@@ -24,8 +24,8 @@ use build_compact_space::get_compact_space;
|
||||
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker};
|
||||
|
||||
use crate::column_values::ColumnValues;
|
||||
use crate::RowId;
|
||||
use crate::column_values::ColumnValues;
|
||||
|
||||
/// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
|
||||
/// blanks depends on the number of blanks.
|
||||
@@ -653,12 +653,14 @@ mod tests {
|
||||
),
|
||||
&[3]
|
||||
);
|
||||
assert!(get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99998u128..=99998u128,
|
||||
complete_range.clone()
|
||||
)
|
||||
.is_empty());
|
||||
assert!(
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99998u128..=99998u128,
|
||||
complete_range.clone()
|
||||
)
|
||||
.is_empty()
|
||||
);
|
||||
assert_eq!(
|
||||
&get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
|
||||
@@ -130,11 +130,11 @@ pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn Col
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use super::*;
|
||||
use crate::column_values::u64_based::{
|
||||
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
|
||||
ALL_U64_CODEC_TYPES,
|
||||
};
|
||||
use crate::column_values::CodecType;
|
||||
use crate::column_values::u64_based::{
|
||||
ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values,
|
||||
serialize_u64_based_column_values,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_serialize_deserialize_u128_header() {
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::ops::{Range, RangeInclusive};
|
||||
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use fastdivide::DividerU64;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
use crate::{ColumnValues, RowId};
|
||||
@@ -23,11 +23,7 @@ const fn div_ceil(n: u64, q: NonZeroU64) -> u64 {
|
||||
// copied from unstable rust standard library.
|
||||
let d = n / q.get();
|
||||
let r = n % q.get();
|
||||
if r > 0 {
|
||||
d + 1
|
||||
} else {
|
||||
d
|
||||
}
|
||||
if r > 0 { d + 1 } else { d }
|
||||
}
|
||||
|
||||
// The bitpacked codec applies a linear transformation `f` over data that are bitpacked.
|
||||
|
||||
@@ -4,12 +4,12 @@ use std::{io, iter};
|
||||
|
||||
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
|
||||
use fastdivide::DividerU64;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||
|
||||
use crate::MonotonicallyMappableToU64;
|
||||
use crate::column_values::u64_based::line::Line;
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
use crate::column_values::{ColumnValues, VecColumn};
|
||||
use crate::MonotonicallyMappableToU64;
|
||||
|
||||
const BLOCK_SIZE: u32 = 512u32;
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use std::io;
|
||||
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||
|
||||
use super::line::Line;
|
||||
use super::ColumnValues;
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
use crate::column_values::VecColumn;
|
||||
use super::line::Line;
|
||||
use crate::RowId;
|
||||
use crate::column_values::VecColumn;
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
|
||||
const HALF_SPACE: u64 = u64::MAX / 2;
|
||||
const LINE_ESTIMATION_BLOCK_LEN: usize = 512;
|
||||
|
||||
@@ -17,7 +17,7 @@ pub use crate::column_values::u64_based::bitpacked::BitpackedCodec;
|
||||
pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec;
|
||||
pub use crate::column_values::u64_based::linear::LinearCodec;
|
||||
pub use crate::column_values::u64_based::stats_collector::StatsCollector;
|
||||
use crate::column_values::{monotonic_map_column, ColumnStats};
|
||||
use crate::column_values::{ColumnStats, monotonic_map_column};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{ColumnValues, MonotonicallyMappableToU64};
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ use std::num::NonZeroU64;
|
||||
|
||||
use fastdivide::DividerU64;
|
||||
|
||||
use crate::column_values::ColumnStats;
|
||||
use crate::RowId;
|
||||
use crate::column_values::ColumnStats;
|
||||
|
||||
/// Compute the gcd of two non null numbers.
|
||||
///
|
||||
@@ -96,8 +96,8 @@ impl StatsCollector {
|
||||
mod tests {
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use crate::column_values::u64_based::stats_collector::{compute_gcd, StatsCollector};
|
||||
use crate::column_values::u64_based::ColumnStats;
|
||||
use crate::column_values::u64_based::stats_collector::{StatsCollector, compute_gcd};
|
||||
|
||||
fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
|
||||
let mut stats_collector = StatsCollector::default();
|
||||
|
||||
@@ -4,8 +4,8 @@ use std::net::Ipv6Addr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::value::NumericalType;
|
||||
use crate::InvalidData;
|
||||
use crate::value::NumericalType;
|
||||
|
||||
/// The column type represents the column type.
|
||||
/// Any changes need to be propagated to `COLUMN_TYPES`.
|
||||
|
||||
@@ -10,11 +10,11 @@ use std::sync::Arc;
|
||||
pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
||||
|
||||
use super::writer::ColumnarSerializer;
|
||||
use crate::column::{serialize_column_mappable_to_u128, serialize_column_mappable_to_u64};
|
||||
use crate::column::{serialize_column_mappable_to_u64, serialize_column_mappable_to_u128};
|
||||
use crate::column_values::MergedColumnValues;
|
||||
use crate::columnar::ColumnarReader;
|
||||
use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column;
|
||||
use crate::columnar::writer::CompatibleNumericalTypes;
|
||||
use crate::columnar::ColumnarReader;
|
||||
use crate::dynamic_column::DynamicColumn;
|
||||
use crate::{
|
||||
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType,
|
||||
@@ -144,16 +144,17 @@ fn merge_column(
|
||||
let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> =
|
||||
Vec::with_capacity(columns_to_merge.len());
|
||||
for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
|
||||
if let Some(Column { index: idx, values }) =
|
||||
dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic)
|
||||
{
|
||||
column_indexes.push(idx);
|
||||
column_values.push(Some(values));
|
||||
} else {
|
||||
column_indexes.push(ColumnIndex::Empty {
|
||||
num_docs: num_docs_per_column[i],
|
||||
});
|
||||
column_values.push(None);
|
||||
match dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) {
|
||||
Some(Column { index: idx, values }) => {
|
||||
column_indexes.push(idx);
|
||||
column_values.push(Some(values));
|
||||
}
|
||||
None => {
|
||||
column_indexes.push(ColumnIndex::Empty {
|
||||
num_docs: num_docs_per_column[i],
|
||||
});
|
||||
column_values.push(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
let merged_column_index =
|
||||
@@ -253,11 +254,13 @@ impl GroupedColumns {
|
||||
}
|
||||
// At the moment, only the numerical column type category has more than one possible
|
||||
// column type.
|
||||
assert!(self
|
||||
.columns
|
||||
.iter()
|
||||
.flatten()
|
||||
.all(|el| ColumnTypeCategory::from(el.column_type()) == ColumnTypeCategory::Numerical));
|
||||
assert!(
|
||||
self.columns
|
||||
.iter()
|
||||
.flatten()
|
||||
.all(|el| ColumnTypeCategory::from(el.column_type())
|
||||
== ColumnTypeCategory::Numerical)
|
||||
);
|
||||
merged_numerical_columns_type(self.columns.iter().flatten()).into()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,18 +74,19 @@ impl<'a> TermMerger<'a> {
|
||||
/// False if there is none.
|
||||
pub fn advance(&mut self) -> bool {
|
||||
self.advance_segments();
|
||||
if let Some(head) = self.heap.pop() {
|
||||
self.term_streams_with_segment.push(head);
|
||||
while let Some(next_streamer) = self.heap.peek() {
|
||||
if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() {
|
||||
break;
|
||||
match self.heap.pop() {
|
||||
Some(head) => {
|
||||
self.term_streams_with_segment.push(head);
|
||||
while let Some(next_streamer) = self.heap.peek() {
|
||||
if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() {
|
||||
break;
|
||||
}
|
||||
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
|
||||
self.term_streams_with_segment.push(next_heap_it);
|
||||
}
|
||||
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
|
||||
self.term_streams_with_segment.push(next_heap_it);
|
||||
true
|
||||
}
|
||||
true
|
||||
} else {
|
||||
false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ use proptest::collection::vec;
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::*;
|
||||
use crate::columnar::{merge_columnar, ColumnarReader, MergeRowOrder, StackMergeOrder};
|
||||
use crate::columnar::{ColumnarReader, MergeRowOrder, StackMergeOrder, merge_columnar};
|
||||
use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId};
|
||||
|
||||
fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
|
||||
|
||||
@@ -5,9 +5,9 @@ mod reader;
|
||||
mod writer;
|
||||
|
||||
pub use column_type::{ColumnType, HasAssociatedColumnType};
|
||||
pub use format_version::{Version, CURRENT_VERSION};
|
||||
pub use format_version::{CURRENT_VERSION, Version};
|
||||
#[cfg(test)]
|
||||
pub(crate) use merge::ColumnTypeCategory;
|
||||
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
||||
pub use merge::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, merge_columnar};
|
||||
pub use reader::ColumnarReader;
|
||||
pub use writer::ColumnarWriter;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use std::{fmt, io, mem};
|
||||
|
||||
use common::BinarySerializable;
|
||||
use common::file_slice::FileSlice;
|
||||
use common::json_path_writer::JSON_PATH_SEGMENT_SEP;
|
||||
use common::BinarySerializable;
|
||||
use sstable::{Dictionary, RangeSSTable};
|
||||
|
||||
use crate::columnar::{format_version, ColumnType};
|
||||
use crate::columnar::{ColumnType, format_version};
|
||||
use crate::dynamic_column::DynamicColumnHandle;
|
||||
use crate::{RowId, Version};
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ impl ColumnWriter {
|
||||
&self,
|
||||
arena: &MemoryArena,
|
||||
buffer: &'a mut Vec<u8>,
|
||||
) -> impl Iterator<Item = ColumnOperation<V>> + 'a {
|
||||
) -> impl Iterator<Item = ColumnOperation<V>> + 'a + use<'a, V> {
|
||||
buffer.clear();
|
||||
self.values.read_to_end(arena, buffer);
|
||||
let mut cursor: &[u8] = &buffer[..];
|
||||
@@ -104,9 +104,10 @@ pub(crate) struct NumericalColumnWriter {
|
||||
|
||||
impl NumericalColumnWriter {
|
||||
pub fn force_numerical_type(&mut self, numerical_type: NumericalType) {
|
||||
assert!(self
|
||||
.compatible_numerical_types
|
||||
.is_type_accepted(numerical_type));
|
||||
assert!(
|
||||
self.compatible_numerical_types
|
||||
.is_type_accepted(numerical_type)
|
||||
);
|
||||
self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type);
|
||||
}
|
||||
}
|
||||
@@ -211,7 +212,7 @@ impl NumericalColumnWriter {
|
||||
self,
|
||||
arena: &MemoryArena,
|
||||
buffer: &'a mut Vec<u8>,
|
||||
) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a {
|
||||
) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a + use<'a> {
|
||||
self.column_writer.operation_iterator(arena, buffer)
|
||||
}
|
||||
}
|
||||
@@ -255,7 +256,7 @@ impl StrOrBytesColumnWriter {
|
||||
&self,
|
||||
arena: &MemoryArena,
|
||||
byte_buffer: &'a mut Vec<u8>,
|
||||
) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a {
|
||||
) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a + use<'a> {
|
||||
self.column_writer.operation_iterator(arena, byte_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,13 +8,13 @@ use std::net::Ipv6Addr;
|
||||
|
||||
use column_operation::ColumnOperation;
|
||||
pub(crate) use column_writers::CompatibleNumericalTypes;
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
use common::CountingWriter;
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
pub(crate) use serializer::ColumnarSerializer;
|
||||
use stacker::{Addr, ArenaHashMap, MemoryArena};
|
||||
|
||||
use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex};
|
||||
use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
||||
use crate::column_values::{MonotonicallyMappableToU64, MonotonicallyMappableToU128};
|
||||
use crate::columnar::column_type::ColumnType;
|
||||
use crate::columnar::writer::column_writers::{
|
||||
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
|
||||
|
||||
@@ -3,11 +3,11 @@ use std::io::Write;
|
||||
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
use common::{BinarySerializable, CountingWriter};
|
||||
use sstable::value::RangeValueWriter;
|
||||
use sstable::RangeSSTable;
|
||||
use sstable::value::RangeValueWriter;
|
||||
|
||||
use crate::columnar::ColumnType;
|
||||
use crate::RowId;
|
||||
use crate::columnar::ColumnType;
|
||||
|
||||
pub struct ColumnarSerializer<W: io::Write> {
|
||||
wrt: CountingWriter<W>,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::RowId;
|
||||
use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::RowId;
|
||||
|
||||
/// The `IndexBuilder` interprets a sequence of
|
||||
/// calls of the form:
|
||||
@@ -31,12 +31,13 @@ pub struct OptionalIndexBuilder {
|
||||
|
||||
impl OptionalIndexBuilder {
|
||||
pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ {
|
||||
debug_assert!(self
|
||||
.docs
|
||||
.last()
|
||||
.copied()
|
||||
.map(|last_doc| last_doc < num_rows)
|
||||
.unwrap_or(true));
|
||||
debug_assert!(
|
||||
self.docs
|
||||
.last()
|
||||
.copied()
|
||||
.map(|last_doc| last_doc < num_rows)
|
||||
.unwrap_or(true)
|
||||
);
|
||||
&self.docs[..]
|
||||
}
|
||||
|
||||
@@ -48,12 +49,13 @@ impl OptionalIndexBuilder {
|
||||
impl IndexBuilder for OptionalIndexBuilder {
|
||||
#[inline(always)]
|
||||
fn record_row(&mut self, doc: RowId) {
|
||||
debug_assert!(self
|
||||
.docs
|
||||
.last()
|
||||
.copied()
|
||||
.map(|prev_doc| doc > prev_doc)
|
||||
.unwrap_or(true));
|
||||
debug_assert!(
|
||||
self.docs
|
||||
.last()
|
||||
.copied()
|
||||
.map(|prev_doc| doc > prev_doc)
|
||||
.unwrap_or(true)
|
||||
);
|
||||
self.docs.push(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,8 +3,8 @@ use std::path::PathBuf;
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::{
|
||||
merge_columnar, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
|
||||
CURRENT_VERSION,
|
||||
CURRENT_VERSION, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
|
||||
merge_columnar,
|
||||
};
|
||||
|
||||
const NUM_DOCS: u32 = u16::MAX as u32;
|
||||
|
||||
@@ -6,7 +6,7 @@ use common::file_slice::FileSlice;
|
||||
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
|
||||
|
||||
use crate::column::{BytesColumn, Column, StrColumn};
|
||||
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
|
||||
use crate::column_values::{StrictlyMonotonicFn, monotonic_map_column};
|
||||
use crate::columnar::ColumnType;
|
||||
use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version};
|
||||
|
||||
|
||||
@@ -44,11 +44,11 @@ pub use block_accessor::ColumnBlockAccessor;
|
||||
pub use column::{BytesColumn, Column, StrColumn};
|
||||
pub use column_index::ColumnIndex;
|
||||
pub use column_values::{
|
||||
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
||||
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
|
||||
};
|
||||
pub use columnar::{
|
||||
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
|
||||
CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
|
||||
};
|
||||
use sstable::VoidSSTable;
|
||||
pub use value::{NumericalType, NumericalValue};
|
||||
|
||||
@@ -716,8 +716,8 @@ fn test_columnar_merging_number_columns() {
|
||||
// TODO document edge case: required_columns incompatible with values.
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn columnar_docs_and_remap(
|
||||
) -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
|
||||
fn columnar_docs_and_remap()
|
||||
-> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
|
||||
proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map(
|
||||
|columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| {
|
||||
let row_addrs: Vec<RowAddr> = columnars_docs
|
||||
|
||||
Reference in New Issue
Block a user