update edition to 2024 (#2620)

* update common to edition 2024

* update bitpacker to edition 2024

* update stacker to edition 2024

* update query-grammar to edition 2024

* update sstable to edition 2024 + fmt

* fmt

* update columnar to edition 2024

* cargo fmt

* use None instead of _
This commit is contained in:
PSeitz
2025-04-18 04:56:31 +02:00
committed by GitHub
parent 3fa90e70e2
commit 5379c99ea2
75 changed files with 239 additions and 231 deletions

View File

@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
readme = "README.md" readme = "README.md"
keywords = ["search", "information", "retrieval"] keywords = ["search", "information", "retrieval"]
edition = "2021" edition = "2021"
rust-version = "1.75" rust-version = "1.85"
exclude = ["benches/*.json", "benches/*.txt"] exclude = ["benches/*.json", "benches/*.txt"]
[dependencies] [dependencies]

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "tantivy-bitpacker" name = "tantivy-bitpacker"
version = "0.6.0" version = "0.6.0"
edition = "2021" edition = "2024"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT" license = "MIT"
categories = [] categories = []

View File

@@ -1,6 +1,6 @@
use super::bitpacker::BitPacker; use super::bitpacker::BitPacker;
use super::compute_num_bits; use super::compute_num_bits;
use crate::{minmax, BitUnpacker}; use crate::{BitUnpacker, minmax};
const BLOCK_SIZE: usize = 128; const BLOCK_SIZE: usize = 128;

View File

@@ -33,11 +33,7 @@ pub use crate::blocked_bitpacker::BlockedBitpacker;
/// number of bits. /// number of bits.
pub fn compute_num_bits(n: u64) -> u8 { pub fn compute_num_bits(n: u64) -> u8 {
let amplitude = (64u32 - n.leading_zeros()) as u8; let amplitude = (64u32 - n.leading_zeros()) as u8;
if amplitude <= 64 - 8 { if amplitude <= 64 - 8 { amplitude } else { 64 }
amplitude
} else {
64
}
} }
/// Computes the (min, max) of an iterator of `PartialOrd` values. /// Computes the (min, max) of an iterator of `PartialOrd` values.

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "tantivy-columnar" name = "tantivy-columnar"
version = "0.3.0" version = "0.3.0"
edition = "2021" edition = "2024"
license = "MIT" license = "MIT"
homepage = "https://github.com/quickwit-oss/tantivy" homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy"

View File

@@ -1,4 +1,4 @@
use binggan::{black_box, InputGroup}; use binggan::{InputGroup, black_box};
use common::*; use common::*;
use tantivy_columnar::Column; use tantivy_columnar::Column;

View File

@@ -4,9 +4,9 @@ extern crate test;
use std::sync::Arc; use std::sync::Arc;
use rand::prelude::*; use rand::prelude::*;
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType}; use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
use tantivy_columnar::*; use tantivy_columnar::*;
use test::{black_box, Bencher}; use test::{Bencher, black_box};
struct Columns { struct Columns {
pub optional: Column, pub optional: Column,

View File

@@ -1,7 +1,7 @@
pub mod common; pub mod common;
use binggan::BenchRunner; use binggan::BenchRunner;
use common::{generate_columnar_with_name, Card}; use common::{Card, generate_columnar_with_name};
use tantivy_columnar::*; use tantivy_columnar::*;
const NUM_DOCS: u32 = 100_000; const NUM_DOCS: u32 = 100_000;

View File

@@ -6,7 +6,7 @@ use std::sync::Arc;
use common::OwnedBytes; use common::OwnedBytes;
use rand::rngs::StdRng; use rand::rngs::StdRng;
use rand::seq::SliceRandom; use rand::seq::SliceRandom;
use rand::{random, Rng, SeedableRng}; use rand::{Rng, SeedableRng, random};
use tantivy_columnar::ColumnValues; use tantivy_columnar::ColumnValues;
use test::Bencher; use test::Bencher;
extern crate test; extern crate test;

View File

@@ -5,7 +5,7 @@ use std::ops::RangeInclusive;
use std::sync::Arc; use std::sync::Arc;
use rand::prelude::*; use rand::prelude::*;
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType}; use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
use tantivy_columnar::*; use tantivy_columnar::*;
use test::Bencher; use test::Bencher;

View File

@@ -66,7 +66,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
&'a self, &'a self,
docs: &'a [u32], docs: &'a [u32],
accessor: &Column<T>, accessor: &Column<T>,
) -> impl Iterator<Item = (DocId, T)> + 'a { ) -> impl Iterator<Item = (DocId, T)> + 'a + use<'a, T> {
if accessor.index.get_cardinality().is_full() { if accessor.index.get_cardinality().is_full() {
docs.iter().cloned().zip(self.val_cache.iter().cloned()) docs.iter().cloned().zip(self.val_cache.iter().cloned())
} else { } else {

View File

@@ -4,8 +4,8 @@ use std::{fmt, io};
use sstable::{Dictionary, VoidSSTable}; use sstable::{Dictionary, VoidSSTable};
use crate::column::Column;
use crate::RowId; use crate::RowId;
use crate::column::Column;
/// Dictionary encoded column. /// Dictionary encoded column.
/// ///

View File

@@ -9,13 +9,14 @@ use std::sync::Arc;
use common::BinarySerializable; use common::BinarySerializable;
pub use dictionary_encoded::{BytesColumn, StrColumn}; pub use dictionary_encoded::{BytesColumn, StrColumn};
pub use serialize::{ pub use serialize::{
open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64, open_column_bytes, open_column_str, open_column_u64, open_column_u128,
open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64, open_column_u128_as_compact_u64, serialize_column_mappable_to_u64,
serialize_column_mappable_to_u128,
}; };
use crate::column_index::{ColumnIndex, Set}; use crate::column_index::{ColumnIndex, Set};
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal; use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
use crate::column_values::{monotonic_map_column, ColumnValues}; use crate::column_values::{ColumnValues, monotonic_map_column};
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId}; use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
#[derive(Clone)] #[derive(Clone)]

View File

@@ -6,10 +6,10 @@ use common::OwnedBytes;
use sstable::Dictionary; use sstable::Dictionary;
use crate::column::{BytesColumn, Column}; use crate::column::{BytesColumn, Column};
use crate::column_index::{serialize_column_index, SerializableColumnIndex}; use crate::column_index::{SerializableColumnIndex, serialize_column_index};
use crate::column_values::{ use crate::column_values::{
CodecType, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values, load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
}; };
use crate::iterable::Iterable; use crate::iterable::Iterable;
use crate::{StrColumn, Version}; use crate::{StrColumn, Version};

View File

@@ -99,9 +99,9 @@ mod tests {
use crate::column_index::merge::detect_cardinality; use crate::column_index::merge::detect_cardinality;
use crate::column_index::multivalued_index::{ use crate::column_index::multivalued_index::{
open_multivalued_index, serialize_multivalued_index, MultiValueIndex, MultiValueIndex, open_multivalued_index, serialize_multivalued_index,
}; };
use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex}; use crate::column_index::{OptionalIndex, SerializableColumnIndex, merge_column_index};
use crate::{ use crate::{
Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder, Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
}; };

View File

@@ -137,8 +137,8 @@ impl Iterable<u32> for ShuffledMultivaluedIndex<'_> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::column_index::OptionalIndex;
use crate::RowAddr; use crate::RowAddr;
use crate::column_index::OptionalIndex;
#[test] #[test]
fn test_integrate_num_vals_empty() { fn test_integrate_num_vals_empty() {

View File

@@ -1,8 +1,8 @@
use std::ops::Range; use std::ops::Range;
use crate::column_index::SerializableColumnIndex;
use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex}; use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex};
use crate::column_index::serialize::SerializableOptionalIndex; use crate::column_index::serialize::SerializableOptionalIndex;
use crate::column_index::SerializableColumnIndex;
use crate::iterable::Iterable; use crate::iterable::Iterable;
use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder}; use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};

View File

@@ -14,7 +14,7 @@ pub use merge::merge_column_index;
pub(crate) use multivalued_index::SerializableMultivalueIndex; pub(crate) use multivalued_index::SerializableMultivalueIndex;
pub use optional_index::{OptionalIndex, Set}; pub use optional_index::{OptionalIndex, Set};
pub use serialize::{ pub use serialize::{
open_column_index, serialize_column_index, SerializableColumnIndex, SerializableOptionalIndex, SerializableColumnIndex, SerializableOptionalIndex, open_column_index, serialize_column_index,
}; };
use crate::column_index::multivalued_index::MultiValueIndex; use crate::column_index::multivalued_index::MultiValueIndex;

View File

@@ -8,7 +8,7 @@ use common::{CountingWriter, OwnedBytes};
use super::optional_index::{open_optional_index, serialize_optional_index}; use super::optional_index::{open_optional_index, serialize_optional_index};
use super::{OptionalIndex, SerializableOptionalIndex, Set}; use super::{OptionalIndex, SerializableOptionalIndex, Set};
use crate::column_values::{ use crate::column_values::{
load_u64_based_column_values, serialize_u64_based_column_values, CodecType, ColumnValues, CodecType, ColumnValues, load_u64_based_column_values, serialize_u64_based_column_values,
}; };
use crate::iterable::Iterable; use crate::iterable::Iterable;
use crate::{DocId, RowId, Version}; use crate::{DocId, RowId, Version};

View File

@@ -7,7 +7,7 @@ mod set_block;
use common::{BinarySerializable, OwnedBytes, VInt}; use common::{BinarySerializable, OwnedBytes, VInt};
pub use set::{SelectCursor, Set, SetCodec}; pub use set::{SelectCursor, Set, SetCodec};
use set_block::{ use set_block::{
DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES, DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec,
}; };
use crate::iterable::Iterable; use crate::iterable::Iterable;
@@ -259,11 +259,13 @@ impl Set<RowId> for OptionalIndex {
impl OptionalIndex { impl OptionalIndex {
pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex { pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
assert!(row_ids assert!(
.last() row_ids
.copied() .last()
.map(|last_row_id| last_row_id < num_rows) .copied()
.unwrap_or(true)); .map(|last_row_id| last_row_id < num_rows)
.unwrap_or(true)
);
let mut buffer = Vec::new(); let mut buffer = Vec::new();
serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap(); serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
let bytes = OwnedBytes::new(buffer); let bytes = OwnedBytes::new(buffer);

View File

@@ -2,7 +2,7 @@ use std::io::{self, Write};
use common::BinarySerializable; use common::BinarySerializable;
use crate::column_index::optional_index::{SelectCursor, Set, SetCodec, ELEMENTS_PER_BLOCK}; use crate::column_index::optional_index::{ELEMENTS_PER_BLOCK, SelectCursor, Set, SetCodec};
#[inline(always)] #[inline(always)]
fn get_bit_at(input: u64, n: u16) -> bool { fn get_bit_at(input: u64, n: u16) -> bool {

View File

@@ -1,7 +1,7 @@
mod dense; mod dense;
mod sparse; mod sparse;
pub use dense::{DenseBlock, DenseBlockCodec, DENSE_BLOCK_NUM_BYTES}; pub use dense::{DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec};
pub use sparse::{SparseBlock, SparseBlockCodec}; pub use sparse::{SparseBlock, SparseBlockCodec};
#[cfg(test)] #[cfg(test)]

View File

@@ -254,11 +254,7 @@ mod bench {
let mut current = start; let mut current = start;
std::iter::from_fn(move || { std::iter::from_fn(move || {
current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation); current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation);
if current >= end { if current >= end { None } else { Some(current) }
None
} else {
Some(current)
}
}) })
} }

View File

@@ -3,11 +3,11 @@ use std::io::Write;
use common::{CountingWriter, OwnedBytes}; use common::{CountingWriter, OwnedBytes};
use super::multivalued_index::SerializableMultivalueIndex;
use super::OptionalIndex; use super::OptionalIndex;
use super::multivalued_index::SerializableMultivalueIndex;
use crate::column_index::ColumnIndex;
use crate::column_index::multivalued_index::serialize_multivalued_index; use crate::column_index::multivalued_index::serialize_multivalued_index;
use crate::column_index::optional_index::serialize_optional_index; use crate::column_index::optional_index::serialize_optional_index;
use crate::column_index::ColumnIndex;
use crate::iterable::Iterable; use crate::iterable::Iterable;
use crate::{Cardinality, RowId, Version}; use crate::{Cardinality, RowId, Version};

View File

@@ -11,7 +11,7 @@ use crate::column_values::u64_based::*;
fn get_data() -> Vec<u64> { fn get_data() -> Vec<u64> {
let mut rng = StdRng::seed_from_u64(2u64); let mut rng = StdRng::seed_from_u64(2u64);
let mut data: Vec<_> = (100..55000_u64) let mut data: Vec<_> = (100..55000_u64)
.map(|num| num + rng.gen::<u8>() as u64) .map(|num| num + rng.r#gen::<u8>() as u64)
.collect(); .collect();
data.push(99_000); data.push(99_000);
data.insert(1000, 2000); data.insert(1000, 2000);

View File

@@ -26,13 +26,13 @@ mod monotonic_column;
pub(crate) use merge::MergedColumnValues; pub(crate) use merge::MergedColumnValues;
pub use stats::ColumnStats; pub use stats::ColumnStats;
pub use u128_based::{
open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128,
CompactSpaceU64Accessor,
};
pub use u64_based::{ pub use u64_based::{
load_u64_based_column_values, serialize_and_load_u64_based_column_values, ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values,
serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
};
pub use u128_based::{
CompactSpaceU64Accessor, open_u128_as_compact_u64, open_u128_mapped,
serialize_column_values_u128,
}; };
pub use vec_column::VecColumn; pub use vec_column::VecColumn;

View File

@@ -2,8 +2,8 @@ use std::fmt::Debug;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::ops::{Range, RangeInclusive}; use std::ops::{Range, RangeInclusive};
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
use crate::ColumnValues; use crate::ColumnValues;
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
struct MonotonicMappingColumn<C, T, Input> { struct MonotonicMappingColumn<C, T, Input> {
from_column: C, from_column: C,
@@ -99,10 +99,10 @@ where
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::column_values::VecColumn;
use crate::column_values::monotonic_mapping::{ use crate::column_values::monotonic_mapping::{
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal, StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
}; };
use crate::column_values::VecColumn;
#[test] #[test]
fn test_monotonic_mapping_iter() { fn test_monotonic_mapping_iter() {

View File

@@ -24,8 +24,8 @@ use build_compact_space::get_compact_space;
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128}; use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
use tantivy_bitpacker::{BitPacker, BitUnpacker}; use tantivy_bitpacker::{BitPacker, BitUnpacker};
use crate::column_values::ColumnValues;
use crate::RowId; use crate::RowId;
use crate::column_values::ColumnValues;
/// The cost per blank is hard to estimate: since blanks are delta encoded, the actual cost of /// The cost per blank is hard to estimate: since blanks are delta encoded, the actual cost of
/// blanks depends on the number of blanks. /// blanks depends on the number of blanks.
@@ -653,12 +653,14 @@ mod tests {
), ),
&[3] &[3]
); );
assert!(get_positions_for_value_range_helper( assert!(
&decomp, get_positions_for_value_range_helper(
99998u128..=99998u128, &decomp,
complete_range.clone() 99998u128..=99998u128,
) complete_range.clone()
.is_empty()); )
.is_empty()
);
assert_eq!( assert_eq!(
&get_positions_for_value_range_helper( &get_positions_for_value_range_helper(
&decomp, &decomp,

View File

@@ -130,11 +130,11 @@ pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn Col
#[cfg(test)] #[cfg(test)]
pub(crate) mod tests { pub(crate) mod tests {
use super::*; use super::*;
use crate::column_values::u64_based::{
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
ALL_U64_CODEC_TYPES,
};
use crate::column_values::CodecType; use crate::column_values::CodecType;
use crate::column_values::u64_based::{
ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values,
serialize_u64_based_column_values,
};
#[test] #[test]
fn test_serialize_deserialize_u128_header() { fn test_serialize_deserialize_u128_header() {

View File

@@ -4,7 +4,7 @@ use std::ops::{Range, RangeInclusive};
use common::{BinarySerializable, OwnedBytes}; use common::{BinarySerializable, OwnedBytes};
use fastdivide::DividerU64; use fastdivide::DividerU64;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats}; use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
use crate::{ColumnValues, RowId}; use crate::{ColumnValues, RowId};
@@ -23,11 +23,7 @@ const fn div_ceil(n: u64, q: NonZeroU64) -> u64 {
// copied from unstable rust standard library. // copied from unstable rust standard library.
let d = n / q.get(); let d = n / q.get();
let r = n % q.get(); let r = n % q.get();
if r > 0 { if r > 0 { d + 1 } else { d }
d + 1
} else {
d
}
} }
// The bitpacked codec applies a linear transformation `f` over data that are bitpacked. // The bitpacked codec applies a linear transformation `f` over data that are bitpacked.

View File

@@ -4,12 +4,12 @@ use std::{io, iter};
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes}; use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
use fastdivide::DividerU64; use fastdivide::DividerU64;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
use crate::MonotonicallyMappableToU64;
use crate::column_values::u64_based::line::Line; use crate::column_values::u64_based::line::Line;
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats}; use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
use crate::column_values::{ColumnValues, VecColumn}; use crate::column_values::{ColumnValues, VecColumn};
use crate::MonotonicallyMappableToU64;
const BLOCK_SIZE: u32 = 512u32; const BLOCK_SIZE: u32 = 512u32;

View File

@@ -1,13 +1,13 @@
use std::io; use std::io;
use common::{BinarySerializable, OwnedBytes}; use common::{BinarySerializable, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
use super::line::Line;
use super::ColumnValues; use super::ColumnValues;
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats}; use super::line::Line;
use crate::column_values::VecColumn;
use crate::RowId; use crate::RowId;
use crate::column_values::VecColumn;
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
const HALF_SPACE: u64 = u64::MAX / 2; const HALF_SPACE: u64 = u64::MAX / 2;
const LINE_ESTIMATION_BLOCK_LEN: usize = 512; const LINE_ESTIMATION_BLOCK_LEN: usize = 512;

View File

@@ -17,7 +17,7 @@ pub use crate::column_values::u64_based::bitpacked::BitpackedCodec;
pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec; pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec;
pub use crate::column_values::u64_based::linear::LinearCodec; pub use crate::column_values::u64_based::linear::LinearCodec;
pub use crate::column_values::u64_based::stats_collector::StatsCollector; pub use crate::column_values::u64_based::stats_collector::StatsCollector;
use crate::column_values::{monotonic_map_column, ColumnStats}; use crate::column_values::{ColumnStats, monotonic_map_column};
use crate::iterable::Iterable; use crate::iterable::Iterable;
use crate::{ColumnValues, MonotonicallyMappableToU64}; use crate::{ColumnValues, MonotonicallyMappableToU64};

View File

@@ -2,8 +2,8 @@ use std::num::NonZeroU64;
use fastdivide::DividerU64; use fastdivide::DividerU64;
use crate::column_values::ColumnStats;
use crate::RowId; use crate::RowId;
use crate::column_values::ColumnStats;
/// Compute the gcd of two non-zero numbers. /// Compute the gcd of two non-zero numbers.
/// ///
@@ -96,8 +96,8 @@ impl StatsCollector {
mod tests { mod tests {
use std::num::NonZeroU64; use std::num::NonZeroU64;
use crate::column_values::u64_based::stats_collector::{compute_gcd, StatsCollector};
use crate::column_values::u64_based::ColumnStats; use crate::column_values::u64_based::ColumnStats;
use crate::column_values::u64_based::stats_collector::{StatsCollector, compute_gcd};
fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats { fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
let mut stats_collector = StatsCollector::default(); let mut stats_collector = StatsCollector::default();

View File

@@ -4,8 +4,8 @@ use std::net::Ipv6Addr;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::value::NumericalType;
use crate::InvalidData; use crate::InvalidData;
use crate::value::NumericalType;
/// The column type represents the column type. /// The column type represents the column type.
/// Any changes need to be propagated to `COLUMN_TYPES`. /// Any changes need to be propagated to `COLUMN_TYPES`.

View File

@@ -10,11 +10,11 @@ use std::sync::Arc;
pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder}; pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
use super::writer::ColumnarSerializer; use super::writer::ColumnarSerializer;
use crate::column::{serialize_column_mappable_to_u128, serialize_column_mappable_to_u64}; use crate::column::{serialize_column_mappable_to_u64, serialize_column_mappable_to_u128};
use crate::column_values::MergedColumnValues; use crate::column_values::MergedColumnValues;
use crate::columnar::ColumnarReader;
use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column; use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column;
use crate::columnar::writer::CompatibleNumericalTypes; use crate::columnar::writer::CompatibleNumericalTypes;
use crate::columnar::ColumnarReader;
use crate::dynamic_column::DynamicColumn; use crate::dynamic_column::DynamicColumn;
use crate::{ use crate::{
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType, BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType,
@@ -144,16 +144,17 @@ fn merge_column(
let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> = let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> =
Vec::with_capacity(columns_to_merge.len()); Vec::with_capacity(columns_to_merge.len());
for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() { for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
if let Some(Column { index: idx, values }) = match dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) {
dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) Some(Column { index: idx, values }) => {
{ column_indexes.push(idx);
column_indexes.push(idx); column_values.push(Some(values));
column_values.push(Some(values)); }
} else { None => {
column_indexes.push(ColumnIndex::Empty { column_indexes.push(ColumnIndex::Empty {
num_docs: num_docs_per_column[i], num_docs: num_docs_per_column[i],
}); });
column_values.push(None); column_values.push(None);
}
} }
} }
let merged_column_index = let merged_column_index =
@@ -253,11 +254,13 @@ impl GroupedColumns {
} }
// At the moment, only the numerical column type category has more than one possible // At the moment, only the numerical column type category has more than one possible
// column type. // column type.
assert!(self assert!(
.columns self.columns
.iter() .iter()
.flatten() .flatten()
.all(|el| ColumnTypeCategory::from(el.column_type()) == ColumnTypeCategory::Numerical)); .all(|el| ColumnTypeCategory::from(el.column_type())
== ColumnTypeCategory::Numerical)
);
merged_numerical_columns_type(self.columns.iter().flatten()).into() merged_numerical_columns_type(self.columns.iter().flatten()).into()
} }
} }

View File

@@ -74,18 +74,19 @@ impl<'a> TermMerger<'a> {
/// False if there is none. /// False if there is none.
pub fn advance(&mut self) -> bool { pub fn advance(&mut self) -> bool {
self.advance_segments(); self.advance_segments();
if let Some(head) = self.heap.pop() { match self.heap.pop() {
self.term_streams_with_segment.push(head); Some(head) => {
while let Some(next_streamer) = self.heap.peek() { self.term_streams_with_segment.push(head);
if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() { while let Some(next_streamer) = self.heap.peek() {
break; if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() {
break;
}
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
self.term_streams_with_segment.push(next_heap_it);
} }
let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand true
self.term_streams_with_segment.push(next_heap_it);
} }
true _ => false,
} else {
false
} }
} }

View File

@@ -3,7 +3,7 @@ use proptest::collection::vec;
use proptest::prelude::*; use proptest::prelude::*;
use super::*; use super::*;
use crate::columnar::{merge_columnar, ColumnarReader, MergeRowOrder, StackMergeOrder}; use crate::columnar::{ColumnarReader, MergeRowOrder, StackMergeOrder, merge_columnar};
use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId}; use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId};
fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>( fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(

View File

@@ -5,9 +5,9 @@ mod reader;
mod writer; mod writer;
pub use column_type::{ColumnType, HasAssociatedColumnType}; pub use column_type::{ColumnType, HasAssociatedColumnType};
pub use format_version::{Version, CURRENT_VERSION}; pub use format_version::{CURRENT_VERSION, Version};
#[cfg(test)] #[cfg(test)]
pub(crate) use merge::ColumnTypeCategory; pub(crate) use merge::ColumnTypeCategory;
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder}; pub use merge::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, merge_columnar};
pub use reader::ColumnarReader; pub use reader::ColumnarReader;
pub use writer::ColumnarWriter; pub use writer::ColumnarWriter;

View File

@@ -1,11 +1,11 @@
use std::{fmt, io, mem}; use std::{fmt, io, mem};
use common::BinarySerializable;
use common::file_slice::FileSlice; use common::file_slice::FileSlice;
use common::json_path_writer::JSON_PATH_SEGMENT_SEP; use common::json_path_writer::JSON_PATH_SEGMENT_SEP;
use common::BinarySerializable;
use sstable::{Dictionary, RangeSSTable}; use sstable::{Dictionary, RangeSSTable};
use crate::columnar::{format_version, ColumnType}; use crate::columnar::{ColumnType, format_version};
use crate::dynamic_column::DynamicColumnHandle; use crate::dynamic_column::DynamicColumnHandle;
use crate::{RowId, Version}; use crate::{RowId, Version};

View File

@@ -42,7 +42,7 @@ impl ColumnWriter {
&self, &self,
arena: &MemoryArena, arena: &MemoryArena,
buffer: &'a mut Vec<u8>, buffer: &'a mut Vec<u8>,
) -> impl Iterator<Item = ColumnOperation<V>> + 'a { ) -> impl Iterator<Item = ColumnOperation<V>> + 'a + use<'a, V> {
buffer.clear(); buffer.clear();
self.values.read_to_end(arena, buffer); self.values.read_to_end(arena, buffer);
let mut cursor: &[u8] = &buffer[..]; let mut cursor: &[u8] = &buffer[..];
@@ -104,9 +104,10 @@ pub(crate) struct NumericalColumnWriter {
impl NumericalColumnWriter { impl NumericalColumnWriter {
pub fn force_numerical_type(&mut self, numerical_type: NumericalType) { pub fn force_numerical_type(&mut self, numerical_type: NumericalType) {
assert!(self assert!(
.compatible_numerical_types self.compatible_numerical_types
.is_type_accepted(numerical_type)); .is_type_accepted(numerical_type)
);
self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type); self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type);
} }
} }
@@ -211,7 +212,7 @@ impl NumericalColumnWriter {
self, self,
arena: &MemoryArena, arena: &MemoryArena,
buffer: &'a mut Vec<u8>, buffer: &'a mut Vec<u8>,
) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a { ) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a + use<'a> {
self.column_writer.operation_iterator(arena, buffer) self.column_writer.operation_iterator(arena, buffer)
} }
} }
@@ -255,7 +256,7 @@ impl StrOrBytesColumnWriter {
&self, &self,
arena: &MemoryArena, arena: &MemoryArena,
byte_buffer: &'a mut Vec<u8>, byte_buffer: &'a mut Vec<u8>,
) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a { ) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a + use<'a> {
self.column_writer.operation_iterator(arena, byte_buffer) self.column_writer.operation_iterator(arena, byte_buffer)
} }
} }

View File

@@ -8,13 +8,13 @@ use std::net::Ipv6Addr;
use column_operation::ColumnOperation; use column_operation::ColumnOperation;
pub(crate) use column_writers::CompatibleNumericalTypes; pub(crate) use column_writers::CompatibleNumericalTypes;
use common::json_path_writer::JSON_END_OF_PATH;
use common::CountingWriter; use common::CountingWriter;
use common::json_path_writer::JSON_END_OF_PATH;
pub(crate) use serializer::ColumnarSerializer; pub(crate) use serializer::ColumnarSerializer;
use stacker::{Addr, ArenaHashMap, MemoryArena}; use stacker::{Addr, ArenaHashMap, MemoryArena};
use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex}; use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex};
use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64}; use crate::column_values::{MonotonicallyMappableToU64, MonotonicallyMappableToU128};
use crate::columnar::column_type::ColumnType; use crate::columnar::column_type::ColumnType;
use crate::columnar::writer::column_writers::{ use crate::columnar::writer::column_writers::{
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter, ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,

View File

@@ -3,11 +3,11 @@ use std::io::Write;
use common::json_path_writer::JSON_END_OF_PATH; use common::json_path_writer::JSON_END_OF_PATH;
use common::{BinarySerializable, CountingWriter}; use common::{BinarySerializable, CountingWriter};
use sstable::value::RangeValueWriter;
use sstable::RangeSSTable; use sstable::RangeSSTable;
use sstable::value::RangeValueWriter;
use crate::columnar::ColumnType;
use crate::RowId; use crate::RowId;
use crate::columnar::ColumnType;
pub struct ColumnarSerializer<W: io::Write> { pub struct ColumnarSerializer<W: io::Write> {
wrt: CountingWriter<W>, wrt: CountingWriter<W>,

View File

@@ -1,6 +1,6 @@
use crate::RowId;
use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex}; use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};
use crate::iterable::Iterable; use crate::iterable::Iterable;
use crate::RowId;
/// The `IndexBuilder` interprets a sequence of /// The `IndexBuilder` interprets a sequence of
/// calls of the form: /// calls of the form:
@@ -31,12 +31,13 @@ pub struct OptionalIndexBuilder {
impl OptionalIndexBuilder { impl OptionalIndexBuilder {
pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ { pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ {
debug_assert!(self debug_assert!(
.docs self.docs
.last() .last()
.copied() .copied()
.map(|last_doc| last_doc < num_rows) .map(|last_doc| last_doc < num_rows)
.unwrap_or(true)); .unwrap_or(true)
);
&self.docs[..] &self.docs[..]
} }
@@ -48,12 +49,13 @@ impl OptionalIndexBuilder {
impl IndexBuilder for OptionalIndexBuilder { impl IndexBuilder for OptionalIndexBuilder {
#[inline(always)] #[inline(always)]
fn record_row(&mut self, doc: RowId) { fn record_row(&mut self, doc: RowId) {
debug_assert!(self debug_assert!(
.docs self.docs
.last() .last()
.copied() .copied()
.map(|prev_doc| doc > prev_doc) .map(|prev_doc| doc > prev_doc)
.unwrap_or(true)); .unwrap_or(true)
);
self.docs.push(doc); self.docs.push(doc);
} }
} }

View File

@@ -3,8 +3,8 @@ use std::path::PathBuf;
use itertools::Itertools; use itertools::Itertools;
use crate::{ use crate::{
merge_columnar, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder, CURRENT_VERSION, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
CURRENT_VERSION, merge_columnar,
}; };
const NUM_DOCS: u32 = u16::MAX as u32; const NUM_DOCS: u32 = u16::MAX as u32;

View File

@@ -6,7 +6,7 @@ use common::file_slice::FileSlice;
use common::{ByteCount, DateTime, HasLen, OwnedBytes}; use common::{ByteCount, DateTime, HasLen, OwnedBytes};
use crate::column::{BytesColumn, Column, StrColumn}; use crate::column::{BytesColumn, Column, StrColumn};
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn}; use crate::column_values::{StrictlyMonotonicFn, monotonic_map_column};
use crate::columnar::ColumnType; use crate::columnar::ColumnType;
use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version}; use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version};

View File

@@ -44,11 +44,11 @@ pub use block_accessor::ColumnBlockAccessor;
pub use column::{BytesColumn, Column, StrColumn}; pub use column::{BytesColumn, Column, StrColumn};
pub use column_index::ColumnIndex; pub use column_index::ColumnIndex;
pub use column_values::{ pub use column_values::{
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64, ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
}; };
pub use columnar::{ pub use columnar::{
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType, CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
}; };
use sstable::VoidSSTable; use sstable::VoidSSTable;
pub use value::{NumericalType, NumericalValue}; pub use value::{NumericalType, NumericalValue};

View File

@@ -716,8 +716,8 @@ fn test_columnar_merging_number_columns() {
// TODO document edge case: required_columns incompatible with values. // TODO document edge case: required_columns incompatible with values.
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
fn columnar_docs_and_remap( fn columnar_docs_and_remap()
) -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> { -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map( proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map(
|columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| { |columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| {
let row_addrs: Vec<RowAddr> = columnars_docs let row_addrs: Vec<RowAddr> = columnars_docs

View File

@@ -3,7 +3,7 @@ name = "tantivy-common"
version = "0.7.0" version = "0.7.0"
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"] authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
license = "MIT" license = "MIT"
edition = "2021" edition = "2024"
description = "common traits and utility functions used by multiple tantivy subcrates" description = "common traits and utility functions used by multiple tantivy subcrates"
documentation = "https://docs.rs/tantivy_common/" documentation = "https://docs.rs/tantivy_common/"
homepage = "https://github.com/quickwit-oss/tantivy" homepage = "https://github.com/quickwit-oss/tantivy"

View File

@@ -1,7 +1,7 @@
use binggan::{black_box, BenchRunner}; use binggan::{BenchRunner, black_box};
use rand::seq::IteratorRandom; use rand::seq::IteratorRandom;
use rand::thread_rng; use rand::thread_rng;
use tantivy_common::{serialize_vint_u32, BitSet, TinySet}; use tantivy_common::{BitSet, TinySet, serialize_vint_u32};
fn bench_vint() { fn bench_vint() {
let mut runner = BenchRunner::new(); let mut runner = BenchRunner::new();

View File

@@ -65,11 +65,11 @@ pub fn transform_bound_inner_res<TFrom, TTo>(
) -> io::Result<Bound<TTo>> { ) -> io::Result<Bound<TTo>> {
use self::Bound::*; use self::Bound::*;
Ok(match bound { Ok(match bound {
Excluded(ref from_val) => match transform(from_val)? { Excluded(from_val) => match transform(from_val)? {
TransformBound::NewBound(new_val) => new_val, TransformBound::NewBound(new_val) => new_val,
TransformBound::Existing(new_val) => Excluded(new_val), TransformBound::Existing(new_val) => Excluded(new_val),
}, },
Included(ref from_val) => match transform(from_val)? { Included(from_val) => match transform(from_val)? {
TransformBound::NewBound(new_val) => new_val, TransformBound::NewBound(new_val) => new_val,
TransformBound::Existing(new_val) => Included(new_val), TransformBound::Existing(new_val) => Included(new_val),
}, },
@@ -85,11 +85,11 @@ pub fn transform_bound_inner<TFrom, TTo>(
) -> Bound<TTo> { ) -> Bound<TTo> {
use self::Bound::*; use self::Bound::*;
match bound { match bound {
Excluded(ref from_val) => match transform(from_val) { Excluded(from_val) => match transform(from_val) {
TransformBound::NewBound(new_val) => new_val, TransformBound::NewBound(new_val) => new_val,
TransformBound::Existing(new_val) => Excluded(new_val), TransformBound::Existing(new_val) => Excluded(new_val),
}, },
Included(ref from_val) => match transform(from_val) { Included(from_val) => match transform(from_val) {
TransformBound::NewBound(new_val) => new_val, TransformBound::NewBound(new_val) => new_val,
TransformBound::Existing(new_val) => Included(new_val), TransformBound::Existing(new_val) => Included(new_val),
}, },
@@ -111,8 +111,8 @@ pub fn map_bound<TFrom, TTo>(
) -> Bound<TTo> { ) -> Bound<TTo> {
use self::Bound::*; use self::Bound::*;
match bound { match bound {
Excluded(ref from_val) => Bound::Excluded(transform(from_val)), Excluded(from_val) => Bound::Excluded(transform(from_val)),
Included(ref from_val) => Bound::Included(transform(from_val)), Included(from_val) => Bound::Included(transform(from_val)),
Unbounded => Unbounded, Unbounded => Unbounded,
} }
} }
@@ -123,8 +123,8 @@ pub fn map_bound_res<TFrom, TTo, Err>(
) -> Result<Bound<TTo>, Err> { ) -> Result<Bound<TTo>, Err> {
use self::Bound::*; use self::Bound::*;
Ok(match bound { Ok(match bound {
Excluded(ref from_val) => Excluded(transform(from_val)?), Excluded(from_val) => Excluded(transform(from_val)?),
Included(ref from_val) => Included(transform(from_val)?), Included(from_val) => Included(transform(from_val)?),
Unbounded => Unbounded, Unbounded => Unbounded,
}) })
} }

View File

@@ -74,7 +74,7 @@ impl FileHandle for WrapFile {
{ {
use std::io::{Read, Seek}; use std::io::{Read, Seek};
let mut file = self.file.try_clone()?; // Clone the file to read from it separately let mut file = self.file.try_clone()?; // Clone the file to read from it separately
// Seek to the start position in the file // Seek to the start position in the file
file.seek(io::SeekFrom::Start(start as u64))?; file.seek(io::SeekFrom::Start(start as u64))?;
// Read the data into the buffer // Read the data into the buffer
file.read_exact(&mut buffer)?; file.read_exact(&mut buffer)?;
@@ -346,8 +346,8 @@ mod tests {
use std::sync::Arc; use std::sync::Arc;
use super::{FileHandle, FileSlice}; use super::{FileHandle, FileSlice};
use crate::file_slice::combine_ranges;
use crate::HasLen; use crate::HasLen;
use crate::file_slice::combine_ranges;
#[test] #[test]
fn test_file_slice() -> io::Result<()> { fn test_file_slice() -> io::Result<()> {

View File

@@ -22,7 +22,7 @@ pub use json_path_writer::JsonPathWriter;
pub use ownedbytes::{OwnedBytes, StableDeref}; pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize}; pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{ pub use vint::{
read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128, VInt, VIntU128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint,
}; };
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite}; pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
@@ -177,8 +177,10 @@ pub(crate) mod test {
#[test] #[test]
fn test_f64_order() { fn test_f64_order() {
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)) assert!(
.contains(&f64_to_u64(f64::NAN))); // nan is not a number !(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
.contains(&f64_to_u64(f64::NAN))
); // nan is not a number
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa

View File

@@ -222,7 +222,7 @@ impl BinarySerializable for VInt {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{serialize_vint_u32, BinarySerializable, VInt}; use super::{BinarySerializable, VInt, serialize_vint_u32};
fn aux_test_vint(val: u64) { fn aux_test_vint(val: u64) {
let mut v = [14u8; 10]; let mut v = [14u8; 10];

View File

@@ -9,7 +9,7 @@ homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy"
readme = "README.md" readme = "README.md"
keywords = ["search", "information", "retrieval"] keywords = ["search", "information", "retrieval"]
edition = "2021" edition = "2024"
[dependencies] [dependencies]
nom = "7" nom = "7"

View File

@@ -186,19 +186,19 @@ macro_rules! tuple_trait_impl(
); );
macro_rules! tuple_trait_inner( macro_rules! tuple_trait_inner(
($it:tt, $self:expr, $input:expr, (), $error_list:expr, $head:ident $($id:ident)+) => ({ ($it:tt, $self:expr_2021, $input:expr_2021, (), $error_list:expr_2021, $head:ident $($id:ident)+) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?; let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err); $error_list.append(&mut err);
succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+)) succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+))
}); });
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident $($id:ident)+) => ({ ($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident $($id:ident)+) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?; let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err); $error_list.append(&mut err);
succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+)) succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+))
}); });
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident) => ({ ($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident) => ({
let (i, (o, mut err)) = $self.$it.parse($input.clone())?; let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
$error_list.append(&mut err); $error_list.append(&mut err);
@@ -328,13 +328,13 @@ macro_rules! alt_trait_impl(
); );
macro_rules! alt_trait_inner( macro_rules! alt_trait_inner(
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => ( ($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
match $self.$it.0.parse($input.clone()) { match $self.$it.0.parse($input.clone()) {
Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)), Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)),
Ok((input_left, _)) => Some($self.$it.1.parse(input_left)), Ok((input_left, _)) => Some($self.$it.1.parse(input_left)),
} }
); );
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident) => ( ($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident) => (
None None
); );
); );

View File

@@ -1,6 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::iter::once; use std::iter::once;
use nom::IResult;
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::character::complete::{ use nom::character::complete::{
@@ -10,12 +11,11 @@ use nom::combinator::{eof, map, map_res, opt, peek, recognize, value, verify};
use nom::error::{Error, ErrorKind}; use nom::error::{Error, ErrorKind};
use nom::multi::{many0, many1, separated_list0}; use nom::multi::{many0, many1, separated_list0};
use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
use nom::IResult;
use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral}; use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
use crate::Occur;
use crate::infallible::*; use crate::infallible::*;
use crate::user_input_ast::Delimiter; use crate::user_input_ast::Delimiter;
use crate::Occur;
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to // Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
// special characters. // special characters.
@@ -1030,7 +1030,7 @@ fn rewrite_ast(mut input: UserInputAst) -> UserInputAst {
fn rewrite_ast_clause(input: &mut (Option<Occur>, UserInputAst)) { fn rewrite_ast_clause(input: &mut (Option<Occur>, UserInputAst)) {
match input { match input {
(None, UserInputAst::Clause(ref mut clauses)) if clauses.len() == 1 => { (None, UserInputAst::Clause(clauses)) if clauses.len() == 1 => {
*input = clauses.pop().unwrap(); // safe because clauses.len() == 1 *input = clauses.pop().unwrap(); // safe because clauses.len() == 1
} }
_ => {} _ => {}
@@ -1376,7 +1376,7 @@ mod test {
#[test] #[test]
fn test_range_parser_lenient() { fn test_range_parser_lenient() {
let literal = |query| literal_infallible(query).unwrap().1 .0.unwrap(); let literal = |query| literal_infallible(query).unwrap().1.0.unwrap();
// same tests as non-lenient // same tests as non-lenient
let res = literal("title: <hello"); let res = literal("title: <hello");

View File

@@ -51,7 +51,7 @@ impl UserInputLeaf {
pub(crate) fn set_default_field(&mut self, default_field: String) { pub(crate) fn set_default_field(&mut self, default_field: String) {
match self { match self {
UserInputLeaf::Literal(ref mut literal) if literal.field_name.is_none() => { UserInputLeaf::Literal(literal) if literal.field_name.is_none() => {
literal.field_name = Some(default_field) literal.field_name = Some(default_field)
} }
UserInputLeaf::All => { UserInputLeaf::All => {
@@ -59,12 +59,8 @@ impl UserInputLeaf {
field: default_field, field: default_field,
} }
} }
UserInputLeaf::Range { ref mut field, .. } if field.is_none() => { UserInputLeaf::Range { field, .. } if field.is_none() => *field = Some(default_field),
*field = Some(default_field) UserInputLeaf::Set { field, .. } if field.is_none() => *field = Some(default_field),
}
UserInputLeaf::Set { ref mut field, .. } if field.is_none() => {
*field = Some(default_field)
}
_ => (), // field was already set, do nothing _ => (), // field was already set, do nothing
} }
} }
@@ -75,11 +71,11 @@ impl Debug for UserInputLeaf {
match self { match self {
UserInputLeaf::Literal(literal) => literal.fmt(formatter), UserInputLeaf::Literal(literal) => literal.fmt(formatter),
UserInputLeaf::Range { UserInputLeaf::Range {
ref field, field,
ref lower, lower,
ref upper, upper,
} => { } => {
if let Some(ref field) = field { if let Some(field) = field {
// TODO properly escape field (in case of \") // TODO properly escape field (in case of \")
write!(formatter, "\"{field}\":")?; write!(formatter, "\"{field}\":")?;
} }
@@ -89,7 +85,7 @@ impl Debug for UserInputLeaf {
Ok(()) Ok(())
} }
UserInputLeaf::Set { field, elements } => { UserInputLeaf::Set { field, elements } => {
if let Some(ref field) = field { if let Some(field) = field {
// TODO properly escape field (in case of \") // TODO properly escape field (in case of \")
write!(formatter, "\"{field}\": ")?; write!(formatter, "\"{field}\": ")?;
} }
@@ -267,7 +263,7 @@ impl UserInputAst {
.iter_mut() .iter_mut()
.for_each(|(_, ast)| ast.set_default_field(field.clone())), .for_each(|(_, ast)| ast.set_default_field(field.clone())),
UserInputAst::Leaf(leaf) => leaf.set_default_field(field), UserInputAst::Leaf(leaf) => leaf.set_default_field(field),
UserInputAst::Boost(ref mut ast, _) => ast.set_default_field(field), UserInputAst::Boost(ast, _) => ast.set_default_field(field),
} }
} }
} }

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "tantivy-sstable" name = "tantivy-sstable"
version = "0.3.0" version = "0.3.0"
edition = "2021" edition = "2024"
license = "MIT" license = "MIT"
homepage = "https://github.com/quickwit-oss/tantivy" homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy"

View File

@@ -1,8 +1,8 @@
use std::sync::Arc; use std::sync::Arc;
use common::file_slice::FileSlice;
use common::OwnedBytes; use common::OwnedBytes;
use criterion::{criterion_group, criterion_main, Criterion}; use common::file_slice::FileSlice;
use criterion::{Criterion, criterion_group, criterion_main};
use tantivy_sstable::{Dictionary, MonotonicU64SSTable}; use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
fn make_test_sstable(suffix: &str) -> FileSlice { fn make_test_sstable(suffix: &str) -> FileSlice {

View File

@@ -2,7 +2,7 @@ use std::collections::BTreeSet;
use std::io; use std::io;
use common::file_slice::FileSlice; use common::file_slice::FileSlice;
use criterion::{criterion_group, criterion_main, Criterion}; use criterion::{Criterion, criterion_group, criterion_main};
use rand::rngs::StdRng; use rand::rngs::StdRng;
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use tantivy_sstable::{Dictionary, MonotonicU64SSTable}; use tantivy_sstable::{Dictionary, MonotonicU64SSTable};

View File

@@ -51,18 +51,21 @@ impl BlockReader {
let block_len = match self.reader.len() { let block_len = match self.reader.len() {
0 => { 0 => {
// we are out of data for this block. Check if we have another block after // we are out of data for this block. Check if we have another block after
if let Some(new_reader) = self.next_readers.next() { match self.next_readers.next() {
self.reader = new_reader; Some(new_reader) => {
continue; self.reader = new_reader;
} else { continue;
return Ok(false); }
_ => {
return Ok(false);
}
} }
} }
1..=3 => { 1..=3 => {
return Err(io::Error::new( return Err(io::Error::new(
io::ErrorKind::UnexpectedEof, io::ErrorKind::UnexpectedEof,
"failed to read block_len", "failed to read block_len",
)) ));
} }
_ => self.reader.read_u32() as usize, _ => self.reader.read_u32() as usize,
}; };

View File

@@ -5,7 +5,7 @@ use common::{CountingWriter, OwnedBytes};
use zstd::bulk::Compressor; use zstd::bulk::Compressor;
use super::value::ValueWriter; use super::value::ValueWriter;
use super::{value, vint, BlockReader}; use super::{BlockReader, value, vint};
const FOUR_BIT_LIMITS: usize = 1 << 4; const FOUR_BIT_LIMITS: usize = 1 << 4;
const VINT_MODE: u8 = 1u8; const VINT_MODE: u8 = 1u8;

View File

@@ -6,13 +6,13 @@ use std::marker::PhantomData;
use std::ops::{Bound, RangeBounds}; use std::ops::{Bound, RangeBounds};
use std::sync::Arc; use std::sync::Arc;
use common::bounds::{transform_bound_inner_res, TransformBound}; use common::bounds::{TransformBound, transform_bound_inner_res};
use common::file_slice::FileSlice; use common::file_slice::FileSlice;
use common::{BinarySerializable, OwnedBytes}; use common::{BinarySerializable, OwnedBytes};
use futures_util::{stream, StreamExt, TryStreamExt}; use futures_util::{StreamExt, TryStreamExt, stream};
use itertools::Itertools; use itertools::Itertools;
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::Automaton; use tantivy_fst::Automaton;
use tantivy_fst::automaton::AlwaysMatch;
use crate::sstable_index_v3::SSTableIndexV3Empty; use crate::sstable_index_v3::SSTableIndexV3Empty;
use crate::streamer::{Streamer, StreamerBuilder}; use crate::streamer::{Streamer, StreamerBuilder};
@@ -311,7 +311,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
return Err(io::Error::new( return Err(io::Error::new(
io::ErrorKind::Other, io::ErrorKind::Other,
format!("Unsupported sstable version, expected one of [2, 3], found {version}"), format!("Unsupported sstable version, expected one of [2, 3], found {version}"),
)) ));
} }
}; };
@@ -644,8 +644,8 @@ mod tests {
use common::OwnedBytes; use common::OwnedBytes;
use super::Dictionary; use super::Dictionary;
use crate::dictionary::TermOrdHit;
use crate::MonotonicU64SSTable; use crate::MonotonicU64SSTable;
use crate::dictionary::TermOrdHit;
#[derive(Debug)] #[derive(Debug)]
struct PermissionedHandle { struct PermissionedHandle {
@@ -914,30 +914,33 @@ mod tests {
// Single term // Single term
let mut terms = Vec::new(); let mut terms = Vec::new();
assert!(dic assert!(
.sorted_ords_to_term_cb(100_000..100_001, |term| { dic.sorted_ords_to_term_cb(100_000..100_001, |term| {
terms.push(term.to_vec()); terms.push(term.to_vec());
Ok(()) Ok(())
}) })
.unwrap()); .unwrap()
);
assert_eq!(terms, vec![format!("{:05X}", 100_000).into_bytes(),]); assert_eq!(terms, vec![format!("{:05X}", 100_000).into_bytes(),]);
// Single term // Single term
let mut terms = Vec::new(); let mut terms = Vec::new();
assert!(dic assert!(
.sorted_ords_to_term_cb(100_001..100_002, |term| { dic.sorted_ords_to_term_cb(100_001..100_002, |term| {
terms.push(term.to_vec()); terms.push(term.to_vec());
Ok(()) Ok(())
}) })
.unwrap()); .unwrap()
);
assert_eq!(terms, vec![format!("{:05X}", 100_001).into_bytes(),]); assert_eq!(terms, vec![format!("{:05X}", 100_001).into_bytes(),]);
// both terms // both terms
let mut terms = Vec::new(); let mut terms = Vec::new();
assert!(dic assert!(
.sorted_ords_to_term_cb(100_000..100_002, |term| { dic.sorted_ords_to_term_cb(100_000..100_002, |term| {
terms.push(term.to_vec()); terms.push(term.to_vec());
Ok(()) Ok(())
}) })
.unwrap()); .unwrap()
);
assert_eq!( assert_eq!(
terms, terms,
vec![ vec![
@@ -947,12 +950,13 @@ mod tests {
); );
// Test cross block // Test cross block
let mut terms = Vec::new(); let mut terms = Vec::new();
assert!(dic assert!(
.sorted_ords_to_term_cb(98653..=98655, |term| { dic.sorted_ords_to_term_cb(98653..=98655, |term| {
terms.push(term.to_vec()); terms.push(term.to_vec());
Ok(()) Ok(())
}) })
.unwrap()); .unwrap()
);
assert_eq!( assert_eq!(
terms, terms,
vec![ vec![

View File

@@ -322,7 +322,7 @@ mod test {
use common::OwnedBytes; use common::OwnedBytes;
use super::{common_prefix_len, MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable}; use super::{MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable, common_prefix_len};
fn aux_test_common_prefix_len(left: &str, right: &str, expect_len: usize) { fn aux_test_common_prefix_len(left: &str, right: &str, expect_len: usize) {
assert_eq!( assert_eq!(

View File

@@ -1,6 +1,6 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::binary_heap::PeekMut;
use std::collections::BinaryHeap; use std::collections::BinaryHeap;
use std::collections::binary_heap::PeekMut;
use std::io; use std::io;
use super::{SingleValueMerger, ValueMerger}; use super::{SingleValueMerger, ValueMerger};
@@ -41,14 +41,17 @@ pub fn merge_sstable<SST: SSTable, W: io::Write, M: ValueMerger<SST::Value>>(
loop { loop {
let len = heap.len(); let len = heap.len();
let mut value_merger; let mut value_merger;
if let Some(mut head) = heap.peek_mut() { match heap.peek_mut() {
writer.insert_key(head.0.key()).unwrap(); Some(mut head) => {
value_merger = merger.new_value(head.0.value()); writer.insert_key(head.0.key()).unwrap();
if !head.0.advance()? { value_merger = merger.new_value(head.0.value());
PeekMut::pop(head); if !head.0.advance()? {
PeekMut::pop(head);
}
}
_ => {
break;
} }
} else {
break;
} }
for _ in 0..len - 1 { for _ in 0..len - 1 {
if let Some(mut head) = heap.peek_mut() { if let Some(mut head) = heap.peek_mut() {

View File

@@ -3,12 +3,12 @@ use std::ops::Range;
use std::sync::Arc; use std::sync::Arc;
use common::{BinarySerializable, FixedSize, OwnedBytes}; use common::{BinarySerializable, FixedSize, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker}; use tantivy_bitpacker::{BitPacker, compute_num_bits};
use tantivy_fst::raw::Fst; use tantivy_fst::raw::Fst;
use tantivy_fst::{Automaton, IntoStreamer, Map, MapBuilder, Streamer}; use tantivy_fst::{Automaton, IntoStreamer, Map, MapBuilder, Streamer};
use crate::block_match_automaton::can_block_match_automaton; use crate::block_match_automaton::can_block_match_automaton;
use crate::{common_prefix_len, SSTableDataCorruption, TermOrdinal}; use crate::{SSTableDataCorruption, TermOrdinal, common_prefix_len};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum SSTableIndex { pub enum SSTableIndex {
@@ -824,8 +824,8 @@ mod tests {
use common::OwnedBytes; use common::OwnedBytes;
use super::*; use super::*;
use crate::block_match_automaton::tests::EqBuffer;
use crate::SSTableDataCorruption; use crate::SSTableDataCorruption;
use crate::block_match_automaton::tests::EqBuffer;
#[test] #[test]
fn test_sstable_index() { fn test_sstable_index() {

View File

@@ -1,8 +1,8 @@
use std::io; use std::io;
use std::ops::Bound; use std::ops::Bound;
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::Automaton; use tantivy_fst::Automaton;
use tantivy_fst::automaton::AlwaysMatch;
use crate::dictionary::Dictionary; use crate::dictionary::Dictionary;
use crate::{DeltaReader, SSTable, TermOrdinal}; use crate::{DeltaReader, SSTable, TermOrdinal};

View File

@@ -1,7 +1,7 @@
use std::io; use std::io;
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter}; use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
use crate::{vint, BlockAddr}; use crate::{BlockAddr, vint};
#[derive(Default)] #[derive(Default)]
pub(crate) struct IndexValueReader { pub(crate) struct IndexValueReader {

View File

@@ -1,7 +1,7 @@
use std::io; use std::io;
use std::ops::Range; use std::ops::Range;
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter}; use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
/// See module comment. /// See module comment.
#[derive(Default)] #[derive(Default)]

View File

@@ -1,6 +1,6 @@
use std::io; use std::io;
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter}; use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
use crate::vint; use crate::vint;
#[derive(Default)] #[derive(Default)]

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "tantivy-stacker" name = "tantivy-stacker"
version = "0.3.0" version = "0.3.0"
edition = "2021" edition = "2024"
license = "MIT" license = "MIT"
homepage = "https://github.com/quickwit-oss/tantivy" homepage = "https://github.com/quickwit-oss/tantivy"
repository = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy"

View File

@@ -1,5 +1,5 @@
use binggan::plugins::PeakMemAllocPlugin; use binggan::plugins::PeakMemAllocPlugin;
use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM}; use binggan::{BenchRunner, INSTRUMENTED_SYSTEM, PeakMemAlloc, black_box};
use rand::SeedableRng; use rand::SeedableRng;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use tantivy_stacker::{ArenaHashMap, ExpUnrolledLinkedList, MemoryArena}; use tantivy_stacker::{ArenaHashMap, ExpUnrolledLinkedList, MemoryArena};

View File

@@ -13,7 +13,7 @@ mod shared_arena_hashmap;
pub use self::arena_hashmap::ArenaHashMap; pub use self::arena_hashmap::ArenaHashMap;
pub use self::expull::ExpUnrolledLinkedList; pub use self::expull::ExpUnrolledLinkedList;
pub use self::memory_arena::{Addr, MemoryArena}; pub use self::memory_arena::{Addr, MemoryArena};
pub use self::shared_arena_hashmap::{compute_table_memory_size, SharedArenaHashMap}; pub use self::shared_arena_hashmap::{SharedArenaHashMap, compute_table_memory_size};
/// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key. /// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key.
pub type UnorderedId = u32; pub type UnorderedId = u32;

View File

@@ -356,7 +356,7 @@ mod tests {
use std::collections::HashMap; use std::collections::HashMap;
use super::{compute_previous_power_of_two, SharedArenaHashMap}; use super::{SharedArenaHashMap, compute_previous_power_of_two};
use crate::MemoryArena; use crate::MemoryArena;
#[test] #[test]