mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
update edition to 2024 (#2620)
* update common to edition 2024 * update bitpacker to edition 2024 * update stacker to edition 2024 * update query-grammar to edition 2024 * update sstable to edition 2024 + fmt * fmt * update columnar to edition 2024 * cargo fmt * use None instead of _
This commit is contained in:
@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
readme = "README.md"
|
||||
keywords = ["search", "information", "retrieval"]
|
||||
edition = "2021"
|
||||
rust-version = "1.75"
|
||||
rust-version = "1.85"
|
||||
exclude = ["benches/*.json", "benches/*.txt"]
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "tantivy-bitpacker"
|
||||
version = "0.6.0"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||
license = "MIT"
|
||||
categories = []
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use super::bitpacker::BitPacker;
|
||||
use super::compute_num_bits;
|
||||
use crate::{minmax, BitUnpacker};
|
||||
use crate::{BitUnpacker, minmax};
|
||||
|
||||
const BLOCK_SIZE: usize = 128;
|
||||
|
||||
|
||||
@@ -33,11 +33,7 @@ pub use crate::blocked_bitpacker::BlockedBitpacker;
|
||||
/// number of bits.
|
||||
pub fn compute_num_bits(n: u64) -> u8 {
|
||||
let amplitude = (64u32 - n.leading_zeros()) as u8;
|
||||
if amplitude <= 64 - 8 {
|
||||
amplitude
|
||||
} else {
|
||||
64
|
||||
}
|
||||
if amplitude <= 64 - 8 { amplitude } else { 64 }
|
||||
}
|
||||
|
||||
/// Computes the (min, max) of an iterator of `PartialOrd` values.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "tantivy-columnar"
|
||||
version = "0.3.0"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
repository = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use binggan::{black_box, InputGroup};
|
||||
use binggan::{InputGroup, black_box};
|
||||
use common::*;
|
||||
use tantivy_columnar::Column;
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@ extern crate test;
|
||||
use std::sync::Arc;
|
||||
|
||||
use rand::prelude::*;
|
||||
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
|
||||
use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
|
||||
use tantivy_columnar::*;
|
||||
use test::{black_box, Bencher};
|
||||
use test::{Bencher, black_box};
|
||||
|
||||
struct Columns {
|
||||
pub optional: Column,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
pub mod common;
|
||||
|
||||
use binggan::BenchRunner;
|
||||
use common::{generate_columnar_with_name, Card};
|
||||
use common::{Card, generate_columnar_with_name};
|
||||
use tantivy_columnar::*;
|
||||
|
||||
const NUM_DOCS: u32 = 100_000;
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
||||
use common::OwnedBytes;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::{random, Rng, SeedableRng};
|
||||
use rand::{Rng, SeedableRng, random};
|
||||
use tantivy_columnar::ColumnValues;
|
||||
use test::Bencher;
|
||||
extern crate test;
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::ops::RangeInclusive;
|
||||
use std::sync::Arc;
|
||||
|
||||
use rand::prelude::*;
|
||||
use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
|
||||
use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
|
||||
use tantivy_columnar::*;
|
||||
use test::Bencher;
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
|
||||
&'a self,
|
||||
docs: &'a [u32],
|
||||
accessor: &Column<T>,
|
||||
) -> impl Iterator<Item = (DocId, T)> + 'a {
|
||||
) -> impl Iterator<Item = (DocId, T)> + 'a + use<'a, T> {
|
||||
if accessor.index.get_cardinality().is_full() {
|
||||
docs.iter().cloned().zip(self.val_cache.iter().cloned())
|
||||
} else {
|
||||
|
||||
@@ -4,8 +4,8 @@ use std::{fmt, io};
|
||||
|
||||
use sstable::{Dictionary, VoidSSTable};
|
||||
|
||||
use crate::column::Column;
|
||||
use crate::RowId;
|
||||
use crate::column::Column;
|
||||
|
||||
/// Dictionary encoded column.
|
||||
///
|
||||
|
||||
@@ -9,13 +9,14 @@ use std::sync::Arc;
|
||||
use common::BinarySerializable;
|
||||
pub use dictionary_encoded::{BytesColumn, StrColumn};
|
||||
pub use serialize::{
|
||||
open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64,
|
||||
open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64,
|
||||
open_column_bytes, open_column_str, open_column_u64, open_column_u128,
|
||||
open_column_u128_as_compact_u64, serialize_column_mappable_to_u64,
|
||||
serialize_column_mappable_to_u128,
|
||||
};
|
||||
|
||||
use crate::column_index::{ColumnIndex, Set};
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
|
||||
use crate::column_values::{monotonic_map_column, ColumnValues};
|
||||
use crate::column_values::{ColumnValues, monotonic_map_column};
|
||||
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
|
||||
|
||||
#[derive(Clone)]
|
||||
|
||||
@@ -6,10 +6,10 @@ use common::OwnedBytes;
|
||||
use sstable::Dictionary;
|
||||
|
||||
use crate::column::{BytesColumn, Column};
|
||||
use crate::column_index::{serialize_column_index, SerializableColumnIndex};
|
||||
use crate::column_index::{SerializableColumnIndex, serialize_column_index};
|
||||
use crate::column_values::{
|
||||
CodecType, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
|
||||
load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
|
||||
CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
||||
};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{StrColumn, Version};
|
||||
|
||||
@@ -99,9 +99,9 @@ mod tests {
|
||||
|
||||
use crate::column_index::merge::detect_cardinality;
|
||||
use crate::column_index::multivalued_index::{
|
||||
open_multivalued_index, serialize_multivalued_index, MultiValueIndex,
|
||||
MultiValueIndex, open_multivalued_index, serialize_multivalued_index,
|
||||
};
|
||||
use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex};
|
||||
use crate::column_index::{OptionalIndex, SerializableColumnIndex, merge_column_index};
|
||||
use crate::{
|
||||
Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
|
||||
};
|
||||
|
||||
@@ -137,8 +137,8 @@ impl Iterable<u32> for ShuffledMultivaluedIndex<'_> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::column_index::OptionalIndex;
|
||||
use crate::RowAddr;
|
||||
use crate::column_index::OptionalIndex;
|
||||
|
||||
#[test]
|
||||
fn test_integrate_num_vals_empty() {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::column_index::SerializableColumnIndex;
|
||||
use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex};
|
||||
use crate::column_index::serialize::SerializableOptionalIndex;
|
||||
use crate::column_index::SerializableColumnIndex;
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ pub use merge::merge_column_index;
|
||||
pub(crate) use multivalued_index::SerializableMultivalueIndex;
|
||||
pub use optional_index::{OptionalIndex, Set};
|
||||
pub use serialize::{
|
||||
open_column_index, serialize_column_index, SerializableColumnIndex, SerializableOptionalIndex,
|
||||
SerializableColumnIndex, SerializableOptionalIndex, open_column_index, serialize_column_index,
|
||||
};
|
||||
|
||||
use crate::column_index::multivalued_index::MultiValueIndex;
|
||||
|
||||
@@ -8,7 +8,7 @@ use common::{CountingWriter, OwnedBytes};
|
||||
use super::optional_index::{open_optional_index, serialize_optional_index};
|
||||
use super::{OptionalIndex, SerializableOptionalIndex, Set};
|
||||
use crate::column_values::{
|
||||
load_u64_based_column_values, serialize_u64_based_column_values, CodecType, ColumnValues,
|
||||
CodecType, ColumnValues, load_u64_based_column_values, serialize_u64_based_column_values,
|
||||
};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{DocId, RowId, Version};
|
||||
|
||||
@@ -7,7 +7,7 @@ mod set_block;
|
||||
use common::{BinarySerializable, OwnedBytes, VInt};
|
||||
pub use set::{SelectCursor, Set, SetCodec};
|
||||
use set_block::{
|
||||
DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES,
|
||||
DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec,
|
||||
};
|
||||
|
||||
use crate::iterable::Iterable;
|
||||
@@ -259,11 +259,13 @@ impl Set<RowId> for OptionalIndex {
|
||||
|
||||
impl OptionalIndex {
|
||||
pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
|
||||
assert!(row_ids
|
||||
assert!(
|
||||
row_ids
|
||||
.last()
|
||||
.copied()
|
||||
.map(|last_row_id| last_row_id < num_rows)
|
||||
.unwrap_or(true));
|
||||
.unwrap_or(true)
|
||||
);
|
||||
let mut buffer = Vec::new();
|
||||
serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
|
||||
let bytes = OwnedBytes::new(buffer);
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::io::{self, Write};
|
||||
|
||||
use common::BinarySerializable;
|
||||
|
||||
use crate::column_index::optional_index::{SelectCursor, Set, SetCodec, ELEMENTS_PER_BLOCK};
|
||||
use crate::column_index::optional_index::{ELEMENTS_PER_BLOCK, SelectCursor, Set, SetCodec};
|
||||
|
||||
#[inline(always)]
|
||||
fn get_bit_at(input: u64, n: u16) -> bool {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
mod dense;
|
||||
mod sparse;
|
||||
|
||||
pub use dense::{DenseBlock, DenseBlockCodec, DENSE_BLOCK_NUM_BYTES};
|
||||
pub use dense::{DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec};
|
||||
pub use sparse::{SparseBlock, SparseBlockCodec};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -254,11 +254,7 @@ mod bench {
|
||||
let mut current = start;
|
||||
std::iter::from_fn(move || {
|
||||
current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation);
|
||||
if current >= end {
|
||||
None
|
||||
} else {
|
||||
Some(current)
|
||||
}
|
||||
if current >= end { None } else { Some(current) }
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -3,11 +3,11 @@ use std::io::Write;
|
||||
|
||||
use common::{CountingWriter, OwnedBytes};
|
||||
|
||||
use super::multivalued_index::SerializableMultivalueIndex;
|
||||
use super::OptionalIndex;
|
||||
use super::multivalued_index::SerializableMultivalueIndex;
|
||||
use crate::column_index::ColumnIndex;
|
||||
use crate::column_index::multivalued_index::serialize_multivalued_index;
|
||||
use crate::column_index::optional_index::serialize_optional_index;
|
||||
use crate::column_index::ColumnIndex;
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{Cardinality, RowId, Version};
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ use crate::column_values::u64_based::*;
|
||||
fn get_data() -> Vec<u64> {
|
||||
let mut rng = StdRng::seed_from_u64(2u64);
|
||||
let mut data: Vec<_> = (100..55000_u64)
|
||||
.map(|num| num + rng.gen::<u8>() as u64)
|
||||
.map(|num| num + rng.r#gen::<u8>() as u64)
|
||||
.collect();
|
||||
data.push(99_000);
|
||||
data.insert(1000, 2000);
|
||||
|
||||
@@ -26,13 +26,13 @@ mod monotonic_column;
|
||||
|
||||
pub(crate) use merge::MergedColumnValues;
|
||||
pub use stats::ColumnStats;
|
||||
pub use u128_based::{
|
||||
open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128,
|
||||
CompactSpaceU64Accessor,
|
||||
};
|
||||
pub use u64_based::{
|
||||
load_u64_based_column_values, serialize_and_load_u64_based_column_values,
|
||||
serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
|
||||
ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values,
|
||||
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
|
||||
};
|
||||
pub use u128_based::{
|
||||
CompactSpaceU64Accessor, open_u128_as_compact_u64, open_u128_mapped,
|
||||
serialize_column_values_u128,
|
||||
};
|
||||
pub use vec_column::VecColumn;
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
|
||||
use crate::ColumnValues;
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
|
||||
|
||||
struct MonotonicMappingColumn<C, T, Input> {
|
||||
from_column: C,
|
||||
@@ -99,10 +99,10 @@ where
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::column_values::VecColumn;
|
||||
use crate::column_values::monotonic_mapping::{
|
||||
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
|
||||
};
|
||||
use crate::column_values::VecColumn;
|
||||
|
||||
#[test]
|
||||
fn test_monotonic_mapping_iter() {
|
||||
|
||||
@@ -24,8 +24,8 @@ use build_compact_space::get_compact_space;
|
||||
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker};
|
||||
|
||||
use crate::column_values::ColumnValues;
|
||||
use crate::RowId;
|
||||
use crate::column_values::ColumnValues;
|
||||
|
||||
/// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
|
||||
/// blanks depends on the number of blanks.
|
||||
@@ -653,12 +653,14 @@ mod tests {
|
||||
),
|
||||
&[3]
|
||||
);
|
||||
assert!(get_positions_for_value_range_helper(
|
||||
assert!(
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99998u128..=99998u128,
|
||||
complete_range.clone()
|
||||
)
|
||||
.is_empty());
|
||||
.is_empty()
|
||||
);
|
||||
assert_eq!(
|
||||
&get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
|
||||
@@ -130,11 +130,11 @@ pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn Col
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use super::*;
|
||||
use crate::column_values::u64_based::{
|
||||
serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
|
||||
ALL_U64_CODEC_TYPES,
|
||||
};
|
||||
use crate::column_values::CodecType;
|
||||
use crate::column_values::u64_based::{
|
||||
ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values,
|
||||
serialize_u64_based_column_values,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_serialize_deserialize_u128_header() {
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::ops::{Range, RangeInclusive};
|
||||
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use fastdivide::DividerU64;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
use crate::{ColumnValues, RowId};
|
||||
@@ -23,11 +23,7 @@ const fn div_ceil(n: u64, q: NonZeroU64) -> u64 {
|
||||
// copied from unstable rust standard library.
|
||||
let d = n / q.get();
|
||||
let r = n % q.get();
|
||||
if r > 0 {
|
||||
d + 1
|
||||
} else {
|
||||
d
|
||||
}
|
||||
if r > 0 { d + 1 } else { d }
|
||||
}
|
||||
|
||||
// The bitpacked codec applies a linear transformation `f` over data that are bitpacked.
|
||||
|
||||
@@ -4,12 +4,12 @@ use std::{io, iter};
|
||||
|
||||
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
|
||||
use fastdivide::DividerU64;
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||
|
||||
use crate::MonotonicallyMappableToU64;
|
||||
use crate::column_values::u64_based::line::Line;
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
use crate::column_values::{ColumnValues, VecColumn};
|
||||
use crate::MonotonicallyMappableToU64;
|
||||
|
||||
const BLOCK_SIZE: u32 = 512u32;
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use std::io;
|
||||
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||
|
||||
use super::line::Line;
|
||||
use super::ColumnValues;
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
use crate::column_values::VecColumn;
|
||||
use super::line::Line;
|
||||
use crate::RowId;
|
||||
use crate::column_values::VecColumn;
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
|
||||
const HALF_SPACE: u64 = u64::MAX / 2;
|
||||
const LINE_ESTIMATION_BLOCK_LEN: usize = 512;
|
||||
|
||||
@@ -17,7 +17,7 @@ pub use crate::column_values::u64_based::bitpacked::BitpackedCodec;
|
||||
pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec;
|
||||
pub use crate::column_values::u64_based::linear::LinearCodec;
|
||||
pub use crate::column_values::u64_based::stats_collector::StatsCollector;
|
||||
use crate::column_values::{monotonic_map_column, ColumnStats};
|
||||
use crate::column_values::{ColumnStats, monotonic_map_column};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::{ColumnValues, MonotonicallyMappableToU64};
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ use std::num::NonZeroU64;
|
||||
|
||||
use fastdivide::DividerU64;
|
||||
|
||||
use crate::column_values::ColumnStats;
|
||||
use crate::RowId;
|
||||
use crate::column_values::ColumnStats;
|
||||
|
||||
/// Compute the gcd of two non null numbers.
|
||||
///
|
||||
@@ -96,8 +96,8 @@ impl StatsCollector {
|
||||
mod tests {
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use crate::column_values::u64_based::stats_collector::{compute_gcd, StatsCollector};
|
||||
use crate::column_values::u64_based::ColumnStats;
|
||||
use crate::column_values::u64_based::stats_collector::{StatsCollector, compute_gcd};
|
||||
|
||||
fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
|
||||
let mut stats_collector = StatsCollector::default();
|
||||
|
||||
@@ -4,8 +4,8 @@ use std::net::Ipv6Addr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::value::NumericalType;
|
||||
use crate::InvalidData;
|
||||
use crate::value::NumericalType;
|
||||
|
||||
/// The column type represents the column type.
|
||||
/// Any changes need to be propagated to `COLUMN_TYPES`.
|
||||
|
||||
@@ -10,11 +10,11 @@ use std::sync::Arc;
|
||||
pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
||||
|
||||
use super::writer::ColumnarSerializer;
|
||||
use crate::column::{serialize_column_mappable_to_u128, serialize_column_mappable_to_u64};
|
||||
use crate::column::{serialize_column_mappable_to_u64, serialize_column_mappable_to_u128};
|
||||
use crate::column_values::MergedColumnValues;
|
||||
use crate::columnar::ColumnarReader;
|
||||
use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column;
|
||||
use crate::columnar::writer::CompatibleNumericalTypes;
|
||||
use crate::columnar::ColumnarReader;
|
||||
use crate::dynamic_column::DynamicColumn;
|
||||
use crate::{
|
||||
BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType,
|
||||
@@ -144,18 +144,19 @@ fn merge_column(
|
||||
let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> =
|
||||
Vec::with_capacity(columns_to_merge.len());
|
||||
for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
|
||||
if let Some(Column { index: idx, values }) =
|
||||
dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic)
|
||||
{
|
||||
match dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) {
|
||||
Some(Column { index: idx, values }) => {
|
||||
column_indexes.push(idx);
|
||||
column_values.push(Some(values));
|
||||
} else {
|
||||
}
|
||||
None => {
|
||||
column_indexes.push(ColumnIndex::Empty {
|
||||
num_docs: num_docs_per_column[i],
|
||||
});
|
||||
column_values.push(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
let merged_column_index =
|
||||
crate::column_index::merge_column_index(&column_indexes[..], merge_row_order);
|
||||
let merge_column_values = MergedColumnValues {
|
||||
@@ -253,11 +254,13 @@ impl GroupedColumns {
|
||||
}
|
||||
// At the moment, only the numerical column type category has more than one possible
|
||||
// column type.
|
||||
assert!(self
|
||||
.columns
|
||||
assert!(
|
||||
self.columns
|
||||
.iter()
|
||||
.flatten()
|
||||
.all(|el| ColumnTypeCategory::from(el.column_type()) == ColumnTypeCategory::Numerical));
|
||||
.all(|el| ColumnTypeCategory::from(el.column_type())
|
||||
== ColumnTypeCategory::Numerical)
|
||||
);
|
||||
merged_numerical_columns_type(self.columns.iter().flatten()).into()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,7 +74,8 @@ impl<'a> TermMerger<'a> {
|
||||
/// False if there is none.
|
||||
pub fn advance(&mut self) -> bool {
|
||||
self.advance_segments();
|
||||
if let Some(head) = self.heap.pop() {
|
||||
match self.heap.pop() {
|
||||
Some(head) => {
|
||||
self.term_streams_with_segment.push(head);
|
||||
while let Some(next_streamer) = self.heap.peek() {
|
||||
if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() {
|
||||
@@ -84,8 +85,8 @@ impl<'a> TermMerger<'a> {
|
||||
self.term_streams_with_segment.push(next_heap_it);
|
||||
}
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ use proptest::collection::vec;
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::*;
|
||||
use crate::columnar::{merge_columnar, ColumnarReader, MergeRowOrder, StackMergeOrder};
|
||||
use crate::columnar::{ColumnarReader, MergeRowOrder, StackMergeOrder, merge_columnar};
|
||||
use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId};
|
||||
|
||||
fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
|
||||
|
||||
@@ -5,9 +5,9 @@ mod reader;
|
||||
mod writer;
|
||||
|
||||
pub use column_type::{ColumnType, HasAssociatedColumnType};
|
||||
pub use format_version::{Version, CURRENT_VERSION};
|
||||
pub use format_version::{CURRENT_VERSION, Version};
|
||||
#[cfg(test)]
|
||||
pub(crate) use merge::ColumnTypeCategory;
|
||||
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
|
||||
pub use merge::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, merge_columnar};
|
||||
pub use reader::ColumnarReader;
|
||||
pub use writer::ColumnarWriter;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use std::{fmt, io, mem};
|
||||
|
||||
use common::BinarySerializable;
|
||||
use common::file_slice::FileSlice;
|
||||
use common::json_path_writer::JSON_PATH_SEGMENT_SEP;
|
||||
use common::BinarySerializable;
|
||||
use sstable::{Dictionary, RangeSSTable};
|
||||
|
||||
use crate::columnar::{format_version, ColumnType};
|
||||
use crate::columnar::{ColumnType, format_version};
|
||||
use crate::dynamic_column::DynamicColumnHandle;
|
||||
use crate::{RowId, Version};
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ impl ColumnWriter {
|
||||
&self,
|
||||
arena: &MemoryArena,
|
||||
buffer: &'a mut Vec<u8>,
|
||||
) -> impl Iterator<Item = ColumnOperation<V>> + 'a {
|
||||
) -> impl Iterator<Item = ColumnOperation<V>> + 'a + use<'a, V> {
|
||||
buffer.clear();
|
||||
self.values.read_to_end(arena, buffer);
|
||||
let mut cursor: &[u8] = &buffer[..];
|
||||
@@ -104,9 +104,10 @@ pub(crate) struct NumericalColumnWriter {
|
||||
|
||||
impl NumericalColumnWriter {
|
||||
pub fn force_numerical_type(&mut self, numerical_type: NumericalType) {
|
||||
assert!(self
|
||||
.compatible_numerical_types
|
||||
.is_type_accepted(numerical_type));
|
||||
assert!(
|
||||
self.compatible_numerical_types
|
||||
.is_type_accepted(numerical_type)
|
||||
);
|
||||
self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type);
|
||||
}
|
||||
}
|
||||
@@ -211,7 +212,7 @@ impl NumericalColumnWriter {
|
||||
self,
|
||||
arena: &MemoryArena,
|
||||
buffer: &'a mut Vec<u8>,
|
||||
) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a {
|
||||
) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a + use<'a> {
|
||||
self.column_writer.operation_iterator(arena, buffer)
|
||||
}
|
||||
}
|
||||
@@ -255,7 +256,7 @@ impl StrOrBytesColumnWriter {
|
||||
&self,
|
||||
arena: &MemoryArena,
|
||||
byte_buffer: &'a mut Vec<u8>,
|
||||
) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a {
|
||||
) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a + use<'a> {
|
||||
self.column_writer.operation_iterator(arena, byte_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,13 +8,13 @@ use std::net::Ipv6Addr;
|
||||
|
||||
use column_operation::ColumnOperation;
|
||||
pub(crate) use column_writers::CompatibleNumericalTypes;
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
use common::CountingWriter;
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
pub(crate) use serializer::ColumnarSerializer;
|
||||
use stacker::{Addr, ArenaHashMap, MemoryArena};
|
||||
|
||||
use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex};
|
||||
use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
|
||||
use crate::column_values::{MonotonicallyMappableToU64, MonotonicallyMappableToU128};
|
||||
use crate::columnar::column_type::ColumnType;
|
||||
use crate::columnar::writer::column_writers::{
|
||||
ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
|
||||
|
||||
@@ -3,11 +3,11 @@ use std::io::Write;
|
||||
|
||||
use common::json_path_writer::JSON_END_OF_PATH;
|
||||
use common::{BinarySerializable, CountingWriter};
|
||||
use sstable::value::RangeValueWriter;
|
||||
use sstable::RangeSSTable;
|
||||
use sstable::value::RangeValueWriter;
|
||||
|
||||
use crate::columnar::ColumnType;
|
||||
use crate::RowId;
|
||||
use crate::columnar::ColumnType;
|
||||
|
||||
pub struct ColumnarSerializer<W: io::Write> {
|
||||
wrt: CountingWriter<W>,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::RowId;
|
||||
use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};
|
||||
use crate::iterable::Iterable;
|
||||
use crate::RowId;
|
||||
|
||||
/// The `IndexBuilder` interprets a sequence of
|
||||
/// calls of the form:
|
||||
@@ -31,12 +31,13 @@ pub struct OptionalIndexBuilder {
|
||||
|
||||
impl OptionalIndexBuilder {
|
||||
pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ {
|
||||
debug_assert!(self
|
||||
.docs
|
||||
debug_assert!(
|
||||
self.docs
|
||||
.last()
|
||||
.copied()
|
||||
.map(|last_doc| last_doc < num_rows)
|
||||
.unwrap_or(true));
|
||||
.unwrap_or(true)
|
||||
);
|
||||
&self.docs[..]
|
||||
}
|
||||
|
||||
@@ -48,12 +49,13 @@ impl OptionalIndexBuilder {
|
||||
impl IndexBuilder for OptionalIndexBuilder {
|
||||
#[inline(always)]
|
||||
fn record_row(&mut self, doc: RowId) {
|
||||
debug_assert!(self
|
||||
.docs
|
||||
debug_assert!(
|
||||
self.docs
|
||||
.last()
|
||||
.copied()
|
||||
.map(|prev_doc| doc > prev_doc)
|
||||
.unwrap_or(true));
|
||||
.unwrap_or(true)
|
||||
);
|
||||
self.docs.push(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,8 +3,8 @@ use std::path::PathBuf;
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::{
|
||||
merge_columnar, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
|
||||
CURRENT_VERSION,
|
||||
CURRENT_VERSION, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
|
||||
merge_columnar,
|
||||
};
|
||||
|
||||
const NUM_DOCS: u32 = u16::MAX as u32;
|
||||
|
||||
@@ -6,7 +6,7 @@ use common::file_slice::FileSlice;
|
||||
use common::{ByteCount, DateTime, HasLen, OwnedBytes};
|
||||
|
||||
use crate::column::{BytesColumn, Column, StrColumn};
|
||||
use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
|
||||
use crate::column_values::{StrictlyMonotonicFn, monotonic_map_column};
|
||||
use crate::columnar::ColumnType;
|
||||
use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version};
|
||||
|
||||
|
||||
@@ -44,11 +44,11 @@ pub use block_accessor::ColumnBlockAccessor;
|
||||
pub use column::{BytesColumn, Column, StrColumn};
|
||||
pub use column_index::ColumnIndex;
|
||||
pub use column_values::{
|
||||
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
||||
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
|
||||
};
|
||||
pub use columnar::{
|
||||
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
|
||||
CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
|
||||
};
|
||||
use sstable::VoidSSTable;
|
||||
pub use value::{NumericalType, NumericalValue};
|
||||
|
||||
@@ -716,8 +716,8 @@ fn test_columnar_merging_number_columns() {
|
||||
// TODO document edge case: required_columns incompatible with values.
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn columnar_docs_and_remap(
|
||||
) -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
|
||||
fn columnar_docs_and_remap()
|
||||
-> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
|
||||
proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map(
|
||||
|columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| {
|
||||
let row_addrs: Vec<RowAddr> = columnars_docs
|
||||
|
||||
@@ -3,7 +3,7 @@ name = "tantivy-common"
|
||||
version = "0.7.0"
|
||||
authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
|
||||
license = "MIT"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
description = "common traits and utility functions used by multiple tantivy subcrates"
|
||||
documentation = "https://docs.rs/tantivy_common/"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use binggan::{black_box, BenchRunner};
|
||||
use binggan::{BenchRunner, black_box};
|
||||
use rand::seq::IteratorRandom;
|
||||
use rand::thread_rng;
|
||||
use tantivy_common::{serialize_vint_u32, BitSet, TinySet};
|
||||
use tantivy_common::{BitSet, TinySet, serialize_vint_u32};
|
||||
|
||||
fn bench_vint() {
|
||||
let mut runner = BenchRunner::new();
|
||||
|
||||
@@ -65,11 +65,11 @@ pub fn transform_bound_inner_res<TFrom, TTo>(
|
||||
) -> io::Result<Bound<TTo>> {
|
||||
use self::Bound::*;
|
||||
Ok(match bound {
|
||||
Excluded(ref from_val) => match transform(from_val)? {
|
||||
Excluded(from_val) => match transform(from_val)? {
|
||||
TransformBound::NewBound(new_val) => new_val,
|
||||
TransformBound::Existing(new_val) => Excluded(new_val),
|
||||
},
|
||||
Included(ref from_val) => match transform(from_val)? {
|
||||
Included(from_val) => match transform(from_val)? {
|
||||
TransformBound::NewBound(new_val) => new_val,
|
||||
TransformBound::Existing(new_val) => Included(new_val),
|
||||
},
|
||||
@@ -85,11 +85,11 @@ pub fn transform_bound_inner<TFrom, TTo>(
|
||||
) -> Bound<TTo> {
|
||||
use self::Bound::*;
|
||||
match bound {
|
||||
Excluded(ref from_val) => match transform(from_val) {
|
||||
Excluded(from_val) => match transform(from_val) {
|
||||
TransformBound::NewBound(new_val) => new_val,
|
||||
TransformBound::Existing(new_val) => Excluded(new_val),
|
||||
},
|
||||
Included(ref from_val) => match transform(from_val) {
|
||||
Included(from_val) => match transform(from_val) {
|
||||
TransformBound::NewBound(new_val) => new_val,
|
||||
TransformBound::Existing(new_val) => Included(new_val),
|
||||
},
|
||||
@@ -111,8 +111,8 @@ pub fn map_bound<TFrom, TTo>(
|
||||
) -> Bound<TTo> {
|
||||
use self::Bound::*;
|
||||
match bound {
|
||||
Excluded(ref from_val) => Bound::Excluded(transform(from_val)),
|
||||
Included(ref from_val) => Bound::Included(transform(from_val)),
|
||||
Excluded(from_val) => Bound::Excluded(transform(from_val)),
|
||||
Included(from_val) => Bound::Included(transform(from_val)),
|
||||
Unbounded => Unbounded,
|
||||
}
|
||||
}
|
||||
@@ -123,8 +123,8 @@ pub fn map_bound_res<TFrom, TTo, Err>(
|
||||
) -> Result<Bound<TTo>, Err> {
|
||||
use self::Bound::*;
|
||||
Ok(match bound {
|
||||
Excluded(ref from_val) => Excluded(transform(from_val)?),
|
||||
Included(ref from_val) => Included(transform(from_val)?),
|
||||
Excluded(from_val) => Excluded(transform(from_val)?),
|
||||
Included(from_val) => Included(transform(from_val)?),
|
||||
Unbounded => Unbounded,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -346,8 +346,8 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::{FileHandle, FileSlice};
|
||||
use crate::file_slice::combine_ranges;
|
||||
use crate::HasLen;
|
||||
use crate::file_slice::combine_ranges;
|
||||
|
||||
#[test]
|
||||
fn test_file_slice() -> io::Result<()> {
|
||||
|
||||
@@ -22,7 +22,7 @@ pub use json_path_writer::JsonPathWriter;
|
||||
pub use ownedbytes::{OwnedBytes, StableDeref};
|
||||
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
|
||||
pub use vint::{
|
||||
read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128,
|
||||
VInt, VIntU128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint,
|
||||
};
|
||||
pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
|
||||
|
||||
@@ -177,8 +177,10 @@ pub(crate) mod test {
|
||||
|
||||
#[test]
|
||||
fn test_f64_order() {
|
||||
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
|
||||
.contains(&f64_to_u64(f64::NAN))); // nan is not a number
|
||||
assert!(
|
||||
!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
|
||||
.contains(&f64_to_u64(f64::NAN))
|
||||
); // nan is not a number
|
||||
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
|
||||
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
|
||||
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
|
||||
|
||||
@@ -222,7 +222,7 @@ impl BinarySerializable for VInt {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::{serialize_vint_u32, BinarySerializable, VInt};
|
||||
use super::{BinarySerializable, VInt, serialize_vint_u32};
|
||||
|
||||
fn aux_test_vint(val: u64) {
|
||||
let mut v = [14u8; 10];
|
||||
|
||||
@@ -9,7 +9,7 @@ homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
repository = "https://github.com/quickwit-oss/tantivy"
|
||||
readme = "README.md"
|
||||
keywords = ["search", "information", "retrieval"]
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
nom = "7"
|
||||
|
||||
@@ -186,19 +186,19 @@ macro_rules! tuple_trait_impl(
|
||||
);
|
||||
|
||||
macro_rules! tuple_trait_inner(
|
||||
($it:tt, $self:expr, $input:expr, (), $error_list:expr, $head:ident $($id:ident)+) => ({
|
||||
($it:tt, $self:expr_2021, $input:expr_2021, (), $error_list:expr_2021, $head:ident $($id:ident)+) => ({
|
||||
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
||||
$error_list.append(&mut err);
|
||||
|
||||
succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+))
|
||||
});
|
||||
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident $($id:ident)+) => ({
|
||||
($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident $($id:ident)+) => ({
|
||||
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
||||
$error_list.append(&mut err);
|
||||
|
||||
succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+))
|
||||
});
|
||||
($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident) => ({
|
||||
($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident) => ({
|
||||
let (i, (o, mut err)) = $self.$it.parse($input.clone())?;
|
||||
$error_list.append(&mut err);
|
||||
|
||||
@@ -328,13 +328,13 @@ macro_rules! alt_trait_impl(
|
||||
);
|
||||
|
||||
macro_rules! alt_trait_inner(
|
||||
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
|
||||
($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => (
|
||||
match $self.$it.0.parse($input.clone()) {
|
||||
Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)),
|
||||
Ok((input_left, _)) => Some($self.$it.1.parse(input_left)),
|
||||
}
|
||||
);
|
||||
($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident) => (
|
||||
($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident) => (
|
||||
None
|
||||
);
|
||||
);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::borrow::Cow;
|
||||
use std::iter::once;
|
||||
|
||||
use nom::IResult;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::{
|
||||
@@ -10,12 +11,11 @@ use nom::combinator::{eof, map, map_res, opt, peek, recognize, value, verify};
|
||||
use nom::error::{Error, ErrorKind};
|
||||
use nom::multi::{many0, many1, separated_list0};
|
||||
use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
|
||||
use nom::IResult;
|
||||
|
||||
use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
|
||||
use crate::Occur;
|
||||
use crate::infallible::*;
|
||||
use crate::user_input_ast::Delimiter;
|
||||
use crate::Occur;
|
||||
|
||||
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
|
||||
// special characters.
|
||||
@@ -1030,7 +1030,7 @@ fn rewrite_ast(mut input: UserInputAst) -> UserInputAst {
|
||||
|
||||
fn rewrite_ast_clause(input: &mut (Option<Occur>, UserInputAst)) {
|
||||
match input {
|
||||
(None, UserInputAst::Clause(ref mut clauses)) if clauses.len() == 1 => {
|
||||
(None, UserInputAst::Clause(clauses)) if clauses.len() == 1 => {
|
||||
*input = clauses.pop().unwrap(); // safe because clauses.len() == 1
|
||||
}
|
||||
_ => {}
|
||||
|
||||
@@ -51,7 +51,7 @@ impl UserInputLeaf {
|
||||
|
||||
pub(crate) fn set_default_field(&mut self, default_field: String) {
|
||||
match self {
|
||||
UserInputLeaf::Literal(ref mut literal) if literal.field_name.is_none() => {
|
||||
UserInputLeaf::Literal(literal) if literal.field_name.is_none() => {
|
||||
literal.field_name = Some(default_field)
|
||||
}
|
||||
UserInputLeaf::All => {
|
||||
@@ -59,12 +59,8 @@ impl UserInputLeaf {
|
||||
field: default_field,
|
||||
}
|
||||
}
|
||||
UserInputLeaf::Range { ref mut field, .. } if field.is_none() => {
|
||||
*field = Some(default_field)
|
||||
}
|
||||
UserInputLeaf::Set { ref mut field, .. } if field.is_none() => {
|
||||
*field = Some(default_field)
|
||||
}
|
||||
UserInputLeaf::Range { field, .. } if field.is_none() => *field = Some(default_field),
|
||||
UserInputLeaf::Set { field, .. } if field.is_none() => *field = Some(default_field),
|
||||
_ => (), // field was already set, do nothing
|
||||
}
|
||||
}
|
||||
@@ -75,11 +71,11 @@ impl Debug for UserInputLeaf {
|
||||
match self {
|
||||
UserInputLeaf::Literal(literal) => literal.fmt(formatter),
|
||||
UserInputLeaf::Range {
|
||||
ref field,
|
||||
ref lower,
|
||||
ref upper,
|
||||
field,
|
||||
lower,
|
||||
upper,
|
||||
} => {
|
||||
if let Some(ref field) = field {
|
||||
if let Some(field) = field {
|
||||
// TODO properly escape field (in case of \")
|
||||
write!(formatter, "\"{field}\":")?;
|
||||
}
|
||||
@@ -89,7 +85,7 @@ impl Debug for UserInputLeaf {
|
||||
Ok(())
|
||||
}
|
||||
UserInputLeaf::Set { field, elements } => {
|
||||
if let Some(ref field) = field {
|
||||
if let Some(field) = field {
|
||||
// TODO properly escape field (in case of \")
|
||||
write!(formatter, "\"{field}\": ")?;
|
||||
}
|
||||
@@ -267,7 +263,7 @@ impl UserInputAst {
|
||||
.iter_mut()
|
||||
.for_each(|(_, ast)| ast.set_default_field(field.clone())),
|
||||
UserInputAst::Leaf(leaf) => leaf.set_default_field(field),
|
||||
UserInputAst::Boost(ref mut ast, _) => ast.set_default_field(field),
|
||||
UserInputAst::Boost(ast, _) => ast.set_default_field(field),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "tantivy-sstable"
|
||||
version = "0.3.0"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
repository = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::file_slice::FileSlice;
|
||||
use common::OwnedBytes;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use common::file_slice::FileSlice;
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
||||
|
||||
fn make_test_sstable(suffix: &str) -> FileSlice {
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::collections::BTreeSet;
|
||||
use std::io;
|
||||
|
||||
use common::file_slice::FileSlice;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use tantivy_sstable::{Dictionary, MonotonicU64SSTable};
|
||||
|
||||
@@ -51,18 +51,21 @@ impl BlockReader {
|
||||
let block_len = match self.reader.len() {
|
||||
0 => {
|
||||
// we are out of data for this block. Check if we have another block after
|
||||
if let Some(new_reader) = self.next_readers.next() {
|
||||
match self.next_readers.next() {
|
||||
Some(new_reader) => {
|
||||
self.reader = new_reader;
|
||||
continue;
|
||||
} else {
|
||||
}
|
||||
_ => {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
1..=3 => {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
"failed to read block_len",
|
||||
))
|
||||
));
|
||||
}
|
||||
_ => self.reader.read_u32() as usize,
|
||||
};
|
||||
|
||||
@@ -5,7 +5,7 @@ use common::{CountingWriter, OwnedBytes};
|
||||
use zstd::bulk::Compressor;
|
||||
|
||||
use super::value::ValueWriter;
|
||||
use super::{value, vint, BlockReader};
|
||||
use super::{BlockReader, value, vint};
|
||||
|
||||
const FOUR_BIT_LIMITS: usize = 1 << 4;
|
||||
const VINT_MODE: u8 = 1u8;
|
||||
|
||||
@@ -6,13 +6,13 @@ use std::marker::PhantomData;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::bounds::{transform_bound_inner_res, TransformBound};
|
||||
use common::bounds::{TransformBound, transform_bound_inner_res};
|
||||
use common::file_slice::FileSlice;
|
||||
use common::{BinarySerializable, OwnedBytes};
|
||||
use futures_util::{stream, StreamExt, TryStreamExt};
|
||||
use futures_util::{StreamExt, TryStreamExt, stream};
|
||||
use itertools::Itertools;
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
use tantivy_fst::Automaton;
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
|
||||
use crate::sstable_index_v3::SSTableIndexV3Empty;
|
||||
use crate::streamer::{Streamer, StreamerBuilder};
|
||||
@@ -311,7 +311,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("Unsupported sstable version, expected one of [2, 3], found {version}"),
|
||||
))
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -644,8 +644,8 @@ mod tests {
|
||||
use common::OwnedBytes;
|
||||
|
||||
use super::Dictionary;
|
||||
use crate::dictionary::TermOrdHit;
|
||||
use crate::MonotonicU64SSTable;
|
||||
use crate::dictionary::TermOrdHit;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PermissionedHandle {
|
||||
@@ -914,30 +914,33 @@ mod tests {
|
||||
|
||||
// Single term
|
||||
let mut terms = Vec::new();
|
||||
assert!(dic
|
||||
.sorted_ords_to_term_cb(100_000..100_001, |term| {
|
||||
assert!(
|
||||
dic.sorted_ords_to_term_cb(100_000..100_001, |term| {
|
||||
terms.push(term.to_vec());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap());
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(terms, vec![format!("{:05X}", 100_000).into_bytes(),]);
|
||||
// Single term
|
||||
let mut terms = Vec::new();
|
||||
assert!(dic
|
||||
.sorted_ords_to_term_cb(100_001..100_002, |term| {
|
||||
assert!(
|
||||
dic.sorted_ords_to_term_cb(100_001..100_002, |term| {
|
||||
terms.push(term.to_vec());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap());
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(terms, vec![format!("{:05X}", 100_001).into_bytes(),]);
|
||||
// both terms
|
||||
let mut terms = Vec::new();
|
||||
assert!(dic
|
||||
.sorted_ords_to_term_cb(100_000..100_002, |term| {
|
||||
assert!(
|
||||
dic.sorted_ords_to_term_cb(100_000..100_002, |term| {
|
||||
terms.push(term.to_vec());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap());
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
terms,
|
||||
vec![
|
||||
@@ -947,12 +950,13 @@ mod tests {
|
||||
);
|
||||
// Test cross block
|
||||
let mut terms = Vec::new();
|
||||
assert!(dic
|
||||
.sorted_ords_to_term_cb(98653..=98655, |term| {
|
||||
assert!(
|
||||
dic.sorted_ords_to_term_cb(98653..=98655, |term| {
|
||||
terms.push(term.to_vec());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap());
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
terms,
|
||||
vec![
|
||||
|
||||
@@ -322,7 +322,7 @@ mod test {
|
||||
|
||||
use common::OwnedBytes;
|
||||
|
||||
use super::{common_prefix_len, MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable};
|
||||
use super::{MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable, common_prefix_len};
|
||||
|
||||
fn aux_test_common_prefix_len(left: &str, right: &str, expect_len: usize) {
|
||||
assert_eq!(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::binary_heap::PeekMut;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::collections::binary_heap::PeekMut;
|
||||
use std::io;
|
||||
|
||||
use super::{SingleValueMerger, ValueMerger};
|
||||
@@ -41,15 +41,18 @@ pub fn merge_sstable<SST: SSTable, W: io::Write, M: ValueMerger<SST::Value>>(
|
||||
loop {
|
||||
let len = heap.len();
|
||||
let mut value_merger;
|
||||
if let Some(mut head) = heap.peek_mut() {
|
||||
match heap.peek_mut() {
|
||||
Some(mut head) => {
|
||||
writer.insert_key(head.0.key()).unwrap();
|
||||
value_merger = merger.new_value(head.0.value());
|
||||
if !head.0.advance()? {
|
||||
PeekMut::pop(head);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
_ => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for _ in 0..len - 1 {
|
||||
if let Some(mut head) = heap.peek_mut() {
|
||||
if head.0.key() == writer.last_inserted_key() {
|
||||
|
||||
@@ -3,12 +3,12 @@ use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common::{BinarySerializable, FixedSize, OwnedBytes};
|
||||
use tantivy_bitpacker::{compute_num_bits, BitPacker};
|
||||
use tantivy_bitpacker::{BitPacker, compute_num_bits};
|
||||
use tantivy_fst::raw::Fst;
|
||||
use tantivy_fst::{Automaton, IntoStreamer, Map, MapBuilder, Streamer};
|
||||
|
||||
use crate::block_match_automaton::can_block_match_automaton;
|
||||
use crate::{common_prefix_len, SSTableDataCorruption, TermOrdinal};
|
||||
use crate::{SSTableDataCorruption, TermOrdinal, common_prefix_len};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SSTableIndex {
|
||||
@@ -824,8 +824,8 @@ mod tests {
|
||||
use common::OwnedBytes;
|
||||
|
||||
use super::*;
|
||||
use crate::block_match_automaton::tests::EqBuffer;
|
||||
use crate::SSTableDataCorruption;
|
||||
use crate::block_match_automaton::tests::EqBuffer;
|
||||
|
||||
#[test]
|
||||
fn test_sstable_index() {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use std::io;
|
||||
use std::ops::Bound;
|
||||
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
use tantivy_fst::Automaton;
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
|
||||
use crate::dictionary::Dictionary;
|
||||
use crate::{DeltaReader, SSTable, TermOrdinal};
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::io;
|
||||
|
||||
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter};
|
||||
use crate::{vint, BlockAddr};
|
||||
use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
|
||||
use crate::{BlockAddr, vint};
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct IndexValueReader {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter};
|
||||
use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
|
||||
|
||||
/// See module comment.
|
||||
#[derive(Default)]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::io;
|
||||
|
||||
use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter};
|
||||
use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64};
|
||||
use crate::vint;
|
||||
|
||||
#[derive(Default)]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "tantivy-stacker"
|
||||
version = "0.3.0"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
homepage = "https://github.com/quickwit-oss/tantivy"
|
||||
repository = "https://github.com/quickwit-oss/tantivy"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use binggan::plugins::PeakMemAllocPlugin;
|
||||
use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
|
||||
use binggan::{BenchRunner, INSTRUMENTED_SYSTEM, PeakMemAlloc, black_box};
|
||||
use rand::SeedableRng;
|
||||
use rustc_hash::FxHashMap;
|
||||
use tantivy_stacker::{ArenaHashMap, ExpUnrolledLinkedList, MemoryArena};
|
||||
|
||||
@@ -13,7 +13,7 @@ mod shared_arena_hashmap;
|
||||
pub use self::arena_hashmap::ArenaHashMap;
|
||||
pub use self::expull::ExpUnrolledLinkedList;
|
||||
pub use self::memory_arena::{Addr, MemoryArena};
|
||||
pub use self::shared_arena_hashmap::{compute_table_memory_size, SharedArenaHashMap};
|
||||
pub use self::shared_arena_hashmap::{SharedArenaHashMap, compute_table_memory_size};
|
||||
|
||||
/// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key.
|
||||
pub type UnorderedId = u32;
|
||||
|
||||
@@ -356,7 +356,7 @@ mod tests {
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::{compute_previous_power_of_two, SharedArenaHashMap};
|
||||
use super::{SharedArenaHashMap, compute_previous_power_of_two};
|
||||
use crate::MemoryArena;
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user