From 5379c99ea2a4639b659f640d9fd2c43030929161 Mon Sep 17 00:00:00 2001 From: PSeitz Date: Fri, 18 Apr 2025 04:56:31 +0200 Subject: [PATCH] update edition to 2024 (#2620) * update common to edition 2024 * update bitpacker to edition 2024 * update stacker to edition 2024 * update query-grammar to edition 2024 * update sstable to edition 2024 + fmt * fmt * update columnar to edition 2024 * cargo fmt * use None instead of _ --- Cargo.toml | 2 +- bitpacker/Cargo.toml | 2 +- bitpacker/src/blocked_bitpacker.rs | 2 +- bitpacker/src/lib.rs | 6 +-- columnar/Cargo.toml | 2 +- columnar/benches/bench_access.rs | 2 +- columnar/benches/bench_first_vals.rs | 4 +- columnar/benches/bench_merge.rs | 2 +- columnar/benches/bench_values_u128.rs | 2 +- columnar/benches/bench_values_u64.rs | 2 +- columnar/src/block_accessor.rs | 2 +- columnar/src/column/dictionary_encoded.rs | 2 +- columnar/src/column/mod.rs | 7 ++-- columnar/src/column/serialize.rs | 4 +- columnar/src/column_index/merge/mod.rs | 4 +- columnar/src/column_index/merge/shuffled.rs | 2 +- columnar/src/column_index/merge/stacked.rs | 2 +- columnar/src/column_index/mod.rs | 2 +- .../src/column_index/multivalued_index.rs | 2 +- .../src/column_index/optional_index/mod.rs | 14 ++++--- .../optional_index/set_block/dense.rs | 2 +- .../optional_index/set_block/mod.rs | 2 +- .../src/column_index/optional_index/tests.rs | 6 +-- columnar/src/column_index/serialize.rs | 4 +- columnar/src/column_values/bench.rs | 2 +- columnar/src/column_values/mod.rs | 12 +++--- .../src/column_values/monotonic_column.rs | 4 +- .../u128_based/compact_space/mod.rs | 16 ++++---- columnar/src/column_values/u128_based/mod.rs | 8 ++-- .../src/column_values/u64_based/bitpacked.rs | 8 +--- .../u64_based/blockwise_linear.rs | 4 +- .../src/column_values/u64_based/linear.rs | 8 ++-- columnar/src/column_values/u64_based/mod.rs | 2 +- .../u64_based/stats_collector.rs | 4 +- columnar/src/columnar/column_type.rs | 2 +- columnar/src/columnar/merge/mod.rs | 37 +++++++++--------- columnar/src/columnar/merge/term_merger.rs | 21 +++++----- columnar/src/columnar/merge/tests.rs | 2 +- columnar/src/columnar/mod.rs | 4 +- columnar/src/columnar/reader/mod.rs | 4 +- .../src/columnar/writer/column_writers.rs | 13 ++++--- columnar/src/columnar/writer/mod.rs | 4 +- columnar/src/columnar/writer/serializer.rs | 4 +- columnar/src/columnar/writer/value_index.rs | 28 +++++++------- columnar/src/compat_tests.rs | 4 +- columnar/src/dynamic_column.rs | 2 +- columnar/src/lib.rs | 6 +-- columnar/src/tests.rs | 4 +- common/Cargo.toml | 2 +- common/benches/bench.rs | 4 +- common/src/bounds.rs | 16 ++++---- common/src/file_slice.rs | 4 +- common/src/lib.rs | 8 ++-- common/src/vint.rs | 2 +- query-grammar/Cargo.toml | 2 +- query-grammar/src/infallible.rs | 10 ++--- query-grammar/src/query_grammar.rs | 8 ++-- query-grammar/src/user_input_ast.rs | 22 +++++------ sstable/Cargo.toml | 2 +- sstable/benches/ord_to_term.rs | 4 +- sstable/benches/stream_bench.rs | 2 +- sstable/src/block_reader.rs | 15 +++++--- sstable/src/delta.rs | 2 +- sstable/src/dictionary.rs | 38 ++++++++++--------- sstable/src/lib.rs | 2 +- sstable/src/merge/heap_merge.rs | 19 ++++++---- sstable/src/sstable_index_v3.rs | 6 +-- sstable/src/streamer.rs | 2 +- sstable/src/value/index.rs | 4 +- sstable/src/value/range.rs | 2 +- sstable/src/value/u64_monotonic.rs | 2 +- stacker/Cargo.toml | 2 +- stacker/benches/bench.rs | 2 +- stacker/src/lib.rs | 2 +- stacker/src/shared_arena_hashmap.rs | 2 +- 75 files changed, 239 insertions(+), 231 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 43f760e28..cab07efcd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy" readme = "README.md" keywords = ["search", "information", "retrieval"] edition = "2021" -rust-version = "1.75" +rust-version = "1.85" exclude = ["benches/*.json", "benches/*.txt"] [dependencies] diff --git a/bitpacker/Cargo.toml b/bitpacker/Cargo.toml index 104f5f805..0b99e6ac4 100644 --- a/bitpacker/Cargo.toml +++ b/bitpacker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tantivy-bitpacker" version = "0.6.0" -edition = "2021" +edition = "2024" authors = ["Paul Masurel "] license = "MIT" categories = [] diff --git a/bitpacker/src/blocked_bitpacker.rs b/bitpacker/src/blocked_bitpacker.rs index e82a92de7..9438bee41 100644 --- a/bitpacker/src/blocked_bitpacker.rs +++ b/bitpacker/src/blocked_bitpacker.rs @@ -1,6 +1,6 @@ use super::bitpacker::BitPacker; use super::compute_num_bits; -use crate::{minmax, BitUnpacker}; +use crate::{BitUnpacker, minmax}; const BLOCK_SIZE: usize = 128; diff --git a/bitpacker/src/lib.rs b/bitpacker/src/lib.rs index b2eacec05..ec78bdedd 100644 --- a/bitpacker/src/lib.rs +++ b/bitpacker/src/lib.rs @@ -33,11 +33,7 @@ pub use crate::blocked_bitpacker::BlockedBitpacker; /// number of bits. pub fn compute_num_bits(n: u64) -> u8 { let amplitude = (64u32 - n.leading_zeros()) as u8; - if amplitude <= 64 - 8 { - amplitude - } else { - 64 - } + if amplitude <= 64 - 8 { amplitude } else { 64 } } /// Computes the (min, max) of an iterator of `PartialOrd` values. diff --git a/columnar/Cargo.toml b/columnar/Cargo.toml index 1048d13fe..b17606c08 100644 --- a/columnar/Cargo.toml +++ b/columnar/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tantivy-columnar" version = "0.3.0" -edition = "2021" +edition = "2024" license = "MIT" homepage = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy" diff --git a/columnar/benches/bench_access.rs b/columnar/benches/bench_access.rs index e40d3f179..a591d3cbc 100644 --- a/columnar/benches/bench_access.rs +++ b/columnar/benches/bench_access.rs @@ -1,4 +1,4 @@ -use binggan::{black_box, InputGroup}; +use binggan::{InputGroup, black_box}; use common::*; use tantivy_columnar::Column; diff --git a/columnar/benches/bench_first_vals.rs b/columnar/benches/bench_first_vals.rs index bfc943176..cd0af0e1a 100644 --- a/columnar/benches/bench_first_vals.rs +++ b/columnar/benches/bench_first_vals.rs @@ -4,9 +4,9 @@ extern crate test; use std::sync::Arc; use rand::prelude::*; -use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType}; +use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values}; use tantivy_columnar::*; -use test::{black_box, Bencher}; +use test::{Bencher, black_box}; struct Columns { pub optional: Column, diff --git a/columnar/benches/bench_merge.rs b/columnar/benches/bench_merge.rs index ec1bca942..a4b6c3b3f 100644 --- a/columnar/benches/bench_merge.rs +++ b/columnar/benches/bench_merge.rs @@ -1,7 +1,7 @@ pub mod common; use binggan::BenchRunner; -use common::{generate_columnar_with_name, Card}; +use common::{Card, generate_columnar_with_name}; use tantivy_columnar::*; const NUM_DOCS: u32 = 100_000; diff --git a/columnar/benches/bench_values_u128.rs b/columnar/benches/bench_values_u128.rs index d2f550544..0297fbe73 100644 --- a/columnar/benches/bench_values_u128.rs +++ b/columnar/benches/bench_values_u128.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use common::OwnedBytes; use rand::rngs::StdRng; use rand::seq::SliceRandom; -use rand::{random, Rng, SeedableRng}; +use rand::{Rng, SeedableRng, random}; use tantivy_columnar::ColumnValues; use test::Bencher; extern crate test; diff --git a/columnar/benches/bench_values_u64.rs b/columnar/benches/bench_values_u64.rs index 313a85754..26de02e35 100644 --- a/columnar/benches/bench_values_u64.rs +++ b/columnar/benches/bench_values_u64.rs @@ -5,7 +5,7 @@ use std::ops::RangeInclusive; use std::sync::Arc; use rand::prelude::*; -use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType}; +use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values}; use tantivy_columnar::*; use test::Bencher; diff --git a/columnar/src/block_accessor.rs b/columnar/src/block_accessor.rs index 6f47a97eb..6bd24ba3b 100644 --- a/columnar/src/block_accessor.rs +++ b/columnar/src/block_accessor.rs @@ -66,7 +66,7 @@ impl &'a self, docs: &'a [u32], accessor: &Column, - ) -> impl Iterator + 'a { + ) -> impl Iterator + 'a + use<'a, T> { if accessor.index.get_cardinality().is_full() { docs.iter().cloned().zip(self.val_cache.iter().cloned()) } else { diff --git a/columnar/src/column/dictionary_encoded.rs b/columnar/src/column/dictionary_encoded.rs index 90cdfc1d2..816bb1538 100644 --- a/columnar/src/column/dictionary_encoded.rs +++ b/columnar/src/column/dictionary_encoded.rs @@ -4,8 +4,8 @@ use std::{fmt, io}; use sstable::{Dictionary, VoidSSTable}; -use crate::column::Column; use crate::RowId; +use crate::column::Column; /// Dictionary encoded column. /// diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index 4349c7407..ab2fca6b9 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -9,13 +9,14 @@ use std::sync::Arc; use common::BinarySerializable; pub use dictionary_encoded::{BytesColumn, StrColumn}; pub use serialize::{ - open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64, - open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64, + open_column_bytes, open_column_str, open_column_u64, open_column_u128, + open_column_u128_as_compact_u64, serialize_column_mappable_to_u64, + serialize_column_mappable_to_u128, }; use crate::column_index::{ColumnIndex, Set}; use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal; -use crate::column_values::{monotonic_map_column, ColumnValues}; +use crate::column_values::{ColumnValues, monotonic_map_column}; use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId}; #[derive(Clone)] diff --git a/columnar/src/column/serialize.rs b/columnar/src/column/serialize.rs index 73fc5e7f5..e2127933b 100644 --- a/columnar/src/column/serialize.rs +++ b/columnar/src/column/serialize.rs @@ -6,10 +6,10 @@ use common::OwnedBytes; use sstable::Dictionary; use crate::column::{BytesColumn, Column}; -use crate::column_index::{serialize_column_index, SerializableColumnIndex}; +use crate::column_index::{SerializableColumnIndex, serialize_column_index}; use crate::column_values::{ + CodecType, MonotonicallyMappableToU64, MonotonicallyMappableToU128, load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values, - CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64, }; use crate::iterable::Iterable; use crate::{StrColumn, Version}; diff --git a/columnar/src/column_index/merge/mod.rs b/columnar/src/column_index/merge/mod.rs index ea09bdba1..816d45d74 100644 --- a/columnar/src/column_index/merge/mod.rs +++ b/columnar/src/column_index/merge/mod.rs @@ -99,9 +99,9 @@ mod tests { use crate::column_index::merge::detect_cardinality; use crate::column_index::multivalued_index::{ - open_multivalued_index, serialize_multivalued_index, MultiValueIndex, + MultiValueIndex, open_multivalued_index, serialize_multivalued_index, }; - use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex}; + use crate::column_index::{OptionalIndex, SerializableColumnIndex, merge_column_index}; use crate::{ Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder, }; diff --git a/columnar/src/column_index/merge/shuffled.rs b/columnar/src/column_index/merge/shuffled.rs index b46669985..f560fa154 100644 --- a/columnar/src/column_index/merge/shuffled.rs +++ b/columnar/src/column_index/merge/shuffled.rs @@ -137,8 +137,8 @@ impl Iterable for ShuffledMultivaluedIndex<'_> { #[cfg(test)] mod tests { use super::*; - use crate::column_index::OptionalIndex; use crate::RowAddr; + use crate::column_index::OptionalIndex; #[test] fn test_integrate_num_vals_empty() { diff --git a/columnar/src/column_index/merge/stacked.rs b/columnar/src/column_index/merge/stacked.rs index 32724f5b0..8a2742d63 100644 --- a/columnar/src/column_index/merge/stacked.rs +++ b/columnar/src/column_index/merge/stacked.rs @@ -1,8 +1,8 @@ use std::ops::Range; +use crate::column_index::SerializableColumnIndex; use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex}; use crate::column_index::serialize::SerializableOptionalIndex; -use crate::column_index::SerializableColumnIndex; use crate::iterable::Iterable; use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder}; diff --git a/columnar/src/column_index/mod.rs b/columnar/src/column_index/mod.rs index a692c40ec..0003c1a3b 100644 --- a/columnar/src/column_index/mod.rs +++ b/columnar/src/column_index/mod.rs @@ -14,7 +14,7 @@ pub use merge::merge_column_index; pub(crate) use multivalued_index::SerializableMultivalueIndex; pub use optional_index::{OptionalIndex, Set}; pub use serialize::{ - open_column_index, serialize_column_index, SerializableColumnIndex, SerializableOptionalIndex, + SerializableColumnIndex, SerializableOptionalIndex, open_column_index, serialize_column_index, }; use crate::column_index::multivalued_index::MultiValueIndex; diff --git a/columnar/src/column_index/multivalued_index.rs b/columnar/src/column_index/multivalued_index.rs index cef5a1221..953aec245 100644 --- a/columnar/src/column_index/multivalued_index.rs +++ b/columnar/src/column_index/multivalued_index.rs @@ -8,7 +8,7 @@ use common::{CountingWriter, OwnedBytes}; use super::optional_index::{open_optional_index, serialize_optional_index}; use super::{OptionalIndex, SerializableOptionalIndex, Set}; use crate::column_values::{ - load_u64_based_column_values, serialize_u64_based_column_values, CodecType, ColumnValues, + CodecType, ColumnValues, load_u64_based_column_values, serialize_u64_based_column_values, }; use crate::iterable::Iterable; use crate::{DocId, RowId, Version}; diff --git a/columnar/src/column_index/optional_index/mod.rs b/columnar/src/column_index/optional_index/mod.rs index 0923c0c2e..b0e3a6793 100644 --- a/columnar/src/column_index/optional_index/mod.rs +++ b/columnar/src/column_index/optional_index/mod.rs @@ -7,7 +7,7 @@ mod set_block; use common::{BinarySerializable, OwnedBytes, VInt}; pub use set::{SelectCursor, Set, SetCodec}; use set_block::{ - DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES, + DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, }; use crate::iterable::Iterable; @@ -259,11 +259,13 @@ impl Set for OptionalIndex { impl OptionalIndex { pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex { - assert!(row_ids - .last() - .copied() - .map(|last_row_id| last_row_id < num_rows) - .unwrap_or(true)); + assert!( + row_ids + .last() + .copied() + .map(|last_row_id| last_row_id < num_rows) + .unwrap_or(true) + ); let mut buffer = Vec::new(); serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap(); let bytes = OwnedBytes::new(buffer); diff --git a/columnar/src/column_index/optional_index/set_block/dense.rs b/columnar/src/column_index/optional_index/set_block/dense.rs index 33e030d26..9089cc789 100644 --- a/columnar/src/column_index/optional_index/set_block/dense.rs +++ b/columnar/src/column_index/optional_index/set_block/dense.rs @@ -2,7 +2,7 @@ use std::io::{self, Write}; use common::BinarySerializable; -use crate::column_index::optional_index::{SelectCursor, Set, SetCodec, ELEMENTS_PER_BLOCK}; +use crate::column_index::optional_index::{ELEMENTS_PER_BLOCK, SelectCursor, Set, SetCodec}; #[inline(always)] fn get_bit_at(input: u64, n: u16) -> bool { diff --git a/columnar/src/column_index/optional_index/set_block/mod.rs b/columnar/src/column_index/optional_index/set_block/mod.rs index fdfc0062e..8d2586934 100644 --- a/columnar/src/column_index/optional_index/set_block/mod.rs +++ b/columnar/src/column_index/optional_index/set_block/mod.rs @@ -1,7 +1,7 @@ mod dense; mod sparse; -pub use dense::{DenseBlock, DenseBlockCodec, DENSE_BLOCK_NUM_BYTES}; +pub use dense::{DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec}; pub use sparse::{SparseBlock, SparseBlockCodec}; #[cfg(test)] diff --git a/columnar/src/column_index/optional_index/tests.rs b/columnar/src/column_index/optional_index/tests.rs index 41d496390..205095d91 100644 --- a/columnar/src/column_index/optional_index/tests.rs +++ b/columnar/src/column_index/optional_index/tests.rs @@ -254,11 +254,7 @@ mod bench { let mut current = start; std::iter::from_fn(move || { current += rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation); - if current >= end { - None - } else { - Some(current) - } + if current >= end { None } else { Some(current) } }) } diff --git a/columnar/src/column_index/serialize.rs b/columnar/src/column_index/serialize.rs index 673c3459b..aa3001a2c 100644 --- a/columnar/src/column_index/serialize.rs +++ b/columnar/src/column_index/serialize.rs @@ -3,11 +3,11 @@ use std::io::Write; use common::{CountingWriter, OwnedBytes}; -use super::multivalued_index::SerializableMultivalueIndex; use super::OptionalIndex; +use super::multivalued_index::SerializableMultivalueIndex; +use crate::column_index::ColumnIndex; use crate::column_index::multivalued_index::serialize_multivalued_index; use crate::column_index::optional_index::serialize_optional_index; -use crate::column_index::ColumnIndex; use crate::iterable::Iterable; use crate::{Cardinality, RowId, Version}; diff --git a/columnar/src/column_values/bench.rs b/columnar/src/column_values/bench.rs index 416a33388..5623b5fb3 100644 --- a/columnar/src/column_values/bench.rs +++ b/columnar/src/column_values/bench.rs @@ -11,7 +11,7 @@ use crate::column_values::u64_based::*; fn get_data() -> Vec { let mut rng = StdRng::seed_from_u64(2u64); let mut data: Vec<_> = (100..55000_u64) - .map(|num| num + rng.gen::() as u64) + .map(|num| num + rng.r#gen::() as u64) .collect(); data.push(99_000); data.insert(1000, 2000); diff --git a/columnar/src/column_values/mod.rs b/columnar/src/column_values/mod.rs index ef5de5154..bc61c752e 100644 --- a/columnar/src/column_values/mod.rs +++ b/columnar/src/column_values/mod.rs @@ -26,13 +26,13 @@ mod monotonic_column; pub(crate) use merge::MergedColumnValues; pub use stats::ColumnStats; -pub use u128_based::{ - open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128, - CompactSpaceU64Accessor, -}; pub use u64_based::{ - load_u64_based_column_values, serialize_and_load_u64_based_column_values, - serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES, + ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values, + serialize_and_load_u64_based_column_values, serialize_u64_based_column_values, +}; +pub use u128_based::{ + CompactSpaceU64Accessor, open_u128_as_compact_u64, open_u128_mapped, + serialize_column_values_u128, }; pub use vec_column::VecColumn; diff --git a/columnar/src/column_values/monotonic_column.rs b/columnar/src/column_values/monotonic_column.rs index 506650be3..35de3787a 100644 --- a/columnar/src/column_values/monotonic_column.rs +++ b/columnar/src/column_values/monotonic_column.rs @@ -2,8 +2,8 @@ use std::fmt::Debug; use std::marker::PhantomData; use std::ops::{Range, RangeInclusive}; -use crate::column_values::monotonic_mapping::StrictlyMonotonicFn; use crate::ColumnValues; +use crate::column_values::monotonic_mapping::StrictlyMonotonicFn; struct MonotonicMappingColumn { from_column: C, @@ -99,10 +99,10 @@ where #[cfg(test)] mod tests { use super::*; + use crate::column_values::VecColumn; use crate::column_values::monotonic_mapping::{ StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal, }; - use crate::column_values::VecColumn; #[test] fn test_monotonic_mapping_iter() { diff --git a/columnar/src/column_values/u128_based/compact_space/mod.rs b/columnar/src/column_values/u128_based/compact_space/mod.rs index f246c7b0c..2c815bdce 100644 --- a/columnar/src/column_values/u128_based/compact_space/mod.rs +++ b/columnar/src/column_values/u128_based/compact_space/mod.rs @@ -24,8 +24,8 @@ use build_compact_space::get_compact_space; use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128}; use tantivy_bitpacker::{BitPacker, BitUnpacker}; -use crate::column_values::ColumnValues; use crate::RowId; +use crate::column_values::ColumnValues; /// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of /// blanks depends on the number of blanks. @@ -653,12 +653,14 @@ mod tests { ), &[3] ); - assert!(get_positions_for_value_range_helper( - &decomp, - 99998u128..=99998u128, - complete_range.clone() - ) - .is_empty()); + assert!( + get_positions_for_value_range_helper( + &decomp, + 99998u128..=99998u128, + complete_range.clone() + ) + .is_empty() + ); assert_eq!( &get_positions_for_value_range_helper( &decomp, diff --git a/columnar/src/column_values/u128_based/mod.rs b/columnar/src/column_values/u128_based/mod.rs index 30665630a..62e9a1f92 100644 --- a/columnar/src/column_values/u128_based/mod.rs +++ b/columnar/src/column_values/u128_based/mod.rs @@ -130,11 +130,11 @@ pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result u64 { // copied from unstable rust standard library. let d = n / q.get(); let r = n % q.get(); - if r > 0 { - d + 1 - } else { - d - } + if r > 0 { d + 1 } else { d } } // The bitpacked codec applies a linear transformation `f` over data that are bitpacked. diff --git a/columnar/src/column_values/u64_based/blockwise_linear.rs b/columnar/src/column_values/u64_based/blockwise_linear.rs index eb9191aa8..e37f9098c 100644 --- a/columnar/src/column_values/u64_based/blockwise_linear.rs +++ b/columnar/src/column_values/u64_based/blockwise_linear.rs @@ -4,12 +4,12 @@ use std::{io, iter}; use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes}; use fastdivide::DividerU64; -use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; +use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits}; +use crate::MonotonicallyMappableToU64; use crate::column_values::u64_based::line::Line; use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats}; use crate::column_values::{ColumnValues, VecColumn}; -use crate::MonotonicallyMappableToU64; const BLOCK_SIZE: u32 = 512u32; diff --git a/columnar/src/column_values/u64_based/linear.rs b/columnar/src/column_values/u64_based/linear.rs index ba0c9e641..fed14169c 100644 --- a/columnar/src/column_values/u64_based/linear.rs +++ b/columnar/src/column_values/u64_based/linear.rs @@ -1,13 +1,13 @@ use std::io; use common::{BinarySerializable, OwnedBytes}; -use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; +use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits}; -use super::line::Line; use super::ColumnValues; -use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats}; -use crate::column_values::VecColumn; +use super::line::Line; use crate::RowId; +use crate::column_values::VecColumn; +use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats}; const HALF_SPACE: u64 = u64::MAX / 2; const LINE_ESTIMATION_BLOCK_LEN: usize = 512; diff --git a/columnar/src/column_values/u64_based/mod.rs b/columnar/src/column_values/u64_based/mod.rs index 7afc71e3f..3edfe1c49 100644 --- a/columnar/src/column_values/u64_based/mod.rs +++ b/columnar/src/column_values/u64_based/mod.rs @@ -17,7 +17,7 @@ pub use crate::column_values::u64_based::bitpacked::BitpackedCodec; pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec; pub use crate::column_values::u64_based::linear::LinearCodec; pub use crate::column_values::u64_based::stats_collector::StatsCollector; -use crate::column_values::{monotonic_map_column, ColumnStats}; +use crate::column_values::{ColumnStats, monotonic_map_column}; use crate::iterable::Iterable; use crate::{ColumnValues, MonotonicallyMappableToU64}; diff --git a/columnar/src/column_values/u64_based/stats_collector.rs b/columnar/src/column_values/u64_based/stats_collector.rs index dd149b102..8a55a50b2 100644 --- a/columnar/src/column_values/u64_based/stats_collector.rs +++ b/columnar/src/column_values/u64_based/stats_collector.rs @@ -2,8 +2,8 @@ use std::num::NonZeroU64; use fastdivide::DividerU64; -use crate::column_values::ColumnStats; use crate::RowId; +use crate::column_values::ColumnStats; /// Compute the gcd of two non null numbers. /// @@ -96,8 +96,8 @@ impl StatsCollector { mod tests { use std::num::NonZeroU64; - use crate::column_values::u64_based::stats_collector::{compute_gcd, StatsCollector}; use crate::column_values::u64_based::ColumnStats; + use crate::column_values::u64_based::stats_collector::{StatsCollector, compute_gcd}; fn compute_stats(vals: impl Iterator) -> ColumnStats { let mut stats_collector = StatsCollector::default(); diff --git a/columnar/src/columnar/column_type.rs b/columnar/src/columnar/column_type.rs index ac61a7253..0e193915e 100644 --- a/columnar/src/columnar/column_type.rs +++ b/columnar/src/columnar/column_type.rs @@ -4,8 +4,8 @@ use std::net::Ipv6Addr; use serde::{Deserialize, Serialize}; -use crate::value::NumericalType; use crate::InvalidData; +use crate::value::NumericalType; /// The column type represents the column type. /// Any changes need to be propagated to `COLUMN_TYPES`. diff --git a/columnar/src/columnar/merge/mod.rs b/columnar/src/columnar/merge/mod.rs index b286698df..54449f8a4 100644 --- a/columnar/src/columnar/merge/mod.rs +++ b/columnar/src/columnar/merge/mod.rs @@ -10,11 +10,11 @@ use std::sync::Arc; pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder}; use super::writer::ColumnarSerializer; -use crate::column::{serialize_column_mappable_to_u128, serialize_column_mappable_to_u64}; +use crate::column::{serialize_column_mappable_to_u64, serialize_column_mappable_to_u128}; use crate::column_values::MergedColumnValues; +use crate::columnar::ColumnarReader; use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column; use crate::columnar::writer::CompatibleNumericalTypes; -use crate::columnar::ColumnarReader; use crate::dynamic_column::DynamicColumn; use crate::{ BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType, @@ -144,16 +144,17 @@ fn merge_column( let mut column_values: Vec>> = Vec::with_capacity(columns_to_merge.len()); for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() { - if let Some(Column { index: idx, values }) = - dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) - { - column_indexes.push(idx); - column_values.push(Some(values)); - } else { - column_indexes.push(ColumnIndex::Empty { - num_docs: num_docs_per_column[i], - }); - column_values.push(None); + match dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) { + Some(Column { index: idx, values }) => { + column_indexes.push(idx); + column_values.push(Some(values)); + } + None => { + column_indexes.push(ColumnIndex::Empty { + num_docs: num_docs_per_column[i], + }); + column_values.push(None); + } } } let merged_column_index = @@ -253,11 +254,13 @@ impl GroupedColumns { } // At the moment, only the numerical column type category has more than one possible // column type. - assert!(self - .columns - .iter() - .flatten() - .all(|el| ColumnTypeCategory::from(el.column_type()) == ColumnTypeCategory::Numerical)); + assert!( + self.columns + .iter() + .flatten() + .all(|el| ColumnTypeCategory::from(el.column_type()) + == ColumnTypeCategory::Numerical) + ); merged_numerical_columns_type(self.columns.iter().flatten()).into() } } diff --git a/columnar/src/columnar/merge/term_merger.rs b/columnar/src/columnar/merge/term_merger.rs index 7b63790fb..710af485a 100644 --- a/columnar/src/columnar/merge/term_merger.rs +++ b/columnar/src/columnar/merge/term_merger.rs @@ -74,18 +74,19 @@ impl<'a> TermMerger<'a> { /// False if there is none. pub fn advance(&mut self) -> bool { self.advance_segments(); - if let Some(head) = self.heap.pop() { - self.term_streams_with_segment.push(head); - while let Some(next_streamer) = self.heap.peek() { - if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() { - break; + match self.heap.pop() { + Some(head) => { + self.term_streams_with_segment.push(head); + while let Some(next_streamer) = self.heap.peek() { + if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() { + break; + } + let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand + self.term_streams_with_segment.push(next_heap_it); } - let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand - self.term_streams_with_segment.push(next_heap_it); + true } - true - } else { - false + _ => false, } } diff --git a/columnar/src/columnar/merge/tests.rs b/columnar/src/columnar/merge/tests.rs index 206d5da5f..4487c527c 100644 --- a/columnar/src/columnar/merge/tests.rs +++ b/columnar/src/columnar/merge/tests.rs @@ -3,7 +3,7 @@ use proptest::collection::vec; use proptest::prelude::*; use super::*; -use crate::columnar::{merge_columnar, ColumnarReader, MergeRowOrder, StackMergeOrder}; +use crate::columnar::{ColumnarReader, MergeRowOrder, StackMergeOrder, merge_columnar}; use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId}; fn make_columnar + HasAssociatedColumnType + Copy>( diff --git a/columnar/src/columnar/mod.rs b/columnar/src/columnar/mod.rs index bb9e485be..5b57e0298 100644 --- a/columnar/src/columnar/mod.rs +++ b/columnar/src/columnar/mod.rs @@ -5,9 +5,9 @@ mod reader; mod writer; pub use column_type::{ColumnType, HasAssociatedColumnType}; -pub use format_version::{Version, CURRENT_VERSION}; +pub use format_version::{CURRENT_VERSION, Version}; #[cfg(test)] pub(crate) use merge::ColumnTypeCategory; -pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder}; +pub use merge::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, merge_columnar}; pub use reader::ColumnarReader; pub use writer::ColumnarWriter; diff --git a/columnar/src/columnar/reader/mod.rs b/columnar/src/columnar/reader/mod.rs index cb659bb5d..dfdaf3a82 100644 --- a/columnar/src/columnar/reader/mod.rs +++ b/columnar/src/columnar/reader/mod.rs @@ -1,11 +1,11 @@ use std::{fmt, io, mem}; +use common::BinarySerializable; use common::file_slice::FileSlice; use common::json_path_writer::JSON_PATH_SEGMENT_SEP; -use common::BinarySerializable; use sstable::{Dictionary, RangeSSTable}; -use crate::columnar::{format_version, ColumnType}; +use crate::columnar::{ColumnType, format_version}; use crate::dynamic_column::DynamicColumnHandle; use crate::{RowId, Version}; diff --git a/columnar/src/columnar/writer/column_writers.rs b/columnar/src/columnar/writer/column_writers.rs index e26e1ee2d..525fc52a3 100644 --- a/columnar/src/columnar/writer/column_writers.rs +++ b/columnar/src/columnar/writer/column_writers.rs @@ -42,7 +42,7 @@ impl ColumnWriter { &self, arena: &MemoryArena, buffer: &'a mut Vec, - ) -> impl Iterator> + 'a { + ) -> impl Iterator> + 'a + use<'a, V> { buffer.clear(); self.values.read_to_end(arena, buffer); let mut cursor: &[u8] = &buffer[..]; @@ -104,9 +104,10 @@ pub(crate) struct NumericalColumnWriter { impl NumericalColumnWriter { pub fn force_numerical_type(&mut self, numerical_type: NumericalType) { - assert!(self - .compatible_numerical_types - .is_type_accepted(numerical_type)); + assert!( + self.compatible_numerical_types + .is_type_accepted(numerical_type) + ); self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type); } } @@ -211,7 +212,7 @@ impl NumericalColumnWriter { self, arena: &MemoryArena, buffer: &'a mut Vec, - ) -> impl Iterator> + 'a { + ) -> impl Iterator> + 'a + use<'a> { self.column_writer.operation_iterator(arena, buffer) } } @@ -255,7 +256,7 @@ impl StrOrBytesColumnWriter { &self, arena: &MemoryArena, byte_buffer: &'a mut Vec, - ) -> impl Iterator> + 'a { + ) -> impl Iterator> + 'a + use<'a> { self.column_writer.operation_iterator(arena, byte_buffer) } } diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index 7fb899345..cbd334003 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -8,13 +8,13 @@ use std::net::Ipv6Addr; use column_operation::ColumnOperation; pub(crate) use column_writers::CompatibleNumericalTypes; -use common::json_path_writer::JSON_END_OF_PATH; use common::CountingWriter; +use common::json_path_writer::JSON_END_OF_PATH; pub(crate) use serializer::ColumnarSerializer; use stacker::{Addr, ArenaHashMap, MemoryArena}; use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex}; -use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64}; +use crate::column_values::{MonotonicallyMappableToU64, MonotonicallyMappableToU128}; use crate::columnar::column_type::ColumnType; use crate::columnar::writer::column_writers::{ ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter, diff --git a/columnar/src/columnar/writer/serializer.rs b/columnar/src/columnar/writer/serializer.rs index 818acfe69..03b7601e8 100644 --- a/columnar/src/columnar/writer/serializer.rs +++ b/columnar/src/columnar/writer/serializer.rs @@ -3,11 +3,11 @@ use std::io::Write; use common::json_path_writer::JSON_END_OF_PATH; use common::{BinarySerializable, CountingWriter}; -use sstable::value::RangeValueWriter; use sstable::RangeSSTable; +use sstable::value::RangeValueWriter; -use crate::columnar::ColumnType; use crate::RowId; +use crate::columnar::ColumnType; pub struct ColumnarSerializer { wrt: CountingWriter, diff --git a/columnar/src/columnar/writer/value_index.rs b/columnar/src/columnar/writer/value_index.rs index a35432e3a..d289346b8 100644 --- a/columnar/src/columnar/writer/value_index.rs +++ b/columnar/src/columnar/writer/value_index.rs @@ -1,6 +1,6 @@ +use crate::RowId; use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex}; use crate::iterable::Iterable; -use crate::RowId; /// The `IndexBuilder` interprets a sequence of /// calls of the form: @@ -31,12 +31,13 @@ pub struct OptionalIndexBuilder { impl OptionalIndexBuilder { pub fn finish(&mut self, num_rows: RowId) -> impl Iterable + '_ { - debug_assert!(self - .docs - .last() - .copied() - .map(|last_doc| last_doc < num_rows) - .unwrap_or(true)); + debug_assert!( + self.docs + .last() + .copied() + .map(|last_doc| last_doc < num_rows) + .unwrap_or(true) + ); &self.docs[..] } @@ -48,12 +49,13 @@ impl OptionalIndexBuilder { impl IndexBuilder for OptionalIndexBuilder { #[inline(always)] fn record_row(&mut self, doc: RowId) { - debug_assert!(self - .docs - .last() - .copied() - .map(|prev_doc| doc > prev_doc) - .unwrap_or(true)); + debug_assert!( + self.docs + .last() + .copied() + .map(|prev_doc| doc > prev_doc) + .unwrap_or(true) + ); self.docs.push(doc); } } diff --git a/columnar/src/compat_tests.rs b/columnar/src/compat_tests.rs index 8a504ab26..e791f5a40 100644 --- a/columnar/src/compat_tests.rs +++ b/columnar/src/compat_tests.rs @@ -3,8 +3,8 @@ use std::path::PathBuf; use itertools::Itertools; use crate::{ - merge_columnar, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder, - CURRENT_VERSION, + CURRENT_VERSION, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder, + merge_columnar, }; const NUM_DOCS: u32 = u16::MAX as u32; diff --git a/columnar/src/dynamic_column.rs b/columnar/src/dynamic_column.rs index 2b9d69770..b98bbd2fb 100644 --- a/columnar/src/dynamic_column.rs +++ b/columnar/src/dynamic_column.rs @@ -6,7 +6,7 @@ use common::file_slice::FileSlice; use common::{ByteCount, DateTime, HasLen, OwnedBytes}; use crate::column::{BytesColumn, Column, StrColumn}; -use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn}; +use crate::column_values::{StrictlyMonotonicFn, monotonic_map_column}; use crate::columnar::ColumnType; use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version}; diff --git a/columnar/src/lib.rs b/columnar/src/lib.rs index 2b7a60b3a..30e09e8fa 100644 --- a/columnar/src/lib.rs +++ b/columnar/src/lib.rs @@ -44,11 +44,11 @@ pub use block_accessor::ColumnBlockAccessor; pub use column::{BytesColumn, Column, StrColumn}; pub use column_index::ColumnIndex; pub use column_values::{ - ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64, + ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128, }; pub use columnar::{ - merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType, - MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION, + CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType, + MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar, }; use sstable::VoidSSTable; pub use value::{NumericalType, NumericalValue}; diff --git a/columnar/src/tests.rs b/columnar/src/tests.rs index b7ce7f27f..5fa537466 100644 --- a/columnar/src/tests.rs +++ b/columnar/src/tests.rs @@ -716,8 +716,8 @@ fn test_columnar_merging_number_columns() { // TODO document edge case: required_columns incompatible with values. #[allow(clippy::type_complexity)] -fn columnar_docs_and_remap( -) -> impl Strategy>>, Vec)> { +fn columnar_docs_and_remap() +-> impl Strategy>>, Vec)> { proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map( |columnars_docs: Vec>>| { let row_addrs: Vec = columnars_docs diff --git a/common/Cargo.toml b/common/Cargo.toml index 63394e7a2..7d1c1d65c 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -3,7 +3,7 @@ name = "tantivy-common" version = "0.7.0" authors = ["Paul Masurel ", "Pascal Seitz "] license = "MIT" -edition = "2021" +edition = "2024" description = "common traits and utility functions used by multiple tantivy subcrates" documentation = "https://docs.rs/tantivy_common/" homepage = "https://github.com/quickwit-oss/tantivy" diff --git a/common/benches/bench.rs b/common/benches/bench.rs index 10f176428..81260e116 100644 --- a/common/benches/bench.rs +++ b/common/benches/bench.rs @@ -1,7 +1,7 @@ -use binggan::{black_box, BenchRunner}; +use binggan::{BenchRunner, black_box}; use rand::seq::IteratorRandom; use rand::thread_rng; -use tantivy_common::{serialize_vint_u32, BitSet, TinySet}; +use tantivy_common::{BitSet, TinySet, serialize_vint_u32}; fn bench_vint() { let mut runner = BenchRunner::new(); diff --git a/common/src/bounds.rs b/common/src/bounds.rs index 712c77852..af477ff49 100644 --- a/common/src/bounds.rs +++ b/common/src/bounds.rs @@ -65,11 +65,11 @@ pub fn transform_bound_inner_res( ) -> io::Result> { use self::Bound::*; Ok(match bound { - Excluded(ref from_val) => match transform(from_val)? { + Excluded(from_val) => match transform(from_val)? { TransformBound::NewBound(new_val) => new_val, TransformBound::Existing(new_val) => Excluded(new_val), }, - Included(ref from_val) => match transform(from_val)? { + Included(from_val) => match transform(from_val)? { TransformBound::NewBound(new_val) => new_val, TransformBound::Existing(new_val) => Included(new_val), }, @@ -85,11 +85,11 @@ pub fn transform_bound_inner( ) -> Bound { use self::Bound::*; match bound { - Excluded(ref from_val) => match transform(from_val) { + Excluded(from_val) => match transform(from_val) { TransformBound::NewBound(new_val) => new_val, TransformBound::Existing(new_val) => Excluded(new_val), }, - Included(ref from_val) => match transform(from_val) { + Included(from_val) => match transform(from_val) { TransformBound::NewBound(new_val) => new_val, TransformBound::Existing(new_val) => Included(new_val), }, @@ -111,8 +111,8 @@ pub fn map_bound( ) -> Bound { use self::Bound::*; match bound { - Excluded(ref from_val) => Bound::Excluded(transform(from_val)), - Included(ref from_val) => Bound::Included(transform(from_val)), + Excluded(from_val) => Bound::Excluded(transform(from_val)), + Included(from_val) => Bound::Included(transform(from_val)), Unbounded => Unbounded, } } @@ -123,8 +123,8 @@ pub fn map_bound_res( ) -> Result, Err> { use self::Bound::*; Ok(match bound { - Excluded(ref from_val) => Excluded(transform(from_val)?), - Included(ref from_val) => Included(transform(from_val)?), + Excluded(from_val) => Excluded(transform(from_val)?), + Included(from_val) => Included(transform(from_val)?), Unbounded => Unbounded, }) } diff --git a/common/src/file_slice.rs b/common/src/file_slice.rs index 0d5d1adcd..dbaec16ac 100644 --- a/common/src/file_slice.rs +++ b/common/src/file_slice.rs @@ -74,7 +74,7 @@ impl FileHandle for WrapFile { { use std::io::{Read, Seek}; let mut file = self.file.try_clone()?; // Clone the file to read from it separately - // Seek to the start position in the file + // Seek to the start position in the file file.seek(io::SeekFrom::Start(start as u64))?; // Read the data into the buffer file.read_exact(&mut buffer)?; @@ -346,8 +346,8 @@ mod tests { use std::sync::Arc; use super::{FileHandle, FileSlice}; - use crate::file_slice::combine_ranges; use crate::HasLen; + use crate::file_slice::combine_ranges; #[test] fn test_file_slice() -> io::Result<()> { diff --git a/common/src/lib.rs b/common/src/lib.rs index cbe13c951..4e64af11c 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -22,7 +22,7 @@ pub use json_path_writer::JsonPathWriter; pub use ownedbytes::{OwnedBytes, StableDeref}; pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize}; pub use vint::{ - read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128, + VInt, VIntU128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, }; pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite}; @@ -177,8 +177,10 @@ pub(crate) mod test { #[test] fn test_f64_order() { - assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)) - .contains(&f64_to_u64(f64::NAN))); // nan is not a number + assert!( + !(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)) + .contains(&f64_to_u64(f64::NAN)) + ); // nan is not a number assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa diff --git a/common/src/vint.rs b/common/src/vint.rs index b09e73b92..393f89ef9 100644 --- a/common/src/vint.rs +++ b/common/src/vint.rs @@ -222,7 +222,7 @@ impl BinarySerializable for VInt { #[cfg(test)] mod tests { - use super::{serialize_vint_u32, BinarySerializable, VInt}; + use super::{BinarySerializable, VInt, serialize_vint_u32}; fn aux_test_vint(val: u64) { let mut v = [14u8; 10]; diff --git a/query-grammar/Cargo.toml b/query-grammar/Cargo.toml index 19ed26059..7d71b2603 100644 --- a/query-grammar/Cargo.toml +++ b/query-grammar/Cargo.toml @@ -9,7 +9,7 @@ homepage = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy" readme = "README.md" keywords = ["search", "information", "retrieval"] -edition = "2021" +edition = "2024" [dependencies] nom = "7" diff --git a/query-grammar/src/infallible.rs b/query-grammar/src/infallible.rs index 414054045..9fd5bff69 100644 --- a/query-grammar/src/infallible.rs +++ b/query-grammar/src/infallible.rs @@ -186,19 +186,19 @@ macro_rules! tuple_trait_impl( ); macro_rules! tuple_trait_inner( - ($it:tt, $self:expr, $input:expr, (), $error_list:expr, $head:ident $($id:ident)+) => ({ + ($it:tt, $self:expr_2021, $input:expr_2021, (), $error_list:expr_2021, $head:ident $($id:ident)+) => ({ let (i, (o, mut err)) = $self.$it.parse($input.clone())?; $error_list.append(&mut err); succ!($it, tuple_trait_inner!($self, i, ( o ), $error_list, $($id)+)) }); - ($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident $($id:ident)+) => ({ + ($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident $($id:ident)+) => ({ let (i, (o, mut err)) = $self.$it.parse($input.clone())?; $error_list.append(&mut err); succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $error_list, $($id)+)) }); - ($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $error_list:expr, $head:ident) => ({ + ($it:tt, $self:expr_2021, $input:expr_2021, ($($parsed:tt)*), $error_list:expr_2021, $head:ident) => ({ let (i, (o, mut err)) = $self.$it.parse($input.clone())?; $error_list.append(&mut err); @@ -328,13 +328,13 @@ macro_rules! alt_trait_impl( ); macro_rules! alt_trait_inner( - ($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => ( + ($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident, $($id_cond:ident $id:ident),+) => ( match $self.$it.0.parse($input.clone()) { Err(_) => succ!($it, alt_trait_inner!($self, $input, $($id_cond $id),+)), Ok((input_left, _)) => Some($self.$it.1.parse(input_left)), } ); - ($it:tt, $self:expr, $input:expr, $head_cond:ident $head:ident) => ( + ($it:tt, $self:expr_2021, $input:expr_2021, $head_cond:ident $head:ident) => ( None ); ); diff --git a/query-grammar/src/query_grammar.rs b/query-grammar/src/query_grammar.rs index 4bc992bae..0030c776c 100644 --- a/query-grammar/src/query_grammar.rs +++ b/query-grammar/src/query_grammar.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::iter::once; +use nom::IResult; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::{ @@ -10,12 +11,11 @@ use nom::combinator::{eof, map, map_res, opt, peek, recognize, value, verify}; use nom::error::{Error, ErrorKind}; use nom::multi::{many0, many1, separated_list0}; use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; -use nom::IResult; use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral}; +use crate::Occur; use crate::infallible::*; use crate::user_input_ast::Delimiter; -use crate::Occur; // Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to // special characters. @@ -1030,7 +1030,7 @@ fn rewrite_ast(mut input: UserInputAst) -> UserInputAst { fn rewrite_ast_clause(input: &mut (Option, UserInputAst)) { match input { - (None, UserInputAst::Clause(ref mut clauses)) if clauses.len() == 1 => { + (None, UserInputAst::Clause(clauses)) if clauses.len() == 1 => { *input = clauses.pop().unwrap(); // safe because clauses.len() == 1 } _ => {} @@ -1376,7 +1376,7 @@ mod test { #[test] fn test_range_parser_lenient() { - let literal = |query| literal_infallible(query).unwrap().1 .0.unwrap(); + let literal = |query| literal_infallible(query).unwrap().1.0.unwrap(); // same tests as non-lenient let res = literal("title: { + UserInputLeaf::Literal(literal) if literal.field_name.is_none() => { literal.field_name = Some(default_field) } UserInputLeaf::All => { @@ -59,12 +59,8 @@ impl UserInputLeaf { field: default_field, } } - UserInputLeaf::Range { ref mut field, .. } if field.is_none() => { - *field = Some(default_field) - } - UserInputLeaf::Set { ref mut field, .. } if field.is_none() => { - *field = Some(default_field) - } + UserInputLeaf::Range { field, .. } if field.is_none() => *field = Some(default_field), + UserInputLeaf::Set { field, .. } if field.is_none() => *field = Some(default_field), _ => (), // field was already set, do nothing } } @@ -75,11 +71,11 @@ impl Debug for UserInputLeaf { match self { UserInputLeaf::Literal(literal) => literal.fmt(formatter), UserInputLeaf::Range { - ref field, - ref lower, - ref upper, + field, + lower, + upper, } => { - if let Some(ref field) = field { + if let Some(field) = field { // TODO properly escape field (in case of \") write!(formatter, "\"{field}\":")?; } @@ -89,7 +85,7 @@ impl Debug for UserInputLeaf { Ok(()) } UserInputLeaf::Set { field, elements } => { - if let Some(ref field) = field { + if let Some(field) = field { // TODO properly escape field (in case of \") write!(formatter, "\"{field}\": ")?; } @@ -267,7 +263,7 @@ impl UserInputAst { .iter_mut() .for_each(|(_, ast)| ast.set_default_field(field.clone())), UserInputAst::Leaf(leaf) => leaf.set_default_field(field), - UserInputAst::Boost(ref mut ast, _) => ast.set_default_field(field), + UserInputAst::Boost(ast, _) => ast.set_default_field(field), } } } diff --git a/sstable/Cargo.toml b/sstable/Cargo.toml index 0ec2eae8f..2b2928bb2 100644 --- a/sstable/Cargo.toml +++ b/sstable/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tantivy-sstable" version = "0.3.0" -edition = "2021" +edition = "2024" license = "MIT" homepage = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy" diff --git a/sstable/benches/ord_to_term.rs b/sstable/benches/ord_to_term.rs index db1823dd3..68122756c 100644 --- a/sstable/benches/ord_to_term.rs +++ b/sstable/benches/ord_to_term.rs @@ -1,8 +1,8 @@ use std::sync::Arc; -use common::file_slice::FileSlice; use common::OwnedBytes; -use criterion::{criterion_group, criterion_main, Criterion}; +use common::file_slice::FileSlice; +use criterion::{Criterion, criterion_group, criterion_main}; use tantivy_sstable::{Dictionary, MonotonicU64SSTable}; fn make_test_sstable(suffix: &str) -> FileSlice { diff --git a/sstable/benches/stream_bench.rs b/sstable/benches/stream_bench.rs index 23c8a2200..cffe41e26 100644 --- a/sstable/benches/stream_bench.rs +++ b/sstable/benches/stream_bench.rs @@ -2,7 +2,7 @@ use std::collections::BTreeSet; use std::io; use common::file_slice::FileSlice; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use tantivy_sstable::{Dictionary, MonotonicU64SSTable}; diff --git a/sstable/src/block_reader.rs b/sstable/src/block_reader.rs index 3299c3b4c..8a86e83f7 100644 --- a/sstable/src/block_reader.rs +++ b/sstable/src/block_reader.rs @@ -51,18 +51,21 @@ impl BlockReader { let block_len = match self.reader.len() { 0 => { // we are out of data for this block. Check if we have another block after - if let Some(new_reader) = self.next_readers.next() { - self.reader = new_reader; - continue; - } else { - return Ok(false); + match self.next_readers.next() { + Some(new_reader) => { + self.reader = new_reader; + continue; + } + _ => { + return Ok(false); + } } } 1..=3 => { return Err(io::Error::new( io::ErrorKind::UnexpectedEof, "failed to read block_len", - )) + )); } _ => self.reader.read_u32() as usize, }; diff --git a/sstable/src/delta.rs b/sstable/src/delta.rs index fba996ac0..9542c7905 100644 --- a/sstable/src/delta.rs +++ b/sstable/src/delta.rs @@ -5,7 +5,7 @@ use common::{CountingWriter, OwnedBytes}; use zstd::bulk::Compressor; use super::value::ValueWriter; -use super::{value, vint, BlockReader}; +use super::{BlockReader, value, vint}; const FOUR_BIT_LIMITS: usize = 1 << 4; const VINT_MODE: u8 = 1u8; diff --git a/sstable/src/dictionary.rs b/sstable/src/dictionary.rs index c58a0a40e..caa7a6bb8 100644 --- a/sstable/src/dictionary.rs +++ b/sstable/src/dictionary.rs @@ -6,13 +6,13 @@ use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; use std::sync::Arc; -use common::bounds::{transform_bound_inner_res, TransformBound}; +use common::bounds::{TransformBound, transform_bound_inner_res}; use common::file_slice::FileSlice; use common::{BinarySerializable, OwnedBytes}; -use futures_util::{stream, StreamExt, TryStreamExt}; +use futures_util::{StreamExt, TryStreamExt, stream}; use itertools::Itertools; -use tantivy_fst::automaton::AlwaysMatch; use tantivy_fst::Automaton; +use tantivy_fst::automaton::AlwaysMatch; use crate::sstable_index_v3::SSTableIndexV3Empty; use crate::streamer::{Streamer, StreamerBuilder}; @@ -311,7 +311,7 @@ impl Dictionary { return Err(io::Error::new( io::ErrorKind::Other, format!("Unsupported sstable version, expected one of [2, 3], found {version}"), - )) + )); } }; @@ -644,8 +644,8 @@ mod tests { use common::OwnedBytes; use super::Dictionary; - use crate::dictionary::TermOrdHit; use crate::MonotonicU64SSTable; + use crate::dictionary::TermOrdHit; #[derive(Debug)] struct PermissionedHandle { @@ -914,30 +914,33 @@ mod tests { // Single term let mut terms = Vec::new(); - assert!(dic - .sorted_ords_to_term_cb(100_000..100_001, |term| { + assert!( + dic.sorted_ords_to_term_cb(100_000..100_001, |term| { terms.push(term.to_vec()); Ok(()) }) - .unwrap()); + .unwrap() + ); assert_eq!(terms, vec![format!("{:05X}", 100_000).into_bytes(),]); // Single term let mut terms = Vec::new(); - assert!(dic - .sorted_ords_to_term_cb(100_001..100_002, |term| { + assert!( + dic.sorted_ords_to_term_cb(100_001..100_002, |term| { terms.push(term.to_vec()); Ok(()) }) - .unwrap()); + .unwrap() + ); assert_eq!(terms, vec![format!("{:05X}", 100_001).into_bytes(),]); // both terms let mut terms = Vec::new(); - assert!(dic - .sorted_ords_to_term_cb(100_000..100_002, |term| { + assert!( + dic.sorted_ords_to_term_cb(100_000..100_002, |term| { terms.push(term.to_vec()); Ok(()) }) - .unwrap()); + .unwrap() + ); assert_eq!( terms, vec![ @@ -947,12 +950,13 @@ mod tests { ); // Test cross block let mut terms = Vec::new(); - assert!(dic - .sorted_ords_to_term_cb(98653..=98655, |term| { + assert!( + dic.sorted_ords_to_term_cb(98653..=98655, |term| { terms.push(term.to_vec()); Ok(()) }) - .unwrap()); + .unwrap() + ); assert_eq!( terms, vec![ diff --git a/sstable/src/lib.rs b/sstable/src/lib.rs index f872d2e19..ea3965899 100644 --- a/sstable/src/lib.rs +++ b/sstable/src/lib.rs @@ -322,7 +322,7 @@ mod test { use common::OwnedBytes; - use super::{common_prefix_len, MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable}; + use super::{MonotonicU64SSTable, SSTable, VoidMerge, VoidSSTable, common_prefix_len}; fn aux_test_common_prefix_len(left: &str, right: &str, expect_len: usize) { assert_eq!( diff --git a/sstable/src/merge/heap_merge.rs b/sstable/src/merge/heap_merge.rs index 0123c4fe8..c5ab07e35 100644 --- a/sstable/src/merge/heap_merge.rs +++ b/sstable/src/merge/heap_merge.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; -use std::collections::binary_heap::PeekMut; use std::collections::BinaryHeap; +use std::collections::binary_heap::PeekMut; use std::io; use super::{SingleValueMerger, ValueMerger}; @@ -41,14 +41,17 @@ pub fn merge_sstable>( loop { let len = heap.len(); let mut value_merger; - if let Some(mut head) = heap.peek_mut() { - writer.insert_key(head.0.key()).unwrap(); - value_merger = merger.new_value(head.0.value()); - if !head.0.advance()? { - PeekMut::pop(head); + match heap.peek_mut() { + Some(mut head) => { + writer.insert_key(head.0.key()).unwrap(); + value_merger = merger.new_value(head.0.value()); + if !head.0.advance()? { + PeekMut::pop(head); + } + } + _ => { + break; } - } else { - break; } for _ in 0..len - 1 { if let Some(mut head) = heap.peek_mut() { diff --git a/sstable/src/sstable_index_v3.rs b/sstable/src/sstable_index_v3.rs index 1c99c36e5..c2ab1fa07 100644 --- a/sstable/src/sstable_index_v3.rs +++ b/sstable/src/sstable_index_v3.rs @@ -3,12 +3,12 @@ use std::ops::Range; use std::sync::Arc; use common::{BinarySerializable, FixedSize, OwnedBytes}; -use tantivy_bitpacker::{compute_num_bits, BitPacker}; +use tantivy_bitpacker::{BitPacker, compute_num_bits}; use tantivy_fst::raw::Fst; use tantivy_fst::{Automaton, IntoStreamer, Map, MapBuilder, Streamer}; use crate::block_match_automaton::can_block_match_automaton; -use crate::{common_prefix_len, SSTableDataCorruption, TermOrdinal}; +use crate::{SSTableDataCorruption, TermOrdinal, common_prefix_len}; #[derive(Debug, Clone)] pub enum SSTableIndex { @@ -824,8 +824,8 @@ mod tests { use common::OwnedBytes; use super::*; - use crate::block_match_automaton::tests::EqBuffer; use crate::SSTableDataCorruption; + use crate::block_match_automaton::tests::EqBuffer; #[test] fn test_sstable_index() { diff --git a/sstable/src/streamer.rs b/sstable/src/streamer.rs index f0f052c33..eb0c44e29 100644 --- a/sstable/src/streamer.rs +++ b/sstable/src/streamer.rs @@ -1,8 +1,8 @@ use std::io; use std::ops::Bound; -use tantivy_fst::automaton::AlwaysMatch; use tantivy_fst::Automaton; +use tantivy_fst::automaton::AlwaysMatch; use crate::dictionary::Dictionary; use crate::{DeltaReader, SSTable, TermOrdinal}; diff --git a/sstable/src/value/index.rs b/sstable/src/value/index.rs index 50faf36c4..d8470c12c 100644 --- a/sstable/src/value/index.rs +++ b/sstable/src/value/index.rs @@ -1,7 +1,7 @@ use std::io; -use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter}; -use crate::{vint, BlockAddr}; +use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64}; +use crate::{BlockAddr, vint}; #[derive(Default)] pub(crate) struct IndexValueReader { diff --git a/sstable/src/value/range.rs b/sstable/src/value/range.rs index 2fff531c1..d625a67d0 100644 --- a/sstable/src/value/range.rs +++ b/sstable/src/value/range.rs @@ -1,7 +1,7 @@ use std::io; use std::ops::Range; -use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter}; +use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64}; /// See module comment. #[derive(Default)] diff --git a/sstable/src/value/u64_monotonic.rs b/sstable/src/value/u64_monotonic.rs index 7c660fb3b..5e5a92560 100644 --- a/sstable/src/value/u64_monotonic.rs +++ b/sstable/src/value/u64_monotonic.rs @@ -1,6 +1,6 @@ use std::io; -use crate::value::{deserialize_vint_u64, ValueReader, ValueWriter}; +use crate::value::{ValueReader, ValueWriter, deserialize_vint_u64}; use crate::vint; #[derive(Default)] diff --git a/stacker/Cargo.toml b/stacker/Cargo.toml index b1cde232f..b29ed8fac 100644 --- a/stacker/Cargo.toml +++ b/stacker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tantivy-stacker" version = "0.3.0" -edition = "2021" +edition = "2024" license = "MIT" homepage = "https://github.com/quickwit-oss/tantivy" repository = "https://github.com/quickwit-oss/tantivy" diff --git a/stacker/benches/bench.rs b/stacker/benches/bench.rs index 185451bbd..ed5ea5eeb 100644 --- a/stacker/benches/bench.rs +++ b/stacker/benches/bench.rs @@ -1,5 +1,5 @@ use binggan::plugins::PeakMemAllocPlugin; -use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM}; +use binggan::{BenchRunner, INSTRUMENTED_SYSTEM, PeakMemAlloc, black_box}; use rand::SeedableRng; use rustc_hash::FxHashMap; use tantivy_stacker::{ArenaHashMap, ExpUnrolledLinkedList, MemoryArena}; diff --git a/stacker/src/lib.rs b/stacker/src/lib.rs index 9e29d2d5b..7eda3cff9 100644 --- a/stacker/src/lib.rs +++ b/stacker/src/lib.rs @@ -13,7 +13,7 @@ mod shared_arena_hashmap; pub use self::arena_hashmap::ArenaHashMap; pub use self::expull::ExpUnrolledLinkedList; pub use self::memory_arena::{Addr, MemoryArena}; -pub use self::shared_arena_hashmap::{compute_table_memory_size, SharedArenaHashMap}; +pub use self::shared_arena_hashmap::{SharedArenaHashMap, compute_table_memory_size}; /// When adding an element in a `ArenaHashMap`, we get a unique id associated to the given key. pub type UnorderedId = u32; diff --git a/stacker/src/shared_arena_hashmap.rs b/stacker/src/shared_arena_hashmap.rs index f558b0f8a..bdd07f7d3 100644 --- a/stacker/src/shared_arena_hashmap.rs +++ b/stacker/src/shared_arena_hashmap.rs @@ -356,7 +356,7 @@ mod tests { use std::collections::HashMap; - use super::{compute_previous_power_of_two, SharedArenaHashMap}; + use super::{SharedArenaHashMap, compute_previous_power_of_two}; use crate::MemoryArena; #[test]