diff --git a/bitpacker/src/bitpacker.rs b/bitpacker/src/bitpacker.rs
index 11ea37566..d270647a9 100644
--- a/bitpacker/src/bitpacker.rs
+++ b/bitpacker/src/bitpacker.rs
@@ -368,9 +368,9 @@ mod test {
         for start_idx in 0u32..32u32 {
             output.resize(len, 0);
             bitunpacker.get_batch_u32s(start_idx, &buffer, &mut output);
-            for i in 0..len {
+            for (i, output_val) in output.iter().enumerate() {
                 let expected = (start_idx + i as u32) & mask;
-                assert_eq!(output[i], expected);
+                assert_eq!(*output_val, expected);
             }
         }
     }
diff --git a/columnar/src/column_index/optional_index/tests.rs b/columnar/src/column_index/optional_index/tests.rs
index e24b42194..2bcb77fd6 100644
--- a/columnar/src/column_index/optional_index/tests.rs
+++ b/columnar/src/column_index/optional_index/tests.rs
@@ -110,8 +110,8 @@ fn test_null_index(data: &[bool]) {
         .map(|(pos, _val)| pos as u32)
         .collect();
     let mut select_iter = null_index.select_cursor();
-    for i in 0..orig_idx_with_value.len() {
-        assert_eq!(select_iter.select(i as u32), orig_idx_with_value[i]);
+    for (i, expected) in orig_idx_with_value.iter().enumerate() {
+        assert_eq!(select_iter.select(i as u32), *expected);
     }
 
     let step_size = (orig_idx_with_value.len() / 100).max(1);
diff --git a/columnar/src/column_values/u64_based/line.rs b/columnar/src/column_values/u64_based/line.rs
index f3d5504fd..ecc2af22b 100644
--- a/columnar/src/column_values/u64_based/line.rs
+++ b/columnar/src/column_values/u64_based/line.rs
@@ -125,9 +125,8 @@ impl Line {
     /// Returns a line that attemps to approximate a function
     /// f: i in 0..[ys.num_vals()) -> ys[i].
     ///
-    /// - The approximation is always lower than the actual value.
-    /// Or more rigorously, formally `f(i).wrapping_sub(ys[i])` is small
-    /// for any i in [0..ys.len()).
+    /// - The approximation is always lower than the actual value. More rigorously,
+    ///   `f(i).wrapping_sub(ys[i])` is small for any i in [0..ys.len()).
     /// - It computes without panicking for any value of it.
     ///
     /// This function is only invariable by translation if all of the
diff --git a/columnar/src/columnar/merge/mod.rs b/columnar/src/columnar/merge/mod.rs
index d970d6861..c7721391d 100644
--- a/columnar/src/columnar/merge/mod.rs
+++ b/columnar/src/columnar/merge/mod.rs
@@ -64,10 +64,9 @@ impl From<ColumnType> for ColumnTypeCategory {
 /// resulting columnar. When a required column is a numerical column type, one of two things can
 /// happen:
 /// - If the required column type is compatible with all of the input columnar, the resulsting
-/// merged
-/// columnar will simply coerce the input column and use the required column type.
-/// - If the required column type is incompatible with one of the input columnar, the merged
-/// will fail with an InvalidData error.
+///   merged columnar will simply coerce the input column and use the required column type.
+/// - If the required column type is incompatible with one of the input columnar, the merge will
+///   fail with an InvalidData error.
 ///
 /// `merge_row_order` makes it possible to remove or reorder row in the resulting
 /// `Columnar` table.
diff --git a/columnar/src/columnar/merge/term_merger.rs b/columnar/src/columnar/merge/term_merger.rs
index 75cba350a..77b40964e 100644
--- a/columnar/src/columnar/merge/term_merger.rs
+++ b/columnar/src/columnar/merge/term_merger.rs
@@ -35,8 +35,7 @@ impl<'a> Ord for HeapItem<'a> {
 ///
 /// The item yield is actually a pair with
 /// - the term
-/// - a slice with the ordinal of the segments containing
-/// the terms.
+/// - a slice with the ordinal of the segments containing the terms.
 pub struct TermMerger<'a> {
     heap: BinaryHeap<HeapItem<'a>>,
     current_streamers: Vec<HeapItem<'a>>,
diff --git a/query-grammar/src/infallible.rs b/query-grammar/src/infallible.rs
index 0f9edec8e..c37cc1238 100644
--- a/query-grammar/src/infallible.rs
+++ b/query-grammar/src/infallible.rs
@@ -109,6 +109,9 @@ where F: nom::Parser {
     move |input: I| match f.parse(input) {
         Ok((input, (output, _err))) => Ok((input, output)),
         Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)),
+        // old versions don't understand this is uninhabited and need the empty match to help;
+        // newer versions warn because this arm is unreachable (which it indeed is).
+        #[allow(unreachable_patterns)]
         Err(Err::Error(val)) | Err(Err::Failure(val)) => match val {},
     }
 }
diff --git a/src/directory/directory.rs b/src/directory/directory.rs
index 19df314d9..73c037971 100644
--- a/src/directory/directory.rs
+++ b/src/directory/directory.rs
@@ -102,10 +102,8 @@ fn retry_policy(is_blocking: bool) -> RetryPolicy {
 ///
 /// There are currently two implementations of `Directory`
 ///
-/// - The [`MMapDirectory`][crate::directory::MmapDirectory], this
-/// should be your default choice.
-/// - The [`RamDirectory`][crate::directory::RamDirectory], which
-/// should be used mostly for tests.
+/// - The [`MmapDirectory`][crate::directory::MmapDirectory], which should be your default choice.
+/// - The [`RamDirectory`][crate::directory::RamDirectory], which should be used mostly for tests.
 pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Opens a file and returns a boxed `FileHandle`.
     ///
diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs
index 60f825328..b93cff20b 100644
--- a/src/fastfield/facet_reader.rs
+++ b/src/fastfield/facet_reader.rs
@@ -25,10 +25,9 @@ impl FacetReader {
     /// Creates a new `FacetReader`.
     ///
     /// A facet reader just wraps :
-    /// - a `MultiValuedFastFieldReader` that makes it possible to
-    /// access the list of facet ords for a given document.
-    /// - a `TermDictionary` that helps associating a facet to
-    /// an ordinal and vice versa.
+    /// - a `MultiValuedFastFieldReader` that makes it possible to access the list of facet ords for
+    ///   a given document.
+    /// - a `TermDictionary` that helps associate a facet to an ordinal and vice versa.
     pub fn new(facet_column: StrColumn) -> FacetReader {
         FacetReader { facet_column }
     }
diff --git a/src/future_result.rs b/src/future_result.rs
index d1d43f7e2..3516ec408 100644
--- a/src/future_result.rs
+++ b/src/future_result.rs
@@ -11,8 +11,8 @@ use crate::TantivyError;
 /// progress. Dropping the `FutureResult` does not cancel the task being executed
 /// either.
 ///
-/// - In a sync context, you can call `FutureResult::wait()`. The function
-/// does not rely on `block_on`.
+/// - In a sync context, you can call `FutureResult::wait()`. The function does not rely on
+///   `block_on`.
 /// - In an async context, you can call simply use `FutureResult` as a future.
 pub struct FutureResult<T> {
     inner: Inner<T>,
diff --git a/src/index/index.rs b/src/index/index.rs
index 250181db1..052bc4f92 100644
--- a/src/index/index.rs
+++ b/src/index/index.rs
@@ -49,10 +49,8 @@ fn load_metas(
 /// Save the index meta file.
 /// This operation is atomic :
 /// Either
-/// - it fails, in which case an error is returned,
-/// and the `meta.json` remains untouched,
-/// - it succeeds, and `meta.json` is written
-/// and flushed.
+/// - it fails, in which case an error is returned, and the `meta.json` remains untouched,
+/// - it succeeds, and `meta.json` is written and flushed.
 ///
 /// This method is not part of tantivy's public API
 fn save_new_metas(
@@ -529,12 +527,12 @@ impl Index {
     /// `IndexWriter` on the system is accessing the index directory,
     /// it is safe to manually delete the lockfile.
     ///
-    /// - `num_threads` defines the number of indexing workers that
-    /// should work at the same time.
+    /// - `num_threads` defines the number of indexing workers that should work at the same time.
     ///
-    /// - `overall_memory_budget_in_bytes` sets the amount of memory
-    /// allocated for all indexing thread.
-    /// Each thread will receive a budget of `overall_memory_budget_in_bytes / num_threads`.
+    /// - `overall_memory_budget_in_bytes` sets the amount of memory allocated for all indexing
+    ///   threads.
+    ///
+    /// Each thread will receive a budget of `overall_memory_budget_in_bytes / num_threads`.
     ///
     /// # Errors
     /// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IoError`.
diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs
index 4f8a8a383..8700e8135 100644
--- a/src/indexer/delete_queue.rs
+++ b/src/indexer/delete_queue.rs
@@ -179,8 +179,7 @@ impl DeleteCursor {
     /// Skips operations and position it so that
     /// - either all of the delete operation currently in the queue are consume and the next get
     ///   will return `None`.
-    /// - the next get will return the first operation with an
-    /// `opstamp >= target_opstamp`.
+    /// - the next get will return the first operation with an `opstamp >= target_opstamp`.
     pub fn skip_to(&mut self, target_opstamp: Opstamp) {
         // TODO Can be optimize as we work with block.
         while self.is_behind_opstamp(target_opstamp) {
diff --git a/src/indexer/merge_operation.rs b/src/indexer/merge_operation.rs
index a4ce49c93..073319002 100644
--- a/src/indexer/merge_operation.rs
+++ b/src/indexer/merge_operation.rs
@@ -29,8 +29,8 @@ impl MergeOperationInventory {
 
 /// A `MergeOperation` has two roles.
 /// It carries all of the information required to describe a merge:
-/// - `target_opstamp` is the opstamp up to which we want to consume the
-/// delete queue and reflect their deletes.
+/// - `target_opstamp` is the opstamp up to which we want to consume the delete queue and reflect
+///   their deletes.
 /// - `segment_ids` is the list of segment to be merged.
 ///
 /// The second role is to ensure keep track of the fact that these
diff --git a/src/indexer/segment_entry.rs b/src/indexer/segment_entry.rs
index 56fcf09b2..f47c88c53 100644
--- a/src/indexer/segment_entry.rs
+++ b/src/indexer/segment_entry.rs
@@ -10,12 +10,9 @@ use crate::indexer::delete_queue::DeleteCursor;
 ///
 /// In addition to segment `meta`,
 /// it contains a few transient states
-/// - `alive_bitset` is a bitset describing
-/// documents that were alive during the commit
-/// itself.
-/// - `delete_cursor` is the position in the delete queue.
-/// Deletes happening before the cursor are reflected either
-/// in the .del file or in the `alive_bitset`.
+/// - `alive_bitset` is a bitset describing documents that were alive during the commit itself.
+/// - `delete_cursor` is the position in the delete queue. Deletes happening before the cursor are
+///   reflected either in the .del file or in the `alive_bitset`.
 #[derive(Clone)]
 pub struct SegmentEntry {
     meta: SegmentMeta,
diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs
index 7166ebb56..5fa6ebfaa 100644
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -30,10 +30,8 @@ const NUM_MERGE_THREADS: usize = 4;
 /// Save the index meta file.
 /// This operation is atomic:
 /// Either
-/// - it fails, in which case an error is returned,
-/// and the `meta.json` remains untouched,
-/// - it success, and `meta.json` is written
-/// and flushed.
+/// - it fails, in which case an error is returned, and the `meta.json` remains untouched,
+/// - it succeeds, and `meta.json` is written and flushed.
 ///
 /// This method is not part of tantivy's public API
 pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()> {
diff --git a/src/lib.rs b/src/lib.rs
index dd4fc4f52..c34131e0a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -125,8 +125,8 @@
 //!
 //! - **Searching**: [Searcher] searches the segments with anything that implements
 //!   [Query](query::Query) and merges the results. The list of [supported
-//! queries](query::Query#implementors). Custom Queries are supported by implementing the
-//! [Query](query::Query) trait.
+//!   queries](query::Query#implementors). Custom Queries are supported by implementing the
+//!   [Query](query::Query) trait.
 //!
 //! - **[Directory](directory)**: Abstraction over the storage where the index data is stored.
 //!
diff --git a/src/postings/block_search.rs b/src/postings/block_search.rs
index 0fb21239c..558b0a346 100644
--- a/src/postings/block_search.rs
+++ b/src/postings/block_search.rs
@@ -18,7 +18,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
 /// # Assumption
 ///
 /// - The block is sorted. Some elements may appear several times. This is the case at the
-/// end of the last block for instance.
+///   end of the last block for instance.
 /// - The target is assumed smaller or equal to the last element of the block.
 pub fn branchless_binary_search(arr: &[u32; COMPRESSION_BLOCK_SIZE], target: u32) -> usize {
     let mut start = 0;
diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs
index 5fa925bb1..20ac200f7 100644
--- a/src/query/boolean_query/boolean_query.rs
+++ b/src/query/boolean_query/boolean_query.rs
@@ -5,15 +5,10 @@ use crate::schema::{IndexRecordOption, Term};
 /// The boolean query returns a set of documents
 /// that matches the Boolean combination of constituent subqueries.
 ///
-/// The documents matched by the boolean query are
-/// those which
-/// * match all of the sub queries associated with the
-/// `Must` occurrence
-/// * match none of the sub queries associated with the
-/// `MustNot` occurrence.
-/// * match at least one of the sub queries associated
-/// with the `Must` or `Should` occurrence.
-///
+/// The documents matched by the boolean query are those which
+/// - match all of the sub queries associated with the `Must` occurrence.
+/// - match none of the sub queries associated with the `MustNot` occurrence.
+/// - match at least one of the sub queries associated with the `Must` or `Should` occurrence.
 ///
 /// You can combine other query types and their `Occur`ances into one `BooleanQuery`
 ///
diff --git a/src/query/phrase_prefix_query/phrase_prefix_scorer.rs b/src/query/phrase_prefix_query/phrase_prefix_scorer.rs
index b456e4e40..814965b25 100644
--- a/src/query/phrase_prefix_query/phrase_prefix_scorer.rs
+++ b/src/query/phrase_prefix_query/phrase_prefix_scorer.rs
@@ -6,6 +6,9 @@
 use crate::query::phrase_query::{intersection_count, PhraseScorer};
 use crate::query::Scorer;
 use crate::{DocId, Score};
+// MultiPrefix is the larger variant, and also the one we expect most often. PhraseScorer is > 1kB
+// though; it would be interesting to slim it down if possible.
+#[allow(clippy::large_enum_variant)]
 enum PhraseKind {
     SinglePrefix {
         position_offset: u32,
diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs
index 147aef29b..5c67f4e27 100644
--- a/src/query/phrase_query/phrase_scorer.rs
+++ b/src/query/phrase_query/phrase_scorer.rs
@@ -219,8 +219,8 @@ fn intersection_exists_with_slop(
 /// In contrast to the regular algorithm this solves some issues:
 /// - Keep track of the slop so far. Slop is a budget that is spent on the distance between terms.
 /// - When encountering a match between two positions, which position is the best match is unclear
-/// and depends on intersections afterwards, therefore this algorithm keeps left and right as
-/// matches, but only counts one.
+///   and depends on intersections afterwards; therefore this algorithm keeps left and right as
+///   matches, but only counts one.
 ///
 /// This algorithm may return an incorrect count in some cases (e.g. left, right expansion and is
 /// then matches both on the following term.)
diff --git a/src/query/query.rs b/src/query/query.rs
index 4808849ba..32f74536f 100644
--- a/src/query/query.rs
+++ b/src/query/query.rs
@@ -115,10 +115,10 @@ impl<'a> EnableScoring<'a> {
 ///
 /// So to sum it up :
 /// - a `Query` is a recipe to define a set of documents as well the way to score them.
-/// - a [`Weight`] is this recipe tied to a specific [`Searcher`]. It may for instance
-/// hold statistics about the different term of the query. It is created by the query.
-/// - a [`Scorer`] is a cursor over the set of matching documents, for a specific
-/// [`SegmentReader`]. It is created by the [`Weight`].
+/// - a [`Weight`] is this recipe tied to a specific [`Searcher`]. It may for instance hold
+///   statistics about the different terms of the query. It is created by the query.
+/// - a [`Scorer`] is a cursor over the set of matching documents, for a specific [`SegmentReader`].
+///   It is created by the [`Weight`].
 ///
 /// When implementing a new type of `Query`, it is normal to implement a
 /// dedicated `Query`, [`Weight`] and [`Scorer`].
diff --git a/src/query/range_query/fast_field_range_doc_set.rs b/src/query/range_query/fast_field_range_doc_set.rs
index f30f8620c..779269069 100644
--- a/src/query/range_query/fast_field_range_doc_set.rs
+++ b/src/query/range_query/fast_field_range_doc_set.rs
@@ -49,10 +49,10 @@ pub(crate) struct RangeDocSet {
     ///
     /// There are two patterns.
     /// - We do a full scan. => We can load large chunks. We don't know in advance if seek call
-    /// will come, so we start with small chunks
+    ///   will come, so we start with small chunks.
     /// - We load docs, interspersed with seek calls. When there are big jumps in the seek, we
-    /// should load small chunks. When the seeks are small, we can employ the same strategy as on a
-    /// full scan.
+    ///   should load small chunks. When the seeks are small, we can employ the same strategy as on
+    ///   a full scan.
     fetch_horizon: u32,
     /// Current batch of loaded docs.
     loaded_docs: VecCursor,
diff --git a/src/schema/facet.rs b/src/schema/facet.rs
index d609e8d70..275b9cb90 100644
--- a/src/schema/facet.rs
+++ b/src/schema/facet.rs
@@ -169,7 +169,7 @@ impl Facet {
     /// Extract path from the `Facet`.
     pub fn to_path(&self) -> Vec<&str> {
-        self.encoded_str().split(|c| c == FACET_SEP_CHAR).collect()
+        self.encoded_str().split(FACET_SEP_CHAR).collect()
     }
 
     /// This function is the inverse of Facet::from(&str).
diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs
index 8d2f9b230..77e061bc6 100644
--- a/src/schema/field_entry.rs
+++ b/src/schema/field_entry.rs
@@ -12,8 +12,7 @@ use crate::schema::{
 ///
 /// It consists of
 /// - a field name
-/// - a field type, itself wrapping up options describing
-/// how the field should be indexed.
+/// - a field type, itself wrapping up options describing how the field should be indexed.
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct FieldEntry {
     name: String,
diff --git a/src/schema/term.rs b/src/schema/term.rs
index 044348263..f200e0e21 100644
--- a/src/schema/term.rs
+++ b/src/schema/term.rs
@@ -639,12 +639,11 @@ mod tests {
     /// <field_id> + <type_byte> + <value>
     ///
     /// - <field_id> is a big endian encoded u32 field id
-    /// - <type_byte>'s most significant bit expresses whether the term is a json term or not
-    /// The remaining 7 bits are used to encode the type of the value.
-    /// If this is a JSON term, the type is the type of the leaf of the json.
-    ///
+    /// - <type_byte>'s most significant bit expresses whether the term is a json term or not. The
+    ///   remaining 7 bits are used to encode the type of the value. If this is a JSON term, the
+    ///   type is the type of the leaf of the json.
     /// - <value> is, if this is not the json term, a binary representation specific to the type.
-    /// If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
+    ///   If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
     const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8;
 
     #[test]
diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs
index 70300944d..32d923eab 100644
--- a/src/schema/text_options.rs
+++ b/src/schema/text_options.rs
@@ -189,8 +189,8 @@ impl TokenizerName {
 ///
 /// It defines
 /// - The amount of information that should be stored about the presence of a term in a document.
-/// Essentially, should we store the term frequency and/or the positions (See
-/// [`IndexRecordOption`]).
+///   Essentially, should we store the term frequency and/or the positions (See
+///   [`IndexRecordOption`]).
 /// - The name of the `Tokenizer` that should be used to process the field.
 /// - Flag indicating, if fieldnorms should be stored (See [fieldnorm](crate::fieldnorm)). Defaults
 ///   to `true`.
diff --git a/src/store/mod.rs b/src/store/mod.rs
index 48962bb4b..63327f073 100644
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -25,8 +25,8 @@
 //! Most users should not access the `StoreReader` directly
 //! and should rely on either
 //!
-//! - at the segment level, the
-//! [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
+//! - at the segment level, the [`SegmentReader`'s `doc`
+//!   method](../struct.SegmentReader.html#method.doc)
 //! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method
 
 mod compressors;
diff --git a/src/termdict/fst_termdict/merger.rs b/src/termdict/fst_termdict/merger.rs
index b41748bfb..e8a064deb 100644
--- a/src/termdict/fst_termdict/merger.rs
+++ b/src/termdict/fst_termdict/merger.rs
@@ -11,8 +11,7 @@ use crate::termdict::{TermOrdinal, TermStreamer};
 ///
 /// The item yielded is actually a pair with
 /// - the term
-/// - a slice with the ordinal of the segments containing
-/// the term.
+/// - a slice with the ordinal of the segments containing the term.
 pub struct TermMerger<'a> {
     dictionaries: Vec<&'a TermDictionary>,
     union: Union<'a>,
diff --git a/src/termdict/sstable_termdict/merger.rs b/src/termdict/sstable_termdict/merger.rs
index a9cfda69d..aec10454f 100644
--- a/src/termdict/sstable_termdict/merger.rs
+++ b/src/termdict/sstable_termdict/merger.rs
@@ -34,8 +34,7 @@ impl<'a> Ord for HeapItem<'a> {
 ///
 /// The item yield is actually a pair with
 /// - the term
-/// - a slice with the ordinal of the segments containing
-/// the terms.
+/// - a slice with the ordinal of the segments containing the terms.
 pub struct TermMerger<'a> {
     heap: BinaryHeap<HeapItem<'a>>,
     current_streamers: Vec<HeapItem<'a>>,
diff --git a/src/tokenizer/tokenizer_manager.rs b/src/tokenizer/tokenizer_manager.rs
index a2be12390..a0bdbcc0c 100644
--- a/src/tokenizer/tokenizer_manager.rs
+++ b/src/tokenizer/tokenizer_manager.rs
@@ -12,14 +12,12 @@ use crate::tokenizer::{
 ///
 /// By default, it is populated with the following managers.
 ///
-/// * `raw` : does not process nor tokenize the text.
-/// * `default` : Chops the text on according to whitespace and
-/// punctuation, removes tokens that are too long, and lowercases
-/// tokens
-/// * `en_stem` : Like `default`, but also applies stemming on the
-/// resulting tokens. Stemming can improve the recall of your
-/// search engine.
-/// * `whitespace` : Splits the text on whitespaces.
+/// - `raw` : does not process nor tokenize the text.
+/// - `default` : Chops the text according to whitespace and punctuation, removes tokens that are
+///   too long, and lowercases tokens.
+/// - `en_stem` : Like `default`, but also applies stemming on the resulting tokens. Stemming can
+///   improve the recall of your search engine.
+/// - `whitespace` : Splits the text on whitespaces.
 #[derive(Clone)]
 pub struct TokenizerManager {
     tokenizers: Arc<RwLock<HashMap<String, TextAnalyzer>>>,
diff --git a/stacker/src/memory_arena.rs b/stacker/src/memory_arena.rs
index 5c5bf44cf..6f3f2f494 100644
--- a/stacker/src/memory_arena.rs
+++ b/stacker/src/memory_arena.rs
@@ -12,13 +12,13 @@
 //! # Limitations
 //!
 //! - Your object shall not implement `Drop`.
-//! - `Addr` to the `Arena` are 32-bits. The maximum capacity of the arena
-//! is 4GB. *(Tantivy's indexer uses one arena per indexing thread.)*
-//! - The arena only works for objects much smaller than `1MB`.
-//! Allocating more than `1MB` at a time will result in a panic,
-//! and allocating a lot of large object (> 500KB) will result in a fragmentation.
-//! - Your objects are store in an unaligned fashion. For this reason,
-//! the API does not let you access them as references.
+//! - `Addr` to the `Arena` are 32-bits. The maximum capacity of the arena is 4GB. *(Tantivy's
+//!   indexer uses one arena per indexing thread.)*
+//! - The arena only works for objects much smaller than `1MB`. Allocating more than `1MB` at a
+//!   time will result in a panic, and allocating a lot of large objects (> 500KB) will result in
+//!   fragmentation.
+//! - Your objects are stored in an unaligned fashion. For this reason, the API does not let you
+//!   access them as references.
 //!
 //! Instead, you store and access your data via `.write(...)` and `.read(...)`, which under the hood
 //! stores your object using `ptr::write_unaligned` and `ptr::read_unaligned`.