diff --git a/examples/basic_search.rs b/examples/basic_search.rs index 1aba7bf3f..00576be51 100644 --- a/examples/basic_search.rs +++ b/examples/basic_search.rs @@ -230,7 +230,7 @@ fn main() -> tantivy::Result<()> { // a title. for doc_address in doc_addresses { - let retrieved_doc = searcher.doc(&doc_address)?; + let retrieved_doc = searcher.doc(doc_address)?; println!("{}", schema.to_json(&retrieved_doc)); } diff --git a/examples/custom_tokenizer.rs b/examples/custom_tokenizer.rs index 7c5299e00..08236c0e5 100644 --- a/examples/custom_tokenizer.rs +++ b/examples/custom_tokenizer.rs @@ -109,7 +109,7 @@ fn main() -> tantivy::Result<()> { let doc_addresses = top_collector.docs(); for doc_address in doc_addresses { - let retrieved_doc = searcher.doc(&doc_address)?; + let retrieved_doc = searcher.doc(doc_address)?; println!("{}", schema.to_json(&retrieved_doc)); } diff --git a/examples/deleting_updating_documents.rs b/examples/deleting_updating_documents.rs index de0603392..afae85685 100644 --- a/examples/deleting_updating_documents.rs +++ b/examples/deleting_updating_documents.rs @@ -31,7 +31,7 @@ fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result tantivy::Result<()> { let doc_addresses = top_collector.docs(); for doc_address in doc_addresses { - let doc = searcher.doc(&doc_address)?; + let doc = searcher.doc(doc_address)?; let snippet = snippet_generator.snippet_from_doc(&doc); println!("title: {}", doc.get_first(title).unwrap().text().unwrap()); println!("snippet: {}", snippet.to_html()); diff --git a/examples/stop_words.rs b/examples/stop_words.rs index 8945f8614..80e78ece2 100644 --- a/examples/stop_words.rs +++ b/examples/stop_words.rs @@ -113,7 +113,7 @@ fn main() -> tantivy::Result<()> { let doc_addresses = top_collector.docs(); for doc_address in doc_addresses { - let retrieved_doc = searcher.doc(&doc_address)?; + let retrieved_doc = searcher.doc(doc_address)?; println!("{}", schema.to_json(&retrieved_doc)); } diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 8e1c95876..a092a8dae 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -377,10 +377,8 @@ impl FacetCollector { } else { collapsed_facet_counts[seg_ord][collapsed_term_id] } - }) - .unwrap_or(0) - }) - .sum(); + }).unwrap_or(0) + }).sum(); if count > 0u64 { let bytes: Vec = facet_merger.key().to_owned(); // may create an corrupted facet if the term dicitonary is corrupted @@ -529,8 +527,7 @@ mod tests { n /= 4; let leaf = n % 5; Facet::from(&format!("/top{}/mid{}/leaf{}", top, mid, leaf)) - }) - .collect(); + }).collect(); for i in 0..num_facets * 10 { let mut doc = Document::new(); doc.add_facet(facet_field, facets[i % num_facets].clone()); @@ -557,7 +554,8 @@ mod tests { ("/top1/mid1", 50), ("/top1/mid2", 50), ("/top1/mid3", 50), - ].iter() + ] + .iter() .map(|&(facet_str, count)| (String::from(facet_str), count)) .collect::>() ); @@ -621,15 +619,13 @@ mod tests { let facet = Facet::from(&format!("/facet/{}", c)); let doc = doc!(facet_field => facet); iter::repeat(doc).take(count) - }) - .map(|mut doc| { + }).map(|mut doc| { doc.add_facet( facet_field, &format!("/facet/{}", thread_rng().sample(&uniform)), ); doc - }) - .collect(); + }).collect(); thread_rng().shuffle(&mut docs[..]); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 6cb61e8b2..265a6981a 100644 --- a/src/collector/top_collector.rs +++ 
b/src/collector/top_collector.rs @@ -91,8 +91,7 @@ impl TopCollector { feature, doc_address, }| (feature, doc_address), - ) - .collect() + ).collect() } /// Return true iff at least K documents have gone through diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs index 4945796b0..593e36fb8 100644 --- a/src/common/bitpacker.rs +++ b/src/common/bitpacker.rs @@ -102,6 +102,7 @@ where addr + 8 <= data.len(), "The fast field field should have been padded with 7 bytes." ); + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] let val_unshifted_unmasked: u64 = u64::from_le(unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) }); let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; @@ -125,6 +126,7 @@ where for output_val in output.iter_mut() { let addr = addr_in_bits >> 3; let bit_shift = addr_in_bits & 7; + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) }; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; diff --git a/src/common/bitset.rs b/src/common/bitset.rs index 750d835f8..a125f4cbc 100644 --- a/src/common/bitset.rs +++ b/src/common/bitset.rs @@ -77,7 +77,7 @@ impl TinySet { /// Returns true iff the `TinySet` is empty. #[inline(always)] - pub fn is_empty(&self) -> bool { + pub fn is_empty(self) -> bool { self.0 == 0u64 } @@ -114,7 +114,7 @@ impl TinySet { self.0 = 0u64; } - pub fn len(&self) -> u32 { + pub fn len(self) -> u32 { self.0.count_ones() } } diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 9ae0dbf43..ba9d77c70 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -32,6 +32,10 @@ pub struct InvertedIndexReader { } impl InvertedIndexReader { + #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::needless_pass_by_value) + )] // for symetry pub(crate) fn new( termdict: TermDictionary, postings_source: ReadOnlySource, @@ -54,7 +58,7 @@ impl InvertedIndexReader { /// Creates an empty `InvertedIndexReader` object, which /// contains no terms at all. - pub fn empty(field_type: FieldType) -> InvertedIndexReader { + pub fn empty(field_type: &FieldType) -> InvertedIndexReader { let record_option = field_type .get_index_record_option() .unwrap_or(IndexRecordOption::Basic); diff --git a/src/core/searcher.rs b/src/core/searcher.rs index cbe549062..64e5263ee 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -46,8 +46,8 @@ impl Searcher { /// /// The searcher uses the segment ordinal to route the /// the request to the right `Segment`. 
- pub fn doc(&self, doc_address: &DocAddress) -> Result { - let DocAddress(segment_local_id, doc_id) = *doc_address; + pub fn doc(&self, doc_address: DocAddress) -> Result { + let DocAddress(segment_local_id, doc_id) = doc_address; let segment_reader = &self.segment_readers[segment_local_id as usize]; segment_reader.doc(doc_id) } @@ -61,7 +61,7 @@ impl Searcher { pub fn num_docs(&self) -> u64 { self.segment_readers .iter() - .map(|segment_reader| segment_reader.num_docs() as u64) + .map(|segment_reader| u64::from(segment_reader.num_docs())) .sum::() } @@ -70,8 +70,9 @@ impl Searcher { pub fn doc_freq(&self, term: &Term) -> u64 { self.segment_readers .iter() - .map(|segment_reader| segment_reader.inverted_index(term.field()).doc_freq(term) as u64) - .sum::() + .map(|segment_reader| { + u64::from(segment_reader.inverted_index(term.field()).doc_freq(term)) + }).sum::() } /// Return the list of segment readers diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 03cfdf08d..7cf395c9f 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -153,8 +153,8 @@ impl SegmentReader { /// Accessor to the `BytesFastFieldReader` associated to a given `Field`. pub fn bytes_fast_field_reader(&self, field: Field) -> fastfield::Result { let field_entry = self.schema.get_field_entry(field); - match field_entry.field_type() { - &FieldType::Bytes => {} + match *field_entry.field_type() { + FieldType::Bytes => {} _ => return Err(FastFieldNotAvailableError::new(field_entry)), } let idx_reader = self @@ -177,7 +177,7 @@ impl SegmentReader { "The field {:?} is not a \ hierarchical facet.", field_entry - )).into()); + ))); } let term_ords_reader = self.multi_fast_field_reader(field)?; let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| { @@ -188,7 +188,7 @@ impl SegmentReader { field_entry.name() )) })?; - let termdict = TermDictionary::from_source(termdict_source); + let termdict = TermDictionary::from_source(&termdict_source); let facet_reader = FacetReader::new(term_ords_reader, termdict); Ok(facet_reader) } @@ -312,7 +312,7 @@ impl SegmentReader { // As a result, no data is associated to the inverted index. // // Returns an empty inverted index. - return Arc::new(InvertedIndexReader::empty(field_type.clone())); + return Arc::new(InvertedIndexReader::empty(field_type)); } let postings_source = postings_source_opt.unwrap(); @@ -333,7 +333,7 @@ impl SegmentReader { .expect("Index corrupted. 
Failed to open field positions in composite file."); let inv_idx_reader = Arc::new(InvertedIndexReader::new( - TermDictionary::from_source(termdict_source), + TermDictionary::from_source(&termdict_source), postings_source, positions_source, positions_idx_source, diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 2f1733e0f..ad79319e7 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -100,8 +100,7 @@ impl InnerDirectory { ); let io_err = make_io_err(msg); OpenReadError::IOError(IOError::with_path(path.to_owned(), io_err)) - }) - .and_then(|readable_map| { + }).and_then(|readable_map| { readable_map .get(path) .ok_or_else(|| OpenReadError::FileDoesNotExist(PathBuf::from(path))) @@ -121,8 +120,7 @@ impl InnerDirectory { ); let io_err = make_io_err(msg); DeleteError::IOError(IOError::with_path(path.to_owned(), io_err)) - }) - .and_then(|mut writable_map| match writable_map.remove(path) { + }).and_then(|mut writable_map| match writable_map.remove(path) { Some(_) => Ok(()), None => Err(DeleteError::FileDoesNotExist(PathBuf::from(path))), }) diff --git a/src/error.rs b/src/error.rs index d7f0d1d1a..a84befbc8 100644 --- a/src/error.rs +++ b/src/error.rs @@ -84,9 +84,7 @@ impl From> for TantivyError { impl From for TantivyError { fn from(error: OpenReadError) -> TantivyError { match error { - OpenReadError::FileDoesNotExist(filepath) => { - TantivyError::PathDoesNotExist(filepath) - } + OpenReadError::FileDoesNotExist(filepath) => TantivyError::PathDoesNotExist(filepath), OpenReadError::IOError(io_error) => TantivyError::IOError(io_error), } } @@ -105,7 +103,7 @@ impl From for TantivyError { TantivyError::FileAlreadyExists(filepath) } OpenWriteError::IOError(io_error) => TantivyError::IOError(io_error), - }.into() + } } } @@ -115,9 +113,9 @@ impl From for TantivyError { OpenDirectoryError::DoesNotExist(directory_path) => { TantivyError::PathDoesNotExist(directory_path) } - OpenDirectoryError::NotADirectory(directory_path) => TantivyError::InvalidArgument( - format!("{:?} is not a directory", directory_path), - ), + OpenDirectoryError::NotADirectory(directory_path) => { + TantivyError::InvalidArgument(format!("{:?} is not a directory", directory_path)) + } } } } diff --git a/src/fastfield/bytes/writer.rs b/src/fastfield/bytes/writer.rs index 568a5421f..472e8d682 100644 --- a/src/fastfield/bytes/writer.rs +++ b/src/fastfield/bytes/writer.rs @@ -51,7 +51,7 @@ impl BytesFastFieldWriter { self.next_doc(); for field_value in doc.field_values() { if field_value.field() == self.field { - if let &Value::Bytes(ref bytes) = field_value.value() { + if let Value::Bytes(ref bytes) = *field_value.value() { self.vals.extend_from_slice(bytes); } else { panic!( diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index f4b90ac8b..6df8e3775 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -11,7 +11,6 @@ use schema::SchemaBuilder; use schema::FAST; use std::collections::HashMap; use std::marker::PhantomData; -use std::mem; use std::path::Path; use DocId; @@ -80,7 +79,8 @@ impl FastFieldReader { // TODO change start to `u64`. 
// For multifastfield, start is an index in a second fastfield, not a `DocId` pub fn get_range(&self, start: u32, output: &mut [Item]) { - let output_u64: &mut [u64] = unsafe { mem::transmute(output) }; // ok: Item is either `u64` or `i64` + // ok: Item is either `u64` or `i64` + let output_u64: &mut [u64] = unsafe { &mut *(output as *mut [Item] as *mut [u64]) }; self.bit_unpacker.get_range(start, output_u64); for out in output_u64.iter_mut() { *out = Item::from_u64(*out + self.min_value_u64).as_u64(); diff --git a/src/fieldnorm/code.rs b/src/fieldnorm/code.rs index 230c0e743..3a62d18c2 100644 --- a/src/fieldnorm/code.rs +++ b/src/fieldnorm/code.rs @@ -15,23 +15,23 @@ pub const FIELD_NORMS_TABLE: [u32; 256] = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 60, 64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144, 152, 168, 184, 200, 216, 232, - 248, 264, 280, 312, 344, 376, 408, 440, 472, 504, 536, 600, 664, 728, 792, 856, 920, 984, 1_048, - 1176, 1304, 1432, 1560, 1688, 1816, 1944, 2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120, - 4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, 10264, 11288, 12312, 13336, 14360, 15384, - 16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, 32792, 36888, 40984, 45080, 49176, - 53272, 57368, 61464, 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480, - 163864, 180248, 196632, 213016, 229400, 245784, 262168, 294936, 327704, 360472, 393240, 426008, - 458776, 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, 983064, 1048600, - 1179672, 1310744, 1441816, 1572888, 1703960, 1835032, 1966104, 2097176, 2359320, 2621464, - 2883608, 3145752, 3407896, 3670040, 3932184, 4194328, 4718616, 5242904, 5767192, 6291480, - 6815768, 7340056, 7864344, 8388632, 9437208, 10485784, 11534360, 12582936, 13631512, 14680088, - 15728664, 16777240, 18874392, 20971544, 23068696, 25165848, 27263000, 29360152, 31457304, - 33554456, 37748760, 41943064, 46137368, 50331672, 54525976, 58720280, 62914584, 67108888, - 75497496, 83886104, 92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968, - 167772184, 184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912, - 335544344, 369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800, - 671088664, 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576, - 1342177304, 1476395032, 1610612760, 1744830488, 1879048216, 2013265944, + 248, 264, 280, 312, 344, 376, 408, 440, 472, 504, 536, 600, 664, 728, 792, 856, 920, 984, + 1_048, 1176, 1304, 1432, 1560, 1688, 1816, 1944, 2072, 2328, 2584, 2840, 3096, 3352, 3608, + 3864, 4120, 4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, 10264, 11288, 12312, 13336, + 14360, 15384, 16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, 32792, 36888, 40984, + 45080, 49176, 53272, 57368, 61464, 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, + 131096, 147480, 163864, 180248, 196632, 213016, 229400, 245784, 262168, 294936, 327704, 360472, + 393240, 426008, 458776, 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, 983064, + 1048600, 1179672, 1310744, 1441816, 1572888, 1703960, 1835032, 1966104, 2097176, 2359320, + 2621464, 2883608, 3145752, 3407896, 3670040, 3932184, 4194328, 4718616, 5242904, 5767192, + 6291480, 6815768, 7340056, 7864344, 8388632, 9437208, 10485784, 11534360, 12582936, 13631512, + 14680088, 
15728664, 16777240, 18874392, 20971544, 23068696, 25165848, 27263000, 29360152, + 31457304, 33554456, 37748760, 41943064, 46137368, 50331672, 54525976, 58720280, 62914584, + 67108888, 75497496, 83886104, 92274712, 100663320, 109051928, 117440536, 125829144, 134217752, + 150994968, 167772184, 184549400, 201326616, 218103832, 234881048, 251658264, 268435480, + 301989912, 335544344, 369098776, 402653208, 436207640, 469762072, 503316504, 536870936, + 603979800, 671088664, 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, + 1207959576, 1342177304, 1476395032, 1610612760, 1744830488, 1879048216, 2013265944, ]; #[cfg(test)] diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index 18eff3387..842b7a2f3 100644 --- a/src/indexer/delete_queue.rs +++ b/src/indexer/delete_queue.rs @@ -186,19 +186,21 @@ impl DeleteCursor { /// `opstamp >= target_opstamp`. pub fn skip_to(&mut self, target_opstamp: u64) { // TODO Can be optimize as we work with block. - #[cfg_attr(feature = "cargo-clippy", allow(clippy::while_let_loop))] - loop { - if let Some(operation) = self.get() { - if operation.opstamp >= target_opstamp { - break; - } - } else { - break; - } + while self.is_behind_opstamp(target_opstamp) { self.advance(); } } + #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::wrong_self_convention) + )] + fn is_behind_opstamp(&mut self, target_opstamp: u64) -> bool { + self.get() + .map(|operation| operation.opstamp < target_opstamp) + .unwrap_or(false) + } + /// If the current block has been entirely /// consumed, try to load the next one. /// diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 32017e8f5..66de84c16 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -56,11 +56,12 @@ fn initial_table_size(per_thread_memory_budget: usize) -> usize { (1..) .take_while(|num_bits: &usize| compute_table_size(*num_bits) < table_size_limit) .last() - .expect(&format!( - "Per thread memory is too small: {}", - per_thread_memory_budget - )) - .min(19) // we cap it at 512K + .unwrap_or_else(|| { + panic!( + "Per thread memory is too small: {}", + per_thread_memory_budget + ) + }).min(19) // we cap it at 512K } /// `IndexWriter` is the user entry-point to add document to an index. @@ -300,9 +301,7 @@ fn index_documents( let last_docstamp: u64 = *(doc_opstamps.last().unwrap()); - let segment_entry: SegmentEntry; - - if delete_cursor.get().is_some() { + let segment_entry: SegmentEntry = if delete_cursor.get().is_some() { let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps); let segment_reader = SegmentReader::open(segment)?; let mut deleted_bitset = BitSet::with_capacity(num_docs as usize); @@ -313,18 +312,18 @@ fn index_documents( &doc_to_opstamps, last_docstamp, )?; - segment_entry = SegmentEntry::new(segment_meta, delete_cursor, { + SegmentEntry::new(segment_meta, delete_cursor, { if may_have_deletes { Some(deleted_bitset) } else { None } - }); + }) } else { // if there are no delete operation in the queue, no need // to even open the segment. 
- segment_entry = SegmentEntry::new(segment_meta, delete_cursor, None); - } + SegmentEntry::new(segment_meta, delete_cursor, None) + }; Ok(segment_updater.add_segment(generation, segment_entry)) } @@ -391,11 +390,9 @@ impl IndexWriter { .name(format!( "indexing thread {} for gen {}", self.worker_id, generation - )) - .spawn(move || { + )).spawn(move || { loop { - let mut document_iterator = - document_receiver_clone.clone().into_iter().peekable(); + let mut document_iterator = document_receiver_clone.clone().peekable(); // the peeking here is to avoid // creating a new segment's files diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 87158a947..a42ea6d44 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -40,15 +40,13 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 { total_tokens += reader.inverted_index(field).total_num_tokens(); } } - total_tokens - + count - .iter() - .cloned() - .enumerate() - .map(|(fieldnorm_ord, count)| { - count as u64 * FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8) as u64 - }) - .sum::() + total_tokens + count + .iter() + .cloned() + .enumerate() + .map(|(fieldnorm_ord, count)| { + count as u64 * u64::from(FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8)) + }).sum::() } pub struct IndexMerger { @@ -111,7 +109,7 @@ impl TermOrdinalMapping { .iter() .flat_map(|term_ordinals| term_ordinals.iter().cloned().max()) .max() - .unwrap_or(TermOrdinal::default()) + .unwrap_or_else(TermOrdinal::default) } } @@ -190,7 +188,7 @@ impl IndexMerger { `term_ordinal_mapping`."); self.write_hierarchical_facet_field( field, - term_ordinal_mapping, + &term_ordinal_mapping, fast_field_serializer, )?; } @@ -314,7 +312,7 @@ impl IndexMerger { fn write_hierarchical_facet_field( &self, field: Field, - term_ordinal_mappings: TermOrdinalMapping, + term_ordinal_mappings: &TermOrdinalMapping, fast_field_serializer: &mut FastFieldSerializer, ) -> Result<()> { // Multifastfield consists in 2 fastfields. @@ -393,8 +391,8 @@ impl IndexMerger { // We can now initialize our serializer, and push it the different values { - let mut serialize_vals = - fast_field_serializer.new_u64_fast_field_with_idx(field, min_value, max_value, 1)?; + let mut serialize_vals = fast_field_serializer + .new_u64_fast_field_with_idx(field, min_value, max_value, 1)?; for reader in &self.readers { let ff_reader: MultiValueIntFastFieldReader = reader.multi_fast_field_reader(field)?; @@ -525,8 +523,7 @@ impl IndexMerger { } } None - }) - .collect(); + }).collect(); // At this point, `segment_postings` contains the posting list // of all of the segments containing the given term. 
@@ -667,8 +664,7 @@ mod tests { TextFieldIndexing::default() .set_tokenizer("default") .set_index_option(IndexRecordOption::WithFreqs), - ) - .set_stored(); + ).set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); let score_fieldtype = schema::IntOptions::default().set_fast(Cardinality::SingleValue); let score_field = schema_builder.add_u64_field("score", score_fieldtype); @@ -770,23 +766,23 @@ mod tests { ); } { - let doc = searcher.doc(&DocAddress(0, 0)).unwrap(); + let doc = searcher.doc(DocAddress(0, 0)).unwrap(); assert_eq!(doc.get_first(text_field).unwrap().text(), Some("af b")); } { - let doc = searcher.doc(&DocAddress(0, 1)).unwrap(); + let doc = searcher.doc(DocAddress(0, 1)).unwrap(); assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c")); } { - let doc = searcher.doc(&DocAddress(0, 2)).unwrap(); + let doc = searcher.doc(DocAddress(0, 2)).unwrap(); assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c d")); } { - let doc = searcher.doc(&DocAddress(0, 3)).unwrap(); + let doc = searcher.doc(DocAddress(0, 3)).unwrap(); assert_eq!(doc.get_first(text_field).unwrap().text(), Some("af b")); } { - let doc = searcher.doc(&DocAddress(0, 4)).unwrap(); + let doc = searcher.doc(DocAddress(0, 4)).unwrap(); assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c g")); } { @@ -822,8 +818,7 @@ mod tests { let text_fieldtype = schema::TextOptions::default() .set_indexing_options( TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs), - ) - .set_stored(); + ).set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); let score_fieldtype = schema::IntOptions::default().set_fast(Cardinality::SingleValue); let score_field = schema_builder.add_u64_field("score", score_fieldtype); diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 2f1aab70c..1b2cd7c85 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -336,8 +336,7 @@ impl SegmentUpdater { .unwrap() .remove(&merging_thread_id); Ok(()) - }) - .expect("Failed to spawn a thread."); + }).expect("Failed to spawn a thread."); self.0 .merging_threads .write() diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 9627d60ad..ce4b1eb68 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -49,20 +49,20 @@ impl SegmentWriter { ) -> Result { let segment_serializer = SegmentSerializer::for_segment(&mut segment)?; let multifield_postings = MultiFieldPostingsWriter::new(schema, table_bits); - let tokenizers = schema - .fields() - .iter() - .map(|field_entry| field_entry.field_type()) - .map(|field_type| match *field_type { - FieldType::Str(ref text_options) => text_options.get_indexing_options().and_then( - |text_index_option| { - let tokenizer_name = &text_index_option.tokenizer(); - segment.index().tokenizers().get(tokenizer_name) - }, - ), - _ => None, - }) - .collect(); + let tokenizers = + schema + .fields() + .iter() + .map(|field_entry| field_entry.field_type()) + .map(|field_type| match *field_type { + FieldType::Str(ref text_options) => text_options + .get_indexing_options() + .and_then(|text_index_option| { + let tokenizer_name = &text_index_option.tokenizer(); + segment.index().tokenizers().get(tokenizer_name) + }), + _ => None, + }).collect(); Ok(SegmentWriter { max_doc: 0, multifield_postings, @@ -117,8 +117,7 @@ impl SegmentWriter { _ => { panic!("Expected hierarchical facet"); } - }) - .collect(); + }).collect(); let 
mut term = Term::for_field(field); // we set the Term for facet_bytes in facets { let mut unordered_term_id_opt = None; @@ -146,8 +145,7 @@ impl SegmentWriter { .flat_map(|field_value| match *field_value.value() { Value::Str(ref text) => Some(text.as_str()), _ => None, - }) - .collect(); + }).collect(); if texts.is_empty() { 0 } else { diff --git a/src/lib.rs b/src/lib.rs index 15a2bd567..8e717e82f 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,8 @@ #![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::inline_always))] -#![cfg_attr(feature = "cargo-clippy", feature(tool_lints))] #![cfg_attr(all(feature = "unstable", test), feature(test))] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::decimal_literal_representation))] - +#![cfg_attr(feature = "cargo-clippy", feature(tool_lints))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))] #![doc(test(attr(allow(unused_variables), deny(warnings))))] -#![allow(unknown_lints)] #![warn(missing_docs)] #![recursion_limit = "80"] @@ -98,7 +93,7 @@ //! // most relevant doc ids... //! let doc_addresses = top_collector.docs(); //! for doc_address in doc_addresses { -//! let retrieved_doc = searcher.doc(&doc_address)?; +//! let retrieved_doc = searcher.doc(doc_address)?; //! println!("{}", schema.to_json(&retrieved_doc)); //! } //! @@ -184,7 +179,10 @@ mod macros; pub use error::TantivyError; -#[deprecated(since = "0.7.0", note = "please use `tantivy::TantivyError` instead")] +#[deprecated( + since = "0.7.0", + note = "please use `tantivy::TantivyError` instead" +)] pub use error::TantivyError as Error; extern crate census; diff --git a/src/positions/reader.rs b/src/positions/reader.rs index 9a0157725..470abaaa2 100644 --- a/src/positions/reader.rs +++ b/src/positions/reader.rs @@ -137,7 +137,8 @@ impl PositionReader { .iter() .cloned() .map(|num_bit| num_bit as usize) - .sum::() * (COMPRESSION_BLOCK_SIZE / 8); + .sum::() + * (COMPRESSION_BLOCK_SIZE / 8); self.skip_read.advance(num_blocks_to_advance); self.position_read.advance(skip_len); diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index b3f879611..dd0f691ae 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -29,8 +29,7 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box { IndexRecordOption::WithFreqsAndPositions => { SpecializedPostingsWriter::::new_boxed() } - }) - .unwrap_or_else(|| SpecializedPostingsWriter::::new_boxed()), + }).unwrap_or_else(|| SpecializedPostingsWriter::::new_boxed()), FieldType::U64(_) | FieldType::I64(_) | FieldType::HierarchicalFacet => { SpecializedPostingsWriter::::new_boxed() } @@ -139,8 +138,7 @@ impl MultiFieldPostingsWriter { .enumerate() .map(|(term_ord, unord_term_id)| { (unord_term_id as UnorderedTermId, term_ord as TermOrdinal) - }) - .collect(); + }).collect(); unordered_term_mappings.insert(field, mapping); } FieldType::U64(_) | FieldType::I64(_) => {} diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 38f12fe09..776844f2a 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -205,7 +205,9 @@ impl DocSet for SegmentPostings { return SkipResult::End; } } - } else if self.block_cursor.skip_to(target) == BlockSegmentPostingsSkipResult::Terminated { + } else if 
self.block_cursor.skip_to(target) + == BlockSegmentPostingsSkipResult::Terminated + { // no positions needed. no need to sum freqs. return SkipResult::End; } @@ -531,8 +533,7 @@ impl BlockSegmentPostings { } else { BlockSegmentPostingsSkipResult::Terminated } - }) - .unwrap_or(BlockSegmentPostingsSkipResult::Terminated); + }).unwrap_or(BlockSegmentPostingsSkipResult::Terminated); } BlockSegmentPostingsSkipResult::Terminated } diff --git a/src/postings/stacker/murmurhash2.rs b/src/postings/stacker/murmurhash2.rs index 68e22e6c3..9626dcb53 100644 --- a/src/postings/stacker/murmurhash2.rs +++ b/src/postings/stacker/murmurhash2.rs @@ -4,7 +4,7 @@ const M: u32 = 0x5bd1_e995; #[inline(always)] pub fn murmurhash2(key: &[u8]) -> u32 { - #[cfg_attr(feature="cargo-clippy", allow(clippy::cast_ptr_alignment))] + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] let mut key_ptr: *const u32 = key.as_ptr() as *const u32; let len = key.len() as u32; let mut h: u32 = SEED ^ len; diff --git a/src/query/bm25.rs b/src/query/bm25.rs index 4a3a25590..eb2546725 100644 --- a/src/query/bm25.rs +++ b/src/query/bm25.rs @@ -63,8 +63,7 @@ impl BM25Weight { .map(|term| { let term_doc_freq = searcher.doc_freq(term); idf(term_doc_freq, total_num_docs) - }) - .sum::(); + }).sum::(); BM25Weight::new(idf, average_fieldnorm) } diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index 353c89806..b530c6b0a 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -47,8 +47,7 @@ impl Query for BooleanQuery { .iter() .map(|&(ref occur, ref subquery)| { Ok((*occur, subquery.weight(searcher, scoring_enabled)?)) - }) - .collect::>()?; + }).collect::>()?; Ok(Box::new(BooleanWeight::new(sub_weights, scoring_enabled))) } @@ -69,8 +68,7 @@ impl BooleanQuery { let term_query: Box = Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)); (Occur::Should, term_query) - }) - .collect(); + }).collect(); BooleanQuery::from(occur_term_queries) } diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs index 85f075d3a..9b896a46a 100644 --- a/src/query/phrase_query/phrase_scorer.rs +++ b/src/query/phrase_query/phrase_scorer.rs @@ -134,8 +134,7 @@ impl PhraseScorer { .into_iter() .map(|(offset, postings)| { PostingsWithOffset::new(postings, (max_offset - offset) as u32) - }) - .collect::>(); + }).collect::>(); PhraseScorer { intersection_docset: Intersection::new(postings_with_offsets), num_docsets, diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index fcfa19345..6f35971a5 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -1,5 +1,3 @@ -#![cfg_attr(feature = "cargo-clippy", allow(clippy::unneeded_field_pattern))] - use super::logical_ast::*; use super::query_grammar::parse_to_ast; use super::user_input_ast::*; @@ -70,8 +68,7 @@ fn trim_ast(logical_ast: LogicalAST) -> Option { .into_iter() .flat_map(|(occur, child)| { trim_ast(child).map(|trimmed_child| (occur, trimmed_child)) - }) - .collect::>(); + }).collect::>(); if trimmed_children.is_empty() { None } else { @@ -237,14 +234,15 @@ impl QueryParser { } FieldType::Str(ref str_options) => { if let Some(option) = str_options.get_indexing_options() { - let mut tokenizer = self.tokenizer_manager.get(option.tokenizer()).ok_or_else( - || { - QueryParserError::UnknownTokenizer( - field_entry.name().to_string(), - option.tokenizer().to_string(), - ) - }, - 
)?; + let mut tokenizer = + self.tokenizer_manager + .get(option.tokenizer()) + .ok_or_else(|| { + QueryParserError::UnknownTokenizer( + field_entry.name().to_string(), + option.tokenizer().to_string(), + ) + })?; let mut terms: Vec<(usize, Term)> = Vec::new(); let mut token_stream = tokenizer.token_stream(phrase); token_stream.process(&mut |token| { @@ -423,8 +421,7 @@ impl QueryParser { lower: self.resolve_bound(field, &lower)?, upper: self.resolve_bound(field, &upper)?, }))) - }) - .collect::, QueryParserError>>()?; + }).collect::, QueryParserError>>()?; let result_ast = if clauses.len() == 1 { clauses.pop().unwrap() } else { @@ -452,7 +449,9 @@ fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box { value_type, lower, upper, - } => Box::new(RangeQuery::new_term_bounds(field, value_type, lower, upper)), + } => Box::new(RangeQuery::new_term_bounds( + field, value_type, &lower, &upper, + )), LogicalLiteral::All => Box::new(AllQuery), } } diff --git a/src/query/range_query.rs b/src/query/range_query.rs index fd739652c..43da4bd8c 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -96,8 +96,8 @@ impl RangeQuery { pub fn new_term_bounds( field: Field, value_type: Type, - left_bound: Bound, - right_bound: Bound, + left_bound: &Bound, + right_bound: &Bound, ) -> RangeQuery { let verify_and_unwrap_term = |val: &Term| { assert_eq!(field, val.field()); @@ -184,11 +184,7 @@ impl RangeQuery { /// /// If the field is not of the type `Str`, tantivy /// will panic when the `Weight` object is created. - pub fn new_str_bounds<'b>( - field: Field, - left: Bound<&'b str>, - right: Bound<&'b str>, - ) -> RangeQuery { + pub fn new_str_bounds(field: Field, left: Bound<&str>, right: Bound<&str>) -> RangeQuery { let make_term_val = |val: &&str| val.as_bytes().to_vec(); RangeQuery { field, @@ -202,7 +198,7 @@ impl RangeQuery { /// /// If the field is not of the type `Str`, tantivy /// will panic when the `Weight` object is created. - pub fn new_str<'b>(field: Field, range: Range<&'b str>) -> RangeQuery { + pub fn new_str(field: Field, range: Range<&str>) -> RangeQuery { RangeQuery::new_str_bounds( field, Bound::Included(range.start), diff --git a/src/query/union.rs b/src/query/union.rs index b4a7441a3..5bbe902a0 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -55,8 +55,7 @@ where None } }, - ) - .collect(); + ).collect(); Union { docsets: non_empty_docsets, bitsets: Box::new([TinySet::empty(); HORIZON_NUM_TINYBITSETS]), @@ -215,7 +214,10 @@ where // The target is outside of the buffered horizon. // advance all docsets to a doc >= to the target. - #[cfg_attr(feature = "cargo-clippy", allow(clippy::clippy::collapsible_if))] + #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::clippy::collapsible_if) + )] unordered_drain_filter(&mut self.docsets, |docset| { if docset.doc() < target { if docset.skip_next(target) == SkipResult::End { diff --git a/src/schema/facet.rs b/src/schema/facet.rs index 6a34e8d42..bb685c277 100644 --- a/src/schema/facet.rs +++ b/src/schema/facet.rs @@ -97,16 +97,12 @@ impl Facet { } /// Returns `true` iff other is a subfacet of `self`. 
- #[cfg_attr(feature = "cargo-clippy", allow(clippy::collapsible_if))] pub fn is_prefix_of(&self, other: &Facet) -> bool { let self_bytes: &[u8] = self.encoded_bytes(); let other_bytes: &[u8] = other.encoded_bytes(); - if self_bytes.len() < other_bytes.len() { - if other_bytes.starts_with(self_bytes) { - return other_bytes[self_bytes.len()] == 0u8; - } - } - false + self_bytes.len() < other_bytes.len() + && other_bytes.starts_with(self_bytes) + && other_bytes[self_bytes.len()] == 0u8 } } diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 0855200f4..85d8d14f3 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -441,8 +441,7 @@ mod tests { "count": 4, "popularity": 10 }"#, - ) - .unwrap(); + ).unwrap(); assert_eq!(doc.get_first(title_field).unwrap().text(), Some("my title")); assert_eq!( doc.get_first(author_field).unwrap().text(), diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index 4f33dc39e..ba653a411 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -152,10 +152,7 @@ fn search_fragments<'a>( /// /// Takes a vector of `FragmentCandidate`s and the text. /// Figures out the best fragment from it and creates a snippet. -fn select_best_fragment_combination<'a>( - fragments: Vec, - text: &'a str, -) -> Snippet { +fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str) -> Snippet { let best_fragment_opt = fragments.iter().max_by(|left, right| { let cmp_score = left .score @@ -177,8 +174,7 @@ fn select_best_fragment_combination<'a>( item.start - fragment.start_offset, item.stop - fragment.start_offset, ) - }) - .collect(); + }).collect(); Snippet { fragments: fragment_text.to_string(), highlighted, @@ -289,7 +285,7 @@ impl SnippetGenerator { &self.terms_text, self.max_num_chars, ); - select_best_fragment_combination(fragment_candidates, &text) + select_best_fragment_combination(&fragment_candidates[..], &text) } } @@ -332,7 +328,7 @@ Survey in 2016, 2017, and 2018."#; assert_eq!(first.score, 1.9); assert_eq!(first.stop_offset, 89); } - let snippet = select_best_fragment_combination(fragments, &TEST_TEXT); + let snippet = select_best_fragment_combination(&fragments[..], &TEST_TEXT); assert_eq!(snippet.fragments, "Rust is a systems programming language sponsored by Mozilla which\ndescribes it as a \"safe".to_owned()); assert_eq!(snippet.to_html(), "Rust is a systems programming language sponsored by Mozilla which\ndescribes it as a "safe".to_owned()) } @@ -356,7 +352,7 @@ Survey in 2016, 2017, and 2018."#; assert_eq!(first.stop_offset, 7); } - let snippet = select_best_fragment_combination(fragments, &text); + let snippet = select_best_fragment_combination(&fragments[..], &text); assert_eq!(snippet.fragments, "c d"); assert_eq!(snippet.to_html(), "c d"); } @@ -380,7 +376,7 @@ Survey in 2016, 2017, and 2018."#; assert_eq!(first.start_offset, 8); } - let snippet = select_best_fragment_combination(fragments, &text); + let snippet = select_best_fragment_combination(&fragments[..], &text); assert_eq!(snippet.fragments, "e f"); assert_eq!(snippet.to_html(), "e f"); } @@ -405,7 +401,7 @@ Survey in 2016, 2017, and 2018."#; assert_eq!(first.start_offset, 0); } - let snippet = select_best_fragment_combination(fragments, &text); + let snippet = select_best_fragment_combination(&fragments[..], &text); assert_eq!(snippet.fragments, "e f g"); assert_eq!(snippet.to_html(), "e f g"); } @@ -423,7 +419,7 @@ Survey in 2016, 2017, and 2018."#; assert_eq!(fragments.len(), 0); - let snippet = select_best_fragment_combination(fragments, &text); + let 
snippet = select_best_fragment_combination(&fragments[..], &text); assert_eq!(snippet.fragments, ""); assert_eq!(snippet.to_html(), ""); } @@ -438,7 +434,7 @@ Survey in 2016, 2017, and 2018."#; let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3); assert_eq!(fragments.len(), 0); - let snippet = select_best_fragment_combination(fragments, &text); + let snippet = select_best_fragment_combination(&fragments[..], &text); assert_eq!(snippet.fragments, ""); assert_eq!(snippet.to_html(), ""); } diff --git a/src/store/reader.rs b/src/store/reader.rs index bdf02f00c..428b013f0 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -89,7 +89,10 @@ impl StoreReader { } } -#[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_pass_by_value))] +#[cfg_attr( + feature = "cargo-clippy", + allow(clippy::needless_pass_by_value) +)] fn split_source(data: ReadOnlySource) -> (ReadOnlySource, ReadOnlySource, DocId) { let data_len = data.len(); let footer_offset = data_len - size_of::() - size_of::(); diff --git a/src/store/writer.rs b/src/store/writer.rs index f1446ab8b..3fbdee074 100644 --- a/src/store/writer.rs +++ b/src/store/writer.rs @@ -51,7 +51,8 @@ impl StoreWriter { stored_document.serialize(&mut self.intermediary_buffer)?; let doc_num_bytes = self.intermediary_buffer.len(); VInt(doc_num_bytes as u64).serialize(&mut self.current_block)?; - self.current_block.write_all(&self.intermediary_buffer[..])?; + self.current_block + .write_all(&self.intermediary_buffer[..])?; self.doc += 1; if self.current_block.len() > BLOCK_SIZE { self.write_and_compress_block()?; diff --git a/src/termdict/merger.rs b/src/termdict/merger.rs index 407a49e90..1d3844067 100644 --- a/src/termdict/merger.rs +++ b/src/termdict/merger.rs @@ -53,8 +53,7 @@ impl<'a> TermMerger<'a> { .map(|(ord, streamer)| HeapItem { streamer, segment_ord: ord, - }) - .collect(), + }).collect(), } } @@ -123,7 +122,10 @@ impl<'a> TermMerger<'a> { } /// Iterates through terms - #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] + #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::should_implement_trait) + )] pub fn next(&mut self) -> Option> { if self.advance() { Some(Term::wrap(self.current_streamers[0].streamer.key())) diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs index 5ae259425..54102a9f4 100644 --- a/src/termdict/mod.rs +++ b/src/termdict/mod.rs @@ -75,7 +75,7 @@ mod tests { term_dictionary_builder.finish().unwrap(); } let source = directory.open_read(&path).unwrap(); - let term_dict: TermDictionary = TermDictionary::from_source(source); + let term_dict: TermDictionary = TermDictionary::from_source(&source); for (term_ord, term) in COUNTRIES.iter().enumerate() { assert_eq!(term_dict.term_ord(term).unwrap(), term_ord as u64); let mut bytes = vec![]; @@ -102,7 +102,7 @@ mod tests { term_dictionary_builder.finish().unwrap(); } let source = directory.open_read(&path).unwrap(); - let term_dict: TermDictionary = TermDictionary::from_source(source); + let term_dict: TermDictionary = TermDictionary::from_source(&source); assert_eq!(term_dict.get("abc").unwrap().doc_freq, 34u32); assert_eq!(term_dict.get("abcd").unwrap().doc_freq, 346u32); let mut stream = term_dict.stream(); @@ -189,7 +189,7 @@ mod tests { term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionary = TermDictionary::from_source(source); + let term_dictionary: TermDictionary = TermDictionary::from_source(&source); { let mut streamer = 
term_dictionary.stream(); let mut i = 0; @@ -224,7 +224,7 @@ mod tests { term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionary = TermDictionary::from_source(source); + let term_dictionary: TermDictionary = TermDictionary::from_source(&source); let mut kv_stream = term_dictionary.stream(); assert!(kv_stream.advance()); assert_eq!(kv_stream.key(), "abcdefghijklmnopqrstuvwxy".as_bytes()); @@ -256,7 +256,7 @@ mod tests { let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionary = TermDictionary::from_source(source); + let term_dictionary: TermDictionary = TermDictionary::from_source(&source); { for i in (0..20).chain(6000..8_000) { let &(ref target_key, _) = &ids[i]; @@ -324,7 +324,7 @@ mod tests { term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionary = TermDictionary::from_source(source); + let term_dictionary: TermDictionary = TermDictionary::from_source(&source); let mut stream = term_dictionary.stream(); assert!(stream.advance()); assert!(stream.key().is_empty()); @@ -348,7 +348,7 @@ mod tests { term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionary = TermDictionary::from_source(source); + let term_dictionary: TermDictionary = TermDictionary::from_source(&source); let value_list = |mut streamer: TermStreamer| { let mut res: Vec = vec![]; @@ -417,7 +417,7 @@ mod tests { term_dictionary_builder.finish().unwrap(); } let source = directory.open_read(&path).unwrap(); - let term_dict: TermDictionary = TermDictionary::from_source(source); + let term_dict: TermDictionary = TermDictionary::from_source(&source); // We can now build an entire dfa. let lev_automaton_builder = LevenshteinAutomatonBuilder::new(2, true); diff --git a/src/termdict/streamer.rs b/src/termdict/streamer.rs index 48eb56c7d..98277f2ef 100644 --- a/src/termdict/streamer.rs +++ b/src/termdict/streamer.rs @@ -132,6 +132,10 @@ where } /// Return the next `(key, value)` pair. 
+ #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::should_implement_trait) + )] pub fn next(&mut self) -> Option<(&[u8], &TermInfo)> { if self.advance() { Some((self.key(), self.value())) diff --git a/src/termdict/term_info_store.rs b/src/termdict/term_info_store.rs index 8b6a0159b..130b5d62f 100644 --- a/src/termdict/term_info_store.rs +++ b/src/termdict/term_info_store.rs @@ -91,7 +91,7 @@ fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 { assert!(data.len() >= addr_byte + 7); let val_unshifted_unmasked: u64 = unsafe { // ok because the pointer is only accessed using `ptr::read_unaligned` - #[cfg_attr(feature="cargo-clippy", allow(clippy::cast_ptr_alignment))] + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] let addr = data.as_ptr().add(addr_byte) as *const u64; // ok thanks to the 7 byte padding ptr::read_unaligned(addr) diff --git a/src/termdict/termdict.rs b/src/termdict/termdict.rs index 99bea0b09..0f8a28231 100644 --- a/src/termdict/termdict.rs +++ b/src/termdict/termdict.rs @@ -77,7 +77,8 @@ where let mut file = self.fst_builder.into_inner().map_err(convert_fst_error)?; { let mut counting_writer = CountingWriter::wrap(&mut file); - self.term_info_store_writer.serialize(&mut counting_writer)?; + self.term_info_store_writer + .serialize(&mut counting_writer)?; let footer_size = counting_writer.written_bytes(); (footer_size as u64).serialize(&mut counting_writer)?; counting_writer.flush()?; @@ -112,7 +113,7 @@ pub struct TermDictionary { impl TermDictionary { /// Opens a `TermDictionary` given a data source. - pub fn from_source(source: ReadOnlySource) -> Self { + pub fn from_source(source: &ReadOnlySource) -> Self { let total_len = source.len(); let length_offset = total_len - 8; let mut split_len_buffer: &[u8] = &source.as_slice()[length_offset..]; @@ -136,7 +137,7 @@ impl TermDictionary { .finish() .expect("Writing in a Vec should never fail"); let source = ReadOnlySource::from(term_dictionary_data); - Self::from_source(source) + Self::from_source(&source) } /// Returns the number of terms in the dictionary. diff --git a/src/tokenizer/stemmer.rs b/src/tokenizer/stemmer.rs index 4c91bfb93..064662889 100644 --- a/src/tokenizer/stemmer.rs +++ b/src/tokenizer/stemmer.rs @@ -1,3 +1,5 @@ +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] + use super::{Token, TokenFilter, TokenStream}; use rust_stemmers::{self, Algorithm}; use std::sync::Arc;
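
For reference, the most user-visible change above is that `Searcher::doc` now takes `DocAddress` by value (it is a small Copy struct), so call sites drop the `&`; in the same spirit, `TermDictionary::from_source`, `InvertedIndexReader::empty` and `RangeQuery::new_term_bounds` now borrow their arguments, and the `TinySet` accessors take `self` by value. Below is a minimal retrieval sketch against the updated signature, loosely following examples/basic_search.rs and the 0.7-era API; the schema, document, and memory budget used here are illustrative and not part of this diff.

#[macro_use]
extern crate tantivy;

use tantivy::collector::TopCollector;
use tantivy::query::QueryParser;
use tantivy::schema::{SchemaBuilder, STORED, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    // Index a single stored text field, as in examples/basic_search.rs.
    let mut schema_builder = SchemaBuilder::default();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema.clone());

    let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
    index_writer.add_document(doc!(title => "The Old Man and the Sea"));
    index_writer.commit()?;
    index.load_searchers()?;

    let searcher = index.searcher();
    let query = QueryParser::for_index(&index, vec![title]).parse_query("sea")?;
    let mut top_collector = TopCollector::with_limit(10);
    searcher.search(&*query, &mut top_collector)?;

    for doc_address in top_collector.docs() {
        // `DocAddress` is now passed by value; previously this was `searcher.doc(&doc_address)?`.
        let retrieved_doc = searcher.doc(doc_address)?;
        println!("{}", schema.to_json(&retrieved_doc));
    }
    Ok(())
}

Passing a two-word Copy struct by value avoids a needless indirection and is likely what clippy's trivially_copy_pass_by_ref lint flagged, in keeping with the other clippy-driven cleanups in this diff.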