diff --git a/examples/simple_search.rs b/examples/simple_search.rs index 39efabe13..42b0600c8 100644 --- a/examples/simple_search.rs +++ b/examples/simple_search.rs @@ -14,7 +14,7 @@ fn main() { // Let's create a temporary directory for the // sake of this example if let Ok(dir) = TempDir::new("tantivy_example_dir") { - run_example(&dir.path()).unwrap(); + run_example(dir.path()).unwrap(); dir.close().unwrap(); } } diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 29ca0bf8b..e7fd0d018 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -114,7 +114,7 @@ impl Collector for TopCollector { // It's ok to unwrap as long as a limit of 0 is forbidden. let limit_doc: GlobalScoredDoc = *self.heap.peek().expect("Top collector with size 0 is forbidden"); if limit_doc.score < scored_doc.score() { - let mut mut_head = self.heap.peek_mut().unwrap(); + let mut mut_head = self.heap.peek_mut().expect("Top collector with size 0 is forbidden"); mut_head.score = scored_doc.score(); mut_head.doc_address = DocAddress(self.segment_id, scored_doc.doc()); } diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs index 7a3500ad5..a2623f9d0 100644 --- a/src/core/index_meta.rs +++ b/src/core/index_meta.rs @@ -3,7 +3,7 @@ use schema::Schema; use core::SegmentId; -/// MetaInformation about the `Index`. +/// Meta information about the `Index`. /// /// This object is serialized on disk in the `meta.json` file. /// It keeps information about diff --git a/src/datastruct/stacker/hashmap.rs b/src/datastruct/stacker/hashmap.rs index 787e6bcec..55a6dc12c 100644 --- a/src/datastruct/stacker/hashmap.rs +++ b/src/datastruct/stacker/hashmap.rs @@ -25,7 +25,7 @@ impl Default for BytesRef { /// `KeyValue` is the item stored in the hash table. /// The key is actually a `BytesRef` object stored in an external heap. -/// The value_addr also points to an address in the heap. +/// The `value_addr` also points to an address in the heap. 
/// /// The key and the value are actually stored contiguously. /// For this reason, the (start, stop) information is actually redundant @@ -48,7 +48,7 @@ pub enum Entry { } -/// Customized HashMap with string keys +/// Customized `HashMap` with string keys /// /// This `HashMap` takes String as keys. Keys are /// stored in a user defined heap. diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 02dca74e9..d1e1c8b6d 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -1,11 +1,11 @@ -/// FastField module +/// Fast field module /// -/// FastField are the equivalent of `DocValues` in `Lucene`. -/// FastFields are stored in column-oriented fashion and allow fast +/// Fast fields are the equivalent of `DocValues` in `Lucene`. +/// Fast fields are stored in column-oriented fashion and allow fast /// random access given a `DocId`. /// /// Their performance is comparable to that of an array lookup. -/// FastField are useful when a field is required for all or most of +/// They are useful when a field is required for all or most of /// the `DocSet` : for instance for scoring, grouping, filtering, or facetting. /// /// Currently only u32 fastfield are supported. diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index 9dfbfceb8..7c71661d7 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -9,7 +9,7 @@ use super::compute_num_bits; /// `FastFieldSerializer` is in charge of serializing /// fastfields on disk. /// -/// FastField are encoded using bit-packing. +/// Fast fields are encoded using bit-packing. /// /// `FastFieldWriter`s are in charge of pushing the data to /// the serializer. diff --git a/src/indexer/segment_manager.rs b/src/indexer/segment_manager.rs index 411490aa2..3a41adc25 100644 --- a/src/indexer/segment_manager.rs +++ b/src/indexer/segment_manager.rs @@ -53,7 +53,7 @@ impl Debug for SegmentManager { } -/// Returns the segment_metas for (committed segment, uncommitted segments). 
+/// Returns the `SegmentMeta`s for (committed segment, uncommitted segments). /// The result is consistent with other transactions. /// /// For instance, a segment will not appear in both committed and uncommitted diff --git a/src/indexer/segment_register.rs b/src/indexer/segment_register.rs index 777c14670..5718a6228 100644 --- a/src/indexer/segment_register.rs +++ b/src/indexer/segment_register.rs @@ -52,7 +52,7 @@ pub struct SegmentRegister { impl Debug for SegmentRegister { fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { try!(write!(f, "SegmentRegister(")); - for (ref k, ref v) in &self.segment_states { + for (k, v) in &self.segment_states { try!(write!(f, "{}:{}, ", k.short_uuid_string(), v.state.letter_code())); } try!(write!(f, ")")); @@ -91,11 +91,10 @@ impl SegmentRegister { } pub fn segment_ids(&self,) -> Vec { - let segment_ids: Vec = self.segment_metas() + self.segment_metas() .into_iter() .map(|segment_meta| segment_meta.segment_id) - .collect(); - segment_ids + .collect() } #[cfg(test)] @@ -134,7 +133,7 @@ impl SegmentRegister { pub fn start_merge(&mut self, segment_id: &SegmentId) { self.segment_states - .get_mut(&segment_id) + .get_mut(segment_id) .expect("Received a merge notification for a segment that is not registered") .start_merge(); } @@ -147,7 +146,7 @@ impl From> for SegmentRegister { fn from(segment_metas: Vec) -> SegmentRegister { let mut segment_states = HashMap::new(); for segment_meta in segment_metas { - let segment_id = segment_meta.segment_id.clone(); + let segment_id = segment_meta.segment_id; let segment_entry = SegmentEntry { meta: segment_meta, state: SegmentState::Ready, diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 8ef14945d..d0460b462 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -1,3 +1,5 @@ +#![allow(for_kv_map)] + use chan; use core::Index; use core::Segment; @@ -67,8 +69,13 @@ fn end_merge( } -/// The segment updater is in charge of 
-/// receiving different SegmentUpdate +/// The segment updater is in charge of processing all of the +/// `SegmentUpdate`s. +/// +/// All this processing happens on a single thread +/// consuming a common queue. +/// +/// The segment updates producers are: /// - indexing threads are sending new segments /// - merging threads are sending merge operations /// - the index writer sends "terminate" diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index b5be24658..17ddf91c4 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -20,7 +20,7 @@ use indexer::segment_serializer::SegmentSerializer; use datastruct::stacker::Heap; use indexer::index_writer::MARGIN_IN_BYTES; -/// A SegmentWriter is the object in charge of creating segment index from a +/// A `SegmentWriter` is in charge of creating a segment index from /// documents. /// /// They creates the postings list in anonymous memory. diff --git a/src/postings/freq_handler.rs b/src/postings/freq_handler.rs index 4f44fb9e0..ea9cb6ae6 100644 --- a/src/postings/freq_handler.rs +++ b/src/postings/freq_handler.rs @@ -1,5 +1,4 @@ use compression::SIMDBlockDecoder; -use std::io::Cursor; use common::VInt; use common::BinarySerializable; use compression::CompositeDecoder; @@ -7,7 +6,7 @@ use postings::SegmentPostingsOption; use compression::NUM_DOCS_PER_BLOCK; -/// The FreqHandler object is in charge of decompressing +/// `FreqHandler` is in charge of decompressing /// frequencies and/or positions. 
pub struct FreqHandler { freq_decoder: SIMDBlockDecoder, @@ -19,11 +18,9 @@ pub struct FreqHandler { fn read_positions(data: &[u8]) -> Vec { let mut composite_reader = CompositeDecoder::new(); - let mut cursor = Cursor::new(data); - // TODO error - let uncompressed_len = VInt::deserialize(&mut cursor).unwrap().0 as usize; - let offset_data = &data[cursor.position() as usize..]; - composite_reader.uncompress_unsorted(offset_data, uncompressed_len); + let mut readable: &[u8] = data; + let uncompressed_len = VInt::deserialize(&mut readable).unwrap().0 as usize; + composite_reader.uncompress_unsorted(readable, uncompressed_len); composite_reader.into() } diff --git a/src/postings/intersection.rs b/src/postings/intersection.rs index 72dd804e7..68cb1ec3b 100644 --- a/src/postings/intersection.rs +++ b/src/postings/intersection.rs @@ -2,9 +2,9 @@ use postings::DocSet; use std::cmp::Ordering; use DocId; -// TODO Find a way to specialize IntersectionDocSet +// TODO Find a way to specialize `IntersectionDocSet` -/// Creates a DocSet that iterator through the intersection of two `DocSet`s. +/// Creates a `DocSet` that iterates through the intersection of two `DocSet`s. pub struct IntersectionDocSet<'a> { left: Box, right: Box, diff --git a/src/postings/postings.rs b/src/postings/postings.rs index 071068c95..8b964d0a9 100644 --- a/src/postings/postings.rs +++ b/src/postings/postings.rs @@ -12,8 +12,8 @@ use common::HasLen; /// as well as the list of term positions. /// /// Its main implementation is `SegmentPostings`, -/// but other implementations mocking SegmentPostings exist, -/// in order to help when merging segments or for testing. +/// but other implementations mocking `SegmentPostings` exist, +/// for merging segments or for testing. 
pub trait Postings: DocSet { /// Returns the term frequency fn term_freq(&self,) -> u32; diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 7afe42b14..3b6ddd440 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -54,7 +54,7 @@ pub trait PostingsWriter { } } -/// The SpecializedPostingsWriter is just here to remove dynamic +/// The `SpecializedPostingsWriter` is just here to remove dynamic /// dispatch to the recorder information. pub struct SpecializedPostingsWriter<'a, Rec: Recorder + 'static> { term_index: HashMap<'a, Rec>, diff --git a/src/postings/term_info.rs b/src/postings/term_info.rs index 640caac7d..ac7edf591 100644 --- a/src/postings/term_info.rs +++ b/src/postings/term_info.rs @@ -6,10 +6,10 @@ use std::io; /// associated to terms in the `.term` file. /// /// It consists of -/// * doc_freq : the number of document in the segment +/// * `doc_freq` : the number of documents in the segment /// containing this term. It is also the length of the /// posting list associated to this term -/// * postings_offset: an offset in the `.idx` file +/// * `postings_offset` : an offset in the `.idx` file /// addressing the start of the posting list associated /// to this term. #[derive(Debug,Ord,PartialOrd,Eq,PartialEq,Clone)] diff --git a/src/query/daat_multiterm_scorer.rs b/src/query/daat_multiterm_scorer.rs index a3abcfb0a..b0ab1920c 100644 --- a/src/query/daat_multiterm_scorer.rs +++ b/src/query/daat_multiterm_scorer.rs @@ -13,9 +13,8 @@ use Score; /// Each `HeapItem` represents the head of /// a segment postings being merged. /// -/// Heap(doc_id, segment_ordinal) -/// * doc_id - is the current doc id for the given segment postings -/// * segment_ordinal - is the ordinal used to identify to which segment postings +/// * `doc` - is the current doc id for the given segment postings +/// * `ord` - is the ordinal used to identify to which segment postings /// this heap item belong to. 
#[derive(Eq, PartialEq)] struct HeapItem { @@ -23,7 +22,7 @@ struct HeapItem { ord: u32, } -/// HeapItem are ordered by the document +/// `HeapItem` are ordered by the document impl PartialOrd for HeapItem { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) @@ -185,7 +184,7 @@ impl DocSet for DAATMul self.similarity.clear(); let mut ord_bitset = 0u64; match self.queue.peek() { - Some(ref heap_item) => { + Some(heap_item) => { self.doc = heap_item.doc; let ord: usize = heap_item.ord as usize; let fieldnorm = self.get_field_norm(ord, heap_item.doc); diff --git a/src/query/tfidf.rs b/src/query/tfidf.rs index 533a23ef6..749c4663e 100644 --- a/src/query/tfidf.rs +++ b/src/query/tfidf.rs @@ -4,9 +4,9 @@ use super::Explanation; use super::Similarity; -/// TfIdf is the default pertinence score in tantivy. +/// `TfIdf` is the default pertinence score in tantivy. /// -/// See [TfIdf in the global documentation](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html) +/// See [Tf-Idf in the global documentation](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html) #[derive(Clone)] pub struct TfIdf { coords: Vec, diff --git a/src/schema/named_field_document.rs b/src/schema/named_field_document.rs index 2b18d5aff..89d55ea73 100644 --- a/src/schema/named_field_document.rs +++ b/src/schema/named_field_document.rs @@ -8,7 +8,7 @@ use rustc_serialize::Encoder; /// Internal representation of a document used for JSON /// serialization. /// -/// A NamedFieldDocument is a simple representation of a document +/// A `NamedFieldDocument` is a simple representation of a document /// as a `BTreeMap>`. /// pub struct NamedFieldDocument(pub BTreeMap>);