Code cleaning.

2026-07-07 17:50:42 +00:00 · 2016-07-31 15:34:32 +09:00
parent 0e5d6720ba
commit e486495cb8
20 changed files with 68 additions and 120 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,6 @@ version = "0.1.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 build = "build.rs"

-
 [dependencies]
 byteorder = "0.4"
 memmap = "0.2"
@@ -34,12 +33,3 @@ gcc = "0.3.24"
 [[bin]]
 name = "tantivy-merge"
 path = "src/cli/merge.rs"
-
-
-# [profile.release]
-# opt-level = 3
-# debug = true
-# rpath = false
-# lto = false
-# debug-assertions = false
-# codegen-units = 1
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -31,7 +31,7 @@ impl BinarySerializable for () {
 impl<T: BinarySerializable> BinarySerializable for Vec<T> {
    fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
        let mut total_size = try!(VInt(self.len() as u64).serialize(writer));
-        for it in self.iter() {
+        for it in self {
            total_size += try!(it.serialize(writer));
        }
        Ok(total_size)
--- a/src/compression/simdcomp.rs
+++ b/src/compression/simdcomp.rs
@@ -50,9 +50,9 @@ impl SIMDBlockEncoder {
    
    pub fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] {
        let mut byte_written = 0;
-        for v in input.iter() {
-            let mut to_encode: u32 = *v - offset;
-            offset = *v;
+        for &v in input {
+            let mut to_encode: u32 = v - offset;
+            offset = v;
            loop {
                let next_byte: u8 = (to_encode % 128u32) as u8;
                to_encode /= 128u32;
@@ -72,8 +72,8 @@ impl SIMDBlockEncoder {
    
    pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
        let mut byte_written = 0;
-        for &i in input.iter() {
-            let mut to_encode: u32 = i;
+        for &v in input {
+            let mut to_encode: u32 = v;
            loop {
                let next_byte: u8 = (to_encode % 128u32) as u8;
                to_encode /= 128u32;
@@ -267,15 +267,13 @@ mod tests {
                .map(|i| 4 + i * 7 / 2)
                .into_iter()
                .collect();
-            for offset in [0u32, 1u32, 2u32].iter() {
+            for offset in &[0u32, 1u32, 2u32] {
                let encoded_data = encoder.compress_vint_sorted(&input, *offset);
                assert_eq!(encoded_data.len(), expected_length);
                let mut decoder = SIMDBlockDecoder::new();
                let remaining_data = decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len());
                assert_eq!(0, remaining_data.len());
-                for (&decoded, &expected) in decoder.output_array().iter().zip(input.iter()) {
-                    assert_eq!(decoded, expected);
-                }
+                assert_eq!(input, decoder.output_array());
            }
        }
        {
--- a/src/core/merger.rs
+++ b/src/core/merger.rs
@@ -55,7 +55,7 @@ impl<'a> PostingsMerger<'a> {
    fn new(readers: &'a Vec<SegmentReader>) -> PostingsMerger<'a> {
        let mut doc_offsets: Vec<DocId> = Vec::new();
        let mut max_doc = 0;
-        for reader in readers.iter() {
+        for reader in readers {
            doc_offsets.push(max_doc);
            max_doc += reader.max_doc();
        };
@@ -142,7 +142,7 @@ impl IndexMerger {
    pub fn open(schema: Schema, segments: &Vec<Segment>) -> io::Result<IndexMerger> {
        let mut readers = Vec::new();
        let mut max_doc = 0;
-        for segment in segments.iter() {
+        for segment in segments {
            let reader = try!(SegmentReader::open(segment.clone()));
            max_doc += reader.max_doc();
            readers.push(reader);
@@ -166,7 +166,7 @@ impl IndexMerger {
            let mut u32_readers = Vec::new();
            let mut min_val = u32::min_value();
            let mut max_val = 0;
-            for reader in self.readers.iter() {
+            for reader in &self.readers {
                let u32_reader = try!(reader.get_fast_field_reader(field));
                min_val = min(min_val, u32_reader.min_val());
                max_val = max(max_val, u32_reader.max_val());
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -18,8 +18,8 @@ impl Searcher {

    pub fn doc(&self, doc_address: &DocAddress) -> io::Result<Document> {
        // TODO err
-        let DocAddress(ref segment_local_id, ref doc_id) = *doc_address;
-        let segment_reader = &self.segments[*segment_local_id as usize];
+        let DocAddress(segment_local_id, doc_id) = *doc_address;
+        let segment_reader = &self.segments[segment_local_id as usize];
        segment_reader.doc(doc_id)
    }

--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -101,7 +101,7 @@ impl SegmentReader {
    /// bearing the given doc id.
    /// This method is slow and should seldom be called from
    /// within a collector.
-    pub fn doc(&self, doc_id: &DocId) -> io::Result<Document> {
+    pub fn doc(&self, doc_id: DocId) -> io::Result<Document> {
        self.store_reader.get(doc_id)
    }

--- a/src/datastruct/skip/skiplist_builder.rs
+++ b/src/datastruct/skip/skiplist_builder.rs
@@ -97,13 +97,13 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
        let mut layer_sizes: Vec<u32> = Vec::new();
        size += self.data_layer.buffer.len() as u32;
        layer_sizes.push(size);
-        for layer in self.skip_layers.iter() {
+        for layer in &self.skip_layers {
            size += layer.buffer.len() as u32;
            layer_sizes.push(size);
        }
        try!(layer_sizes.serialize(output));
        try!(self.data_layer.write(output));
-        for layer in self.skip_layers.iter() {
+        for layer in &self.skip_layers {
            try!(layer.write(output));
        }
        Ok(())
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -141,8 +141,8 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::new(write).unwrap();
            let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
-            for x in permutation.iter() {
-                add_single_field_doc(&mut fast_field_writers, field, x.clone());
+            for x in &permutation {
+                add_single_field_doc(&mut fast_field_writers, field, *x);
            }
            fast_field_writers.serialize(&mut serializer).unwrap();
            serializer.close().unwrap();
@@ -196,8 +196,8 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::new(write).unwrap();
            let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
-            for x in permutation.iter() {
-                add_single_field_doc(&mut fast_field_writers, field, x.clone());
+            for x in &permutation {
+                add_single_field_doc(&mut fast_field_writers, field, *x);
            }
            fast_field_writers.serialize(&mut serializer).unwrap();
            serializer.close().unwrap();
@@ -228,8 +228,8 @@ mod tests {
            let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
            let mut serializer = FastFieldSerializer::new(write).unwrap();
            let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
-            for x in permutation.iter() {
-                add_single_field_doc(&mut fast_field_writers, field, x.clone());
+            for x in &permutation {
+                add_single_field_doc(&mut fast_field_writers, field, *x);
            }
            fast_field_writers.serialize(&mut serializer).unwrap();
            serializer.close().unwrap();
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -35,7 +35,7 @@ impl U32FastFieldsWriter {
    }

    pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
-        for field_writer in self.field_writers.iter() {
+        for field_writer in &self.field_writers {
            try!(field_writer.serialize(serializer));
        }
        Ok(())
@@ -83,11 +83,11 @@ impl U32FastFieldWriter {

    pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
        let zero = 0;
-        let min = self.vals.iter().min().unwrap_or(&zero).clone();
-        let max = self.vals.iter().max().unwrap_or(&min).clone();
-        try!(serializer.new_u32_fast_field(self.field.clone(), min, max));
-        for val in self.vals.iter() {
-            try!(serializer.add_val(val.clone()));
+        let min = *self.vals.iter().min().unwrap_or(&zero);
+        let max = *self.vals.iter().max().unwrap_or(&min);
+        try!(serializer.new_u32_fast_field(self.field, min, max));
+        for &val in &self.vals {
+            try!(serializer.add_val(val));
        }
        serializer.close_field()
    }
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -35,15 +35,16 @@ mod macros {
 }

 mod core;
-mod datastruct;
-mod postings;
-mod directory;
+
 mod compression;
 mod fastfield;
 mod store;
 mod common;
-pub mod query;

+pub mod postings;
+pub mod query;
+pub mod directory;
+pub mod datastruct;
 pub mod analyzer;
 pub mod collector;

--- a/src/postings/chained_postings.rs
+++ b/src/postings/chained_postings.rs
@@ -12,10 +12,10 @@ pub struct ChainedPostings<'a> {
 impl<'a> ChainedPostings<'a> {
    
    pub fn new(chained_postings: Vec<OffsetPostings<'a>>) -> ChainedPostings {
-        let mut doc_freq: usize = 0;
-        for segment_postings in chained_postings.iter() {
-            doc_freq += segment_postings.doc_freq();
-        }
+        let doc_freq: usize = chained_postings
+            .iter()
+            .map(|segment_postings| segment_postings.doc_freq())
+            .fold(0, |sum, addition| sum + addition);
        ChainedPostings {
            chained_postings: chained_postings,
            posting_id: 0,
--- a/src/postings/docset.rs
+++ b/src/postings/docset.rs
@@ -1,6 +1,7 @@
 use DocId;
 use std::borrow::Borrow;
 use std::borrow::BorrowMut;
+use std::cmp::Ordering;

 #[derive(PartialEq, Eq, Debug)]
 pub enum SkipResult {
@@ -18,7 +19,19 @@ pub trait DocSet {
    // after skipping position
    // the iterator in such a way that doc() will return a
    // value greater or equal to target.
-    fn skip_next(&mut self, target: DocId) -> SkipResult;
+    fn skip_next(&mut self, target: DocId) -> SkipResult {
+        loop {
+            match self.doc().cmp(&target) {
+                Ordering::Less => {
+                    if !self.next() {
+                        return SkipResult::End;
+                    }
+                },
+                Ordering::Equal => { return SkipResult::Reached },
+                Ordering::Greater => { return SkipResult::OverStep },
+            }
+        }
+    }

    fn doc(&self,) -> DocId;

--- a/src/postings/intersection.rs
+++ b/src/postings/intersection.rs
@@ -1,5 +1,4 @@
 use postings::DocSet;
-use postings::SkipResult;
 use std::cmp::Ordering;
 use DocId;

@@ -76,26 +75,9 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
    
    fn doc_freq(&self,) -> usize {
        // TODO not a great idea.
-        panic!("intersectiond does not implement doc freq");
+        panic!("intersection does not implement doc freq");
    }
    
-
-    fn skip_next(&mut self, target: DocId) -> SkipResult {
-        loop {
-            match self.doc().cmp(&target) {
-                Ordering::Equal => {
-                    return SkipResult::Reached;
-                }
-                Ordering::Greater => {
-                    return SkipResult::OverStep;
-                }
-                Ordering::Less => {}
-            }
-            if !self.next() {
-                return SkipResult::End;
-            }
-        }
-    }
 }

 #[inline(never)]
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -1,7 +1,6 @@
 use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
 use DocId;
-use std::cmp::Ordering;
-use postings::{Postings, FreqHandler, SkipResult, DocSet};
+use postings::{Postings, FreqHandler, DocSet};
 use std::num::Wrapping;


@@ -82,26 +81,6 @@ impl<'a> DocSet for SegmentPostings<'a> {
        self.block_decoder.output(self.index_within_block())
    }

-    // after skipping position
-    // the iterator in such a way that doc() will return a
-    // value greater or equal to target.
-    fn skip_next(&mut self, target: DocId) -> SkipResult {
-        loop {
-            match self.doc().cmp(&target) {
-                Ordering::Equal => {
-                    return SkipResult::Reached;
-                }
-                Ordering::Greater => {
-                    return SkipResult::OverStep;
-                }
-                Ordering::Less => {}
-            }
-            if !self.next() {
-                return SkipResult::End;
-            }
-        }
-    }
-
    fn doc_freq(&self,) -> usize {
        self.doc_freq
    }
--- a/src/postings/union_postings.rs
+++ b/src/postings/union_postings.rs
@@ -1,9 +1,8 @@

 use DocId;
 use postings::{Postings, DocSet};
-use std::collections::BinaryHeap;
-use postings::SkipResult;
 use std::cmp::Ordering;
+use std::collections::BinaryHeap;
 use query::MultiTermScorer;
 use postings::ScoredDocSet;
 use query::Scorer;
@@ -13,7 +12,7 @@ struct HeapItem(DocId, usize, u32);

 impl PartialOrd for HeapItem {
    fn partial_cmp(&self, other:&Self) -> Option<Ordering> {
-         (self.0, self.1).partial_cmp(&(other.0, other.1)).map(|o| o.reverse())
+         Some(self.cmp(&other))
    }
 }

@@ -87,21 +86,9 @@ impl<TPostings: Postings> DocSet for UnionPostings<TPostings> {
        }
    }

-    fn skip_next(&mut self, target: DocId) -> SkipResult {
-        // TODO skip the underlying posting object.
-        loop {
-            match self.doc.cmp(&target) {
-                Ordering::Less => {
-                    if !self.next() {
-                        return SkipResult::End;
-                    }
-                },
-                Ordering::Equal => { return SkipResult::Reached },
-                Ordering::Greater => { return SkipResult::OverStep },
-            }
-        }
-    }
-    
+
+    // TODO implement a faster skip_next
+        
    fn doc(&self,) -> DocId {
        self.doc
    }
--- a/src/postings/writer.rs
+++ b/src/postings/writer.rs
@@ -94,7 +94,7 @@ impl PostingsWriter {
    }

    pub fn serialize(&self, serializer: &mut PostingsSerializer) -> io::Result<()> {
-        for (term, postings_id) in self.term_index.iter() {
+        for (term, postings_id) in &self.term_index {
            let term_postings_writer = &self.postings[postings_id.clone()];
            let term_docfreq = term_postings_writer.doc_freq();
            try!(serializer.new_term(&term, term_docfreq));
--- a/src/schema/field.rs
+++ b/src/schema/field.rs
@@ -3,8 +3,6 @@ use std::io::Write;
 use std::io::Read;
 use common::BinarySerializable;

-// TODO impl Copy trait
-
 #[derive(Copy,Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
 pub struct Field(pub u8);

--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -56,7 +56,7 @@ mod tests {
        let store_source = directory.open_read(&path).unwrap();
        let store = StoreReader::new(store_source);
        for i in (0..10).map(|i| i * 3 / 2) {
-            assert_eq!(*store.get(&i).unwrap().get_first(field_title).unwrap().text(), format!("Doc {}", i));
+            assert_eq!(*store.get(i).unwrap().get_first(field_title).unwrap().text(), format!("Doc {}", i));
        }
    }

@@ -78,7 +78,7 @@ mod tests {
        let store_source = directory.open_read(&path).unwrap();
        let store = StoreReader::new(store_source);
        b.iter(|| {
-            store.get(&12).unwrap();
+            store.get(12).unwrap();
        });

    }
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -35,14 +35,14 @@ impl StoreReader {
        offsets
    }

-    fn block_offset(&self, seek: &DocId) -> OffsetIndex {
-        fn search(offsets: &[OffsetIndex], seek: &DocId) -> OffsetIndex {
+    fn block_offset(&self, seek: DocId) -> OffsetIndex {
+        fn search(offsets: &[OffsetIndex], seek: DocId) -> OffsetIndex {
            let m = offsets.len() / 2;
            let pivot_offset = &offsets[m];
            if offsets.len() <= 1 {
                return pivot_offset.clone()
            }
-            match pivot_offset.0.cmp(seek) {
+            match pivot_offset.0.cmp(&seek) {
                Ordering::Less => search(&offsets[m..], seek),
                Ordering::Equal => pivot_offset.clone(),
                Ordering::Greater => search(&offsets[..m], seek),
@@ -62,12 +62,12 @@ impl StoreReader {
        lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())
    }

-    pub fn get(&self, doc_id: &DocId) -> io::Result<Document> {
+    pub fn get(&self, doc_id: DocId) -> io::Result<Document> {
        let OffsetIndex(first_doc_id, block_offset) = self.block_offset(doc_id);
        try!(self.read_block(block_offset as usize));
        let mut current_block_mut = self.current_block.borrow_mut();
        let mut cursor = Cursor::new(&mut current_block_mut[..]);
-        for _ in first_doc_id..*doc_id  {
+        for _ in first_doc_id..doc_id  {
            let block_length = try!(u32::deserialize(&mut cursor));
            try!(cursor.seek(SeekFrom::Current(block_length as i64)));
        }
--- a/src/store/writer.rs
+++ b/src/store/writer.rs
@@ -52,7 +52,7 @@ impl StoreWriter {
        match reader.offsets.last() {
            Some(&OffsetIndex(ref num_docs, ref body_size)) => {
                try!(self.writer.write_all(&reader.data.as_slice()[0..*body_size as usize]));
-                for &OffsetIndex(doc, offset) in reader.offsets.iter() {
+                for &OffsetIndex(doc, offset) in &reader.offsets {
                    self.offsets.push(OffsetIndex(self.doc + doc, self.written + offset));
                }
                self.written += *body_size;
@@ -68,7 +68,7 @@ impl StoreWriter {
    pub fn store<'a>(&mut self, field_values: &Vec<&'a FieldValue>) -> io::Result<()> {
        self.intermediary_buffer.clear();
        try!((field_values.len() as u32).serialize(&mut self.intermediary_buffer));
-        for field_value in field_values.iter() {
+        for field_value in field_values {
            try!((*field_value).serialize(&mut self.intermediary_buffer));
        }
        try!((self.intermediary_buffer.len() as u32).serialize(&mut self.current_block));