Code cleaning.

This commit is contained in:
Paul Masurel
2016-07-31 15:34:32 +09:00
parent 0e5d6720ba
commit e486495cb8
20 changed files with 68 additions and 120 deletions

View File

@@ -4,7 +4,6 @@ version = "0.1.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
build = "build.rs"
[dependencies]
byteorder = "0.4"
memmap = "0.2"
@@ -34,12 +33,3 @@ gcc = "0.3.24"
[[bin]]
name = "tantivy-merge"
path = "src/cli/merge.rs"
# [profile.release]
# opt-level = 3
# debug = true
# rpath = false
# lto = false
# debug-assertions = false
# codegen-units = 1

View File

@@ -31,7 +31,7 @@ impl BinarySerializable for () {
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
let mut total_size = try!(VInt(self.len() as u64).serialize(writer));
for it in self.iter() {
for it in self {
total_size += try!(it.serialize(writer));
}
Ok(total_size)

View File

@@ -50,9 +50,9 @@ impl SIMDBlockEncoder {
pub fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] {
let mut byte_written = 0;
for v in input.iter() {
let mut to_encode: u32 = *v - offset;
offset = *v;
for &v in input {
let mut to_encode: u32 = v - offset;
offset = v;
loop {
let next_byte: u8 = (to_encode % 128u32) as u8;
to_encode /= 128u32;
@@ -72,8 +72,8 @@ impl SIMDBlockEncoder {
pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
let mut byte_written = 0;
for &i in input.iter() {
let mut to_encode: u32 = i;
for &v in input {
let mut to_encode: u32 = v;
loop {
let next_byte: u8 = (to_encode % 128u32) as u8;
to_encode /= 128u32;
@@ -267,15 +267,13 @@ mod tests {
.map(|i| 4 + i * 7 / 2)
.into_iter()
.collect();
for offset in [0u32, 1u32, 2u32].iter() {
for offset in &[0u32, 1u32, 2u32] {
let encoded_data = encoder.compress_vint_sorted(&input, *offset);
assert_eq!(encoded_data.len(), expected_length);
let mut decoder = SIMDBlockDecoder::new();
let remaining_data = decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len());
assert_eq!(0, remaining_data.len());
for (&decoded, &expected) in decoder.output_array().iter().zip(input.iter()) {
assert_eq!(decoded, expected);
}
assert_eq!(input, decoder.output_array());
}
}
{

View File

@@ -55,7 +55,7 @@ impl<'a> PostingsMerger<'a> {
fn new(readers: &'a Vec<SegmentReader>) -> PostingsMerger<'a> {
let mut doc_offsets: Vec<DocId> = Vec::new();
let mut max_doc = 0;
for reader in readers.iter() {
for reader in readers {
doc_offsets.push(max_doc);
max_doc += reader.max_doc();
};
@@ -142,7 +142,7 @@ impl IndexMerger {
pub fn open(schema: Schema, segments: &Vec<Segment>) -> io::Result<IndexMerger> {
let mut readers = Vec::new();
let mut max_doc = 0;
for segment in segments.iter() {
for segment in segments {
let reader = try!(SegmentReader::open(segment.clone()));
max_doc += reader.max_doc();
readers.push(reader);
@@ -166,7 +166,7 @@ impl IndexMerger {
let mut u32_readers = Vec::new();
let mut min_val = u32::min_value();
let mut max_val = 0;
for reader in self.readers.iter() {
for reader in &self.readers {
let u32_reader = try!(reader.get_fast_field_reader(field));
min_val = min(min_val, u32_reader.min_val());
max_val = max(max_val, u32_reader.max_val());

View File

@@ -18,8 +18,8 @@ impl Searcher {
pub fn doc(&self, doc_address: &DocAddress) -> io::Result<Document> {
// TODO err
let DocAddress(ref segment_local_id, ref doc_id) = *doc_address;
let segment_reader = &self.segments[*segment_local_id as usize];
let DocAddress(segment_local_id, doc_id) = *doc_address;
let segment_reader = &self.segments[segment_local_id as usize];
segment_reader.doc(doc_id)
}

View File

@@ -101,7 +101,7 @@ impl SegmentReader {
/// bearing the given doc id.
/// This method is slow and should seldom be called from
/// within a collector.
pub fn doc(&self, doc_id: &DocId) -> io::Result<Document> {
pub fn doc(&self, doc_id: DocId) -> io::Result<Document> {
self.store_reader.get(doc_id)
}

View File

@@ -97,13 +97,13 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
let mut layer_sizes: Vec<u32> = Vec::new();
size += self.data_layer.buffer.len() as u32;
layer_sizes.push(size);
for layer in self.skip_layers.iter() {
for layer in &self.skip_layers {
size += layer.buffer.len() as u32;
layer_sizes.push(size);
}
try!(layer_sizes.serialize(output));
try!(self.data_layer.write(output));
for layer in self.skip_layers.iter() {
for layer in &self.skip_layers {
try!(layer.write(output));
}
Ok(())

View File

@@ -141,8 +141,8 @@ mod tests {
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
for x in permutation.iter() {
add_single_field_doc(&mut fast_field_writers, field, x.clone());
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
@@ -196,8 +196,8 @@ mod tests {
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
for x in permutation.iter() {
add_single_field_doc(&mut fast_field_writers, field, x.clone());
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
@@ -228,8 +228,8 @@ mod tests {
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = FastFieldSerializer::new(write).unwrap();
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
for x in permutation.iter() {
add_single_field_doc(&mut fast_field_writers, field, x.clone());
for x in &permutation {
add_single_field_doc(&mut fast_field_writers, field, *x);
}
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();

View File

@@ -35,7 +35,7 @@ impl U32FastFieldsWriter {
}
pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
for field_writer in self.field_writers.iter() {
for field_writer in &self.field_writers {
try!(field_writer.serialize(serializer));
}
Ok(())
@@ -83,11 +83,11 @@ impl U32FastFieldWriter {
pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
let zero = 0;
let min = self.vals.iter().min().unwrap_or(&zero).clone();
let max = self.vals.iter().max().unwrap_or(&min).clone();
try!(serializer.new_u32_fast_field(self.field.clone(), min, max));
for val in self.vals.iter() {
try!(serializer.add_val(val.clone()));
let min = *self.vals.iter().min().unwrap_or(&zero);
let max = *self.vals.iter().max().unwrap_or(&min);
try!(serializer.new_u32_fast_field(self.field, min, max));
for &val in &self.vals {
try!(serializer.add_val(val));
}
serializer.close_field()
}

View File

@@ -35,15 +35,16 @@ mod macros {
}
mod core;
mod datastruct;
mod postings;
mod directory;
mod compression;
mod fastfield;
mod store;
mod common;
pub mod query;
pub mod postings;
pub mod query;
pub mod directory;
pub mod datastruct;
pub mod analyzer;
pub mod collector;

View File

@@ -12,10 +12,10 @@ pub struct ChainedPostings<'a> {
impl<'a> ChainedPostings<'a> {
pub fn new(chained_postings: Vec<OffsetPostings<'a>>) -> ChainedPostings {
let mut doc_freq: usize = 0;
for segment_postings in chained_postings.iter() {
doc_freq += segment_postings.doc_freq();
}
let doc_freq: usize = chained_postings
.iter()
.map(|segment_postings| segment_postings.doc_freq())
.fold(0, |sum, addition| sum + addition);
ChainedPostings {
chained_postings: chained_postings,
posting_id: 0,

View File

@@ -1,6 +1,7 @@
use DocId;
use std::borrow::Borrow;
use std::borrow::BorrowMut;
use std::cmp::Ordering;
#[derive(PartialEq, Eq, Debug)]
pub enum SkipResult {
@@ -18,7 +19,19 @@ pub trait DocSet {
// after skipping position
// the iterator in such a way that doc() will return a
// value greater or equal to target.
fn skip_next(&mut self, target: DocId) -> SkipResult;
fn skip_next(&mut self, target: DocId) -> SkipResult {
loop {
match self.doc().cmp(&target) {
Ordering::Less => {
if !self.next() {
return SkipResult::End;
}
},
Ordering::Equal => { return SkipResult::Reached },
Ordering::Greater => { return SkipResult::OverStep },
}
}
}
fn doc(&self,) -> DocId;

View File

@@ -1,5 +1,4 @@
use postings::DocSet;
use postings::SkipResult;
use std::cmp::Ordering;
use DocId;
@@ -76,26 +75,9 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
fn doc_freq(&self,) -> usize {
// TODO not a great idea.
panic!("intersectiond does not implement doc freq");
panic!("intersection does not implement doc freq");
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
loop {
match self.doc().cmp(&target) {
Ordering::Equal => {
return SkipResult::Reached;
}
Ordering::Greater => {
return SkipResult::OverStep;
}
Ordering::Less => {}
}
if !self.next() {
return SkipResult::End;
}
}
}
}
#[inline(never)]

View File

@@ -1,7 +1,6 @@
use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
use DocId;
use std::cmp::Ordering;
use postings::{Postings, FreqHandler, SkipResult, DocSet};
use postings::{Postings, FreqHandler, DocSet};
use std::num::Wrapping;
@@ -82,26 +81,6 @@ impl<'a> DocSet for SegmentPostings<'a> {
self.block_decoder.output(self.index_within_block())
}
// after skipping position
// the iterator in such a way that doc() will return a
// value greater or equal to target.
fn skip_next(&mut self, target: DocId) -> SkipResult {
loop {
match self.doc().cmp(&target) {
Ordering::Equal => {
return SkipResult::Reached;
}
Ordering::Greater => {
return SkipResult::OverStep;
}
Ordering::Less => {}
}
if !self.next() {
return SkipResult::End;
}
}
}
fn doc_freq(&self,) -> usize {
self.doc_freq
}

View File

@@ -1,9 +1,8 @@
use DocId;
use postings::{Postings, DocSet};
use std::collections::BinaryHeap;
use postings::SkipResult;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use query::MultiTermScorer;
use postings::ScoredDocSet;
use query::Scorer;
@@ -13,7 +12,7 @@ struct HeapItem(DocId, usize, u32);
impl PartialOrd for HeapItem {
fn partial_cmp(&self, other:&Self) -> Option<Ordering> {
(self.0, self.1).partial_cmp(&(other.0, other.1)).map(|o| o.reverse())
Some(self.cmp(&other))
}
}
@@ -87,21 +86,9 @@ impl<TPostings: Postings> DocSet for UnionPostings<TPostings> {
}
}
fn skip_next(&mut self, target: DocId) -> SkipResult {
// TODO skip the underlying posting object.
loop {
match self.doc.cmp(&target) {
Ordering::Less => {
if !self.next() {
return SkipResult::End;
}
},
Ordering::Equal => { return SkipResult::Reached },
Ordering::Greater => { return SkipResult::OverStep },
}
}
}
// TODO implement a faster skip_next
fn doc(&self,) -> DocId {
self.doc
}

View File

@@ -94,7 +94,7 @@ impl PostingsWriter {
}
pub fn serialize(&self, serializer: &mut PostingsSerializer) -> io::Result<()> {
for (term, postings_id) in self.term_index.iter() {
for (term, postings_id) in &self.term_index {
let term_postings_writer = &self.postings[postings_id.clone()];
let term_docfreq = term_postings_writer.doc_freq();
try!(serializer.new_term(&term, term_docfreq));

View File

@@ -3,8 +3,6 @@ use std::io::Write;
use std::io::Read;
use common::BinarySerializable;
// TODO impl Copy trait
#[derive(Copy,Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
pub struct Field(pub u8);

View File

@@ -56,7 +56,7 @@ mod tests {
let store_source = directory.open_read(&path).unwrap();
let store = StoreReader::new(store_source);
for i in (0..10).map(|i| i * 3 / 2) {
assert_eq!(*store.get(&i).unwrap().get_first(field_title).unwrap().text(), format!("Doc {}", i));
assert_eq!(*store.get(i).unwrap().get_first(field_title).unwrap().text(), format!("Doc {}", i));
}
}
@@ -78,7 +78,7 @@ mod tests {
let store_source = directory.open_read(&path).unwrap();
let store = StoreReader::new(store_source);
b.iter(|| {
store.get(&12).unwrap();
store.get(12).unwrap();
});
}

View File

@@ -35,14 +35,14 @@ impl StoreReader {
offsets
}
fn block_offset(&self, seek: &DocId) -> OffsetIndex {
fn search(offsets: &[OffsetIndex], seek: &DocId) -> OffsetIndex {
fn block_offset(&self, seek: DocId) -> OffsetIndex {
fn search(offsets: &[OffsetIndex], seek: DocId) -> OffsetIndex {
let m = offsets.len() / 2;
let pivot_offset = &offsets[m];
if offsets.len() <= 1 {
return pivot_offset.clone()
}
match pivot_offset.0.cmp(seek) {
match pivot_offset.0.cmp(&seek) {
Ordering::Less => search(&offsets[m..], seek),
Ordering::Equal => pivot_offset.clone(),
Ordering::Greater => search(&offsets[..m], seek),
@@ -62,12 +62,12 @@ impl StoreReader {
lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())
}
pub fn get(&self, doc_id: &DocId) -> io::Result<Document> {
pub fn get(&self, doc_id: DocId) -> io::Result<Document> {
let OffsetIndex(first_doc_id, block_offset) = self.block_offset(doc_id);
try!(self.read_block(block_offset as usize));
let mut current_block_mut = self.current_block.borrow_mut();
let mut cursor = Cursor::new(&mut current_block_mut[..]);
for _ in first_doc_id..*doc_id {
for _ in first_doc_id..doc_id {
let block_length = try!(u32::deserialize(&mut cursor));
try!(cursor.seek(SeekFrom::Current(block_length as i64)));
}

View File

@@ -52,7 +52,7 @@ impl StoreWriter {
match reader.offsets.last() {
Some(&OffsetIndex(ref num_docs, ref body_size)) => {
try!(self.writer.write_all(&reader.data.as_slice()[0..*body_size as usize]));
for &OffsetIndex(doc, offset) in reader.offsets.iter() {
for &OffsetIndex(doc, offset) in &reader.offsets {
self.offsets.push(OffsetIndex(self.doc + doc, self.written + offset));
}
self.written += *body_size;
@@ -68,7 +68,7 @@ impl StoreWriter {
pub fn store<'a>(&mut self, field_values: &Vec<&'a FieldValue>) -> io::Result<()> {
self.intermediary_buffer.clear();
try!((field_values.len() as u32).serialize(&mut self.intermediary_buffer));
for field_value in field_values.iter() {
for field_value in field_values {
try!((*field_value).serialize(&mut self.intermediary_buffer));
}
try!((self.intermediary_buffer.len() as u32).serialize(&mut self.current_block));