mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-20 02:00:40 +00:00
Code cleaning.
This commit is contained in:
10
Cargo.toml
10
Cargo.toml
@@ -4,7 +4,6 @@ version = "0.1.0"
|
||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
|
||||
build = "build.rs"
|
||||
|
||||
|
||||
[dependencies]
|
||||
byteorder = "0.4"
|
||||
memmap = "0.2"
|
||||
@@ -34,12 +33,3 @@ gcc = "0.3.24"
|
||||
[[bin]]
|
||||
name = "tantivy-merge"
|
||||
path = "src/cli/merge.rs"
|
||||
|
||||
|
||||
# [profile.release]
|
||||
# opt-level = 3
|
||||
# debug = true
|
||||
# rpath = false
|
||||
# lto = false
|
||||
# debug-assertions = false
|
||||
# codegen-units = 1
|
||||
|
||||
@@ -31,7 +31,7 @@ impl BinarySerializable for () {
|
||||
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
|
||||
fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
|
||||
let mut total_size = try!(VInt(self.len() as u64).serialize(writer));
|
||||
for it in self.iter() {
|
||||
for it in self {
|
||||
total_size += try!(it.serialize(writer));
|
||||
}
|
||||
Ok(total_size)
|
||||
|
||||
@@ -50,9 +50,9 @@ impl SIMDBlockEncoder {
|
||||
|
||||
pub fn compress_vint_sorted(&mut self, input: &[u32], mut offset: u32) -> &[u8] {
|
||||
let mut byte_written = 0;
|
||||
for v in input.iter() {
|
||||
let mut to_encode: u32 = *v - offset;
|
||||
offset = *v;
|
||||
for &v in input {
|
||||
let mut to_encode: u32 = v - offset;
|
||||
offset = v;
|
||||
loop {
|
||||
let next_byte: u8 = (to_encode % 128u32) as u8;
|
||||
to_encode /= 128u32;
|
||||
@@ -72,8 +72,8 @@ impl SIMDBlockEncoder {
|
||||
|
||||
pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] {
|
||||
let mut byte_written = 0;
|
||||
for &i in input.iter() {
|
||||
let mut to_encode: u32 = i;
|
||||
for &v in input {
|
||||
let mut to_encode: u32 = v;
|
||||
loop {
|
||||
let next_byte: u8 = (to_encode % 128u32) as u8;
|
||||
to_encode /= 128u32;
|
||||
@@ -267,15 +267,13 @@ mod tests {
|
||||
.map(|i| 4 + i * 7 / 2)
|
||||
.into_iter()
|
||||
.collect();
|
||||
for offset in [0u32, 1u32, 2u32].iter() {
|
||||
for offset in &[0u32, 1u32, 2u32] {
|
||||
let encoded_data = encoder.compress_vint_sorted(&input, *offset);
|
||||
assert_eq!(encoded_data.len(), expected_length);
|
||||
let mut decoder = SIMDBlockDecoder::new();
|
||||
let remaining_data = decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len());
|
||||
assert_eq!(0, remaining_data.len());
|
||||
for (&decoded, &expected) in decoder.output_array().iter().zip(input.iter()) {
|
||||
assert_eq!(decoded, expected);
|
||||
}
|
||||
assert_eq!(input, decoder.output_array());
|
||||
}
|
||||
}
|
||||
{
|
||||
|
||||
@@ -55,7 +55,7 @@ impl<'a> PostingsMerger<'a> {
|
||||
fn new(readers: &'a Vec<SegmentReader>) -> PostingsMerger<'a> {
|
||||
let mut doc_offsets: Vec<DocId> = Vec::new();
|
||||
let mut max_doc = 0;
|
||||
for reader in readers.iter() {
|
||||
for reader in readers {
|
||||
doc_offsets.push(max_doc);
|
||||
max_doc += reader.max_doc();
|
||||
};
|
||||
@@ -142,7 +142,7 @@ impl IndexMerger {
|
||||
pub fn open(schema: Schema, segments: &Vec<Segment>) -> io::Result<IndexMerger> {
|
||||
let mut readers = Vec::new();
|
||||
let mut max_doc = 0;
|
||||
for segment in segments.iter() {
|
||||
for segment in segments {
|
||||
let reader = try!(SegmentReader::open(segment.clone()));
|
||||
max_doc += reader.max_doc();
|
||||
readers.push(reader);
|
||||
@@ -166,7 +166,7 @@ impl IndexMerger {
|
||||
let mut u32_readers = Vec::new();
|
||||
let mut min_val = u32::min_value();
|
||||
let mut max_val = 0;
|
||||
for reader in self.readers.iter() {
|
||||
for reader in &self.readers {
|
||||
let u32_reader = try!(reader.get_fast_field_reader(field));
|
||||
min_val = min(min_val, u32_reader.min_val());
|
||||
max_val = max(max_val, u32_reader.max_val());
|
||||
|
||||
@@ -18,8 +18,8 @@ impl Searcher {
|
||||
|
||||
pub fn doc(&self, doc_address: &DocAddress) -> io::Result<Document> {
|
||||
// TODO err
|
||||
let DocAddress(ref segment_local_id, ref doc_id) = *doc_address;
|
||||
let segment_reader = &self.segments[*segment_local_id as usize];
|
||||
let DocAddress(segment_local_id, doc_id) = *doc_address;
|
||||
let segment_reader = &self.segments[segment_local_id as usize];
|
||||
segment_reader.doc(doc_id)
|
||||
}
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ impl SegmentReader {
|
||||
/// bearing the given doc id.
|
||||
/// This method is slow and should seldom be called from
|
||||
/// within a collector.
|
||||
pub fn doc(&self, doc_id: &DocId) -> io::Result<Document> {
|
||||
pub fn doc(&self, doc_id: DocId) -> io::Result<Document> {
|
||||
self.store_reader.get(doc_id)
|
||||
}
|
||||
|
||||
|
||||
@@ -97,13 +97,13 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
|
||||
let mut layer_sizes: Vec<u32> = Vec::new();
|
||||
size += self.data_layer.buffer.len() as u32;
|
||||
layer_sizes.push(size);
|
||||
for layer in self.skip_layers.iter() {
|
||||
for layer in &self.skip_layers {
|
||||
size += layer.buffer.len() as u32;
|
||||
layer_sizes.push(size);
|
||||
}
|
||||
try!(layer_sizes.serialize(output));
|
||||
try!(self.data_layer.write(output));
|
||||
for layer in self.skip_layers.iter() {
|
||||
for layer in &self.skip_layers {
|
||||
try!(layer.write(output));
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -141,8 +141,8 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
||||
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
|
||||
for x in permutation.iter() {
|
||||
add_single_field_doc(&mut fast_field_writers, field, x.clone());
|
||||
for x in &permutation {
|
||||
add_single_field_doc(&mut fast_field_writers, field, *x);
|
||||
}
|
||||
fast_field_writers.serialize(&mut serializer).unwrap();
|
||||
serializer.close().unwrap();
|
||||
@@ -196,8 +196,8 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
||||
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
|
||||
for x in permutation.iter() {
|
||||
add_single_field_doc(&mut fast_field_writers, field, x.clone());
|
||||
for x in &permutation {
|
||||
add_single_field_doc(&mut fast_field_writers, field, *x);
|
||||
}
|
||||
fast_field_writers.serialize(&mut serializer).unwrap();
|
||||
serializer.close().unwrap();
|
||||
@@ -228,8 +228,8 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = FastFieldSerializer::new(write).unwrap();
|
||||
let mut fast_field_writers = U32FastFieldsWriter::from_schema(&schema);
|
||||
for x in permutation.iter() {
|
||||
add_single_field_doc(&mut fast_field_writers, field, x.clone());
|
||||
for x in &permutation {
|
||||
add_single_field_doc(&mut fast_field_writers, field, *x);
|
||||
}
|
||||
fast_field_writers.serialize(&mut serializer).unwrap();
|
||||
serializer.close().unwrap();
|
||||
|
||||
@@ -35,7 +35,7 @@ impl U32FastFieldsWriter {
|
||||
}
|
||||
|
||||
pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
|
||||
for field_writer in self.field_writers.iter() {
|
||||
for field_writer in &self.field_writers {
|
||||
try!(field_writer.serialize(serializer));
|
||||
}
|
||||
Ok(())
|
||||
@@ -83,11 +83,11 @@ impl U32FastFieldWriter {
|
||||
|
||||
pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
|
||||
let zero = 0;
|
||||
let min = self.vals.iter().min().unwrap_or(&zero).clone();
|
||||
let max = self.vals.iter().max().unwrap_or(&min).clone();
|
||||
try!(serializer.new_u32_fast_field(self.field.clone(), min, max));
|
||||
for val in self.vals.iter() {
|
||||
try!(serializer.add_val(val.clone()));
|
||||
let min = *self.vals.iter().min().unwrap_or(&zero);
|
||||
let max = *self.vals.iter().max().unwrap_or(&min);
|
||||
try!(serializer.new_u32_fast_field(self.field, min, max));
|
||||
for &val in &self.vals {
|
||||
try!(serializer.add_val(val));
|
||||
}
|
||||
serializer.close_field()
|
||||
}
|
||||
|
||||
@@ -35,15 +35,16 @@ mod macros {
|
||||
}
|
||||
|
||||
mod core;
|
||||
mod datastruct;
|
||||
mod postings;
|
||||
mod directory;
|
||||
|
||||
mod compression;
|
||||
mod fastfield;
|
||||
mod store;
|
||||
mod common;
|
||||
pub mod query;
|
||||
|
||||
pub mod postings;
|
||||
pub mod query;
|
||||
pub mod directory;
|
||||
pub mod datastruct;
|
||||
pub mod analyzer;
|
||||
pub mod collector;
|
||||
|
||||
|
||||
@@ -12,10 +12,10 @@ pub struct ChainedPostings<'a> {
|
||||
impl<'a> ChainedPostings<'a> {
|
||||
|
||||
pub fn new(chained_postings: Vec<OffsetPostings<'a>>) -> ChainedPostings {
|
||||
let mut doc_freq: usize = 0;
|
||||
for segment_postings in chained_postings.iter() {
|
||||
doc_freq += segment_postings.doc_freq();
|
||||
}
|
||||
let doc_freq: usize = chained_postings
|
||||
.iter()
|
||||
.map(|segment_postings| segment_postings.doc_freq())
|
||||
.fold(0, |sum, addition| sum + addition);
|
||||
ChainedPostings {
|
||||
chained_postings: chained_postings,
|
||||
posting_id: 0,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use DocId;
|
||||
use std::borrow::Borrow;
|
||||
use std::borrow::BorrowMut;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum SkipResult {
|
||||
@@ -18,7 +19,19 @@ pub trait DocSet {
|
||||
// after skipping position
|
||||
// the iterator in such a way that doc() will return a
|
||||
// value greater or equal to target.
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult;
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
loop {
|
||||
match self.doc().cmp(&target) {
|
||||
Ordering::Less => {
|
||||
if !self.next() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
},
|
||||
Ordering::Equal => { return SkipResult::Reached },
|
||||
Ordering::Greater => { return SkipResult::OverStep },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn doc(&self,) -> DocId;
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use postings::DocSet;
|
||||
use postings::SkipResult;
|
||||
use std::cmp::Ordering;
|
||||
use DocId;
|
||||
|
||||
@@ -76,26 +75,9 @@ impl<'a> DocSet for IntersectionDocSet<'a> {
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
// TODO not a great idea.
|
||||
panic!("intersectiond does not implement doc freq");
|
||||
panic!("intersection does not implement doc freq");
|
||||
}
|
||||
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
loop {
|
||||
match self.doc().cmp(&target) {
|
||||
Ordering::Equal => {
|
||||
return SkipResult::Reached;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
return SkipResult::OverStep;
|
||||
}
|
||||
Ordering::Less => {}
|
||||
}
|
||||
if !self.next() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
|
||||
use DocId;
|
||||
use std::cmp::Ordering;
|
||||
use postings::{Postings, FreqHandler, SkipResult, DocSet};
|
||||
use postings::{Postings, FreqHandler, DocSet};
|
||||
use std::num::Wrapping;
|
||||
|
||||
|
||||
@@ -82,26 +81,6 @@ impl<'a> DocSet for SegmentPostings<'a> {
|
||||
self.block_decoder.output(self.index_within_block())
|
||||
}
|
||||
|
||||
// after skipping position
|
||||
// the iterator in such a way that doc() will return a
|
||||
// value greater or equal to target.
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
loop {
|
||||
match self.doc().cmp(&target) {
|
||||
Ordering::Equal => {
|
||||
return SkipResult::Reached;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
return SkipResult::OverStep;
|
||||
}
|
||||
Ordering::Less => {}
|
||||
}
|
||||
if !self.next() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_freq(&self,) -> usize {
|
||||
self.doc_freq
|
||||
}
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
|
||||
use DocId;
|
||||
use postings::{Postings, DocSet};
|
||||
use std::collections::BinaryHeap;
|
||||
use postings::SkipResult;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BinaryHeap;
|
||||
use query::MultiTermScorer;
|
||||
use postings::ScoredDocSet;
|
||||
use query::Scorer;
|
||||
@@ -13,7 +12,7 @@ struct HeapItem(DocId, usize, u32);
|
||||
|
||||
impl PartialOrd for HeapItem {
|
||||
fn partial_cmp(&self, other:&Self) -> Option<Ordering> {
|
||||
(self.0, self.1).partial_cmp(&(other.0, other.1)).map(|o| o.reverse())
|
||||
Some(self.cmp(&other))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,21 +86,9 @@ impl<TPostings: Postings> DocSet for UnionPostings<TPostings> {
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_next(&mut self, target: DocId) -> SkipResult {
|
||||
// TODO skip the underlying posting object.
|
||||
loop {
|
||||
match self.doc.cmp(&target) {
|
||||
Ordering::Less => {
|
||||
if !self.next() {
|
||||
return SkipResult::End;
|
||||
}
|
||||
},
|
||||
Ordering::Equal => { return SkipResult::Reached },
|
||||
Ordering::Greater => { return SkipResult::OverStep },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TODO implement a faster skip_next
|
||||
|
||||
fn doc(&self,) -> DocId {
|
||||
self.doc
|
||||
}
|
||||
|
||||
@@ -94,7 +94,7 @@ impl PostingsWriter {
|
||||
}
|
||||
|
||||
pub fn serialize(&self, serializer: &mut PostingsSerializer) -> io::Result<()> {
|
||||
for (term, postings_id) in self.term_index.iter() {
|
||||
for (term, postings_id) in &self.term_index {
|
||||
let term_postings_writer = &self.postings[postings_id.clone()];
|
||||
let term_docfreq = term_postings_writer.doc_freq();
|
||||
try!(serializer.new_term(&term, term_docfreq));
|
||||
|
||||
@@ -3,8 +3,6 @@ use std::io::Write;
|
||||
use std::io::Read;
|
||||
use common::BinarySerializable;
|
||||
|
||||
// TODO impl Copy trait
|
||||
|
||||
#[derive(Copy,Clone,Debug,PartialEq,PartialOrd,Eq,Hash)]
|
||||
pub struct Field(pub u8);
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ mod tests {
|
||||
let store_source = directory.open_read(&path).unwrap();
|
||||
let store = StoreReader::new(store_source);
|
||||
for i in (0..10).map(|i| i * 3 / 2) {
|
||||
assert_eq!(*store.get(&i).unwrap().get_first(field_title).unwrap().text(), format!("Doc {}", i));
|
||||
assert_eq!(*store.get(i).unwrap().get_first(field_title).unwrap().text(), format!("Doc {}", i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ mod tests {
|
||||
let store_source = directory.open_read(&path).unwrap();
|
||||
let store = StoreReader::new(store_source);
|
||||
b.iter(|| {
|
||||
store.get(&12).unwrap();
|
||||
store.get(12).unwrap();
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
@@ -35,14 +35,14 @@ impl StoreReader {
|
||||
offsets
|
||||
}
|
||||
|
||||
fn block_offset(&self, seek: &DocId) -> OffsetIndex {
|
||||
fn search(offsets: &[OffsetIndex], seek: &DocId) -> OffsetIndex {
|
||||
fn block_offset(&self, seek: DocId) -> OffsetIndex {
|
||||
fn search(offsets: &[OffsetIndex], seek: DocId) -> OffsetIndex {
|
||||
let m = offsets.len() / 2;
|
||||
let pivot_offset = &offsets[m];
|
||||
if offsets.len() <= 1 {
|
||||
return pivot_offset.clone()
|
||||
}
|
||||
match pivot_offset.0.cmp(seek) {
|
||||
match pivot_offset.0.cmp(&seek) {
|
||||
Ordering::Less => search(&offsets[m..], seek),
|
||||
Ordering::Equal => pivot_offset.clone(),
|
||||
Ordering::Greater => search(&offsets[..m], seek),
|
||||
@@ -62,12 +62,12 @@ impl StoreReader {
|
||||
lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())
|
||||
}
|
||||
|
||||
pub fn get(&self, doc_id: &DocId) -> io::Result<Document> {
|
||||
pub fn get(&self, doc_id: DocId) -> io::Result<Document> {
|
||||
let OffsetIndex(first_doc_id, block_offset) = self.block_offset(doc_id);
|
||||
try!(self.read_block(block_offset as usize));
|
||||
let mut current_block_mut = self.current_block.borrow_mut();
|
||||
let mut cursor = Cursor::new(&mut current_block_mut[..]);
|
||||
for _ in first_doc_id..*doc_id {
|
||||
for _ in first_doc_id..doc_id {
|
||||
let block_length = try!(u32::deserialize(&mut cursor));
|
||||
try!(cursor.seek(SeekFrom::Current(block_length as i64)));
|
||||
}
|
||||
|
||||
@@ -52,7 +52,7 @@ impl StoreWriter {
|
||||
match reader.offsets.last() {
|
||||
Some(&OffsetIndex(ref num_docs, ref body_size)) => {
|
||||
try!(self.writer.write_all(&reader.data.as_slice()[0..*body_size as usize]));
|
||||
for &OffsetIndex(doc, offset) in reader.offsets.iter() {
|
||||
for &OffsetIndex(doc, offset) in &reader.offsets {
|
||||
self.offsets.push(OffsetIndex(self.doc + doc, self.written + offset));
|
||||
}
|
||||
self.written += *body_size;
|
||||
@@ -68,7 +68,7 @@ impl StoreWriter {
|
||||
pub fn store<'a>(&mut self, field_values: &Vec<&'a FieldValue>) -> io::Result<()> {
|
||||
self.intermediary_buffer.clear();
|
||||
try!((field_values.len() as u32).serialize(&mut self.intermediary_buffer));
|
||||
for field_value in field_values.iter() {
|
||||
for field_value in field_values {
|
||||
try!((*field_value).serialize(&mut self.intermediary_buffer));
|
||||
}
|
||||
try!((self.intermediary_buffer.len() as u32).serialize(&mut self.current_block));
|
||||
|
||||
Reference in New Issue
Block a user