diff --git a/Cargo.toml b/Cargo.toml index 75e7899b2..2d16c5bdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,5 +44,3 @@ gcc = "0.3" # [profile.release] # debug = true - - diff --git a/examples/simple_search.rs b/examples/simple_search.rs index 04e41d674..960ff641f 100644 --- a/examples/simple_search.rs +++ b/examples/simple_search.rs @@ -24,7 +24,7 @@ fn main() { fn create_schema() -> Schema { // We need to declare a schema // to create a new index. - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); // TEXT | STORED is some syntactic sugar to describe // how tantivy should index this field. @@ -69,7 +69,7 @@ fn run(index_path: &Path) -> tantivy::Result<()> { let body = schema.get_field("body").unwrap(); - let mut old_man_doc = Document::new(); + let mut old_man_doc = Document::default(); old_man_doc.add_text(title, "The Old Man and the Sea"); old_man_doc.add_text(body, "He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four days now without taking a fish."); diff --git a/src/analyzer/mod.rs b/src/analyzer/mod.rs index 2c72b284a..f34e4b8d9 100644 --- a/src/analyzer/mod.rs +++ b/src/analyzer/mod.rs @@ -18,29 +18,22 @@ pub trait StreamingIterator<'a, T> { impl<'a, 'b> TokenIter<'b> { fn consume_token(&'a mut self) -> Option<&'a str> { - loop { - match self.chars.next() { - Some(c) => { - if c.is_alphanumeric() { - append_char_lowercase(c, &mut self.term_buffer); - } - else { - break; - } - }, - None => { - break; - } + for c in &mut self.chars { + if c.is_alphanumeric() { + append_char_lowercase(c, &mut self.term_buffer); + } + else { + break; } } - return Some(&self.term_buffer); + Some(&self.term_buffer) } } impl<'a, 'b> StreamingIterator<'a, &'a str> for TokenIter<'b> { - #[inline(always)] + #[inline] fn next(&'a mut self,) -> Option<&'a str> { self.term_buffer.clear(); // skipping non-letter characters. 
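A note on the constructor change above, which recurs through the collector diffs below: zero-argument `new()` constructors are replaced by `Default` implementations, so call sites read `SchemaBuilder::default()` and `Document::default()`. A minimal sketch of the convention, using a simplified stand-in for the `CountCollector` type touched below (only the field layout is taken from the diff; the rest is illustrative):

```rust
// Simplified stand-in for tantivy's CountCollector.
struct CountCollector {
    count: usize,
}

// Replacing `fn new()` with a `Default` impl gives callers the
// idiomatic `CountCollector::default()` entry point.
impl Default for CountCollector {
    fn default() -> CountCollector {
        CountCollector { count: 0 }
    }
}

fn main() {
    let collector = CountCollector::default();
    assert_eq!(collector.count, 0);
}
```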
diff --git a/src/collector/chained_collector.rs b/src/collector/chained_collector.rs index da7bd24ad..a0b73e4b1 100644 --- a/src/collector/chained_collector.rs +++ b/src/collector/chained_collector.rs @@ -7,11 +7,11 @@ use ScoredDoc; pub struct DoNothingCollector; impl Collector for DoNothingCollector { - #[inline(always)] + #[inline] fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> { Ok(()) } - #[inline(always)] + #[inline] fn collect(&mut self, _: ScoredDoc) {} } @@ -29,7 +29,7 @@ impl ChainedCollector { } } - pub fn add<'b, C: Collector>(self, new_collector: &'b mut C) -> ChainedCollector> { + pub fn push(self, new_collector: &mut C) -> ChainedCollector> { ChainedCollector { left: self, right: MutRefCollector(new_collector), @@ -79,11 +79,11 @@ mod tests { #[test] fn test_chained_collector() { let mut top_collector = TopCollector::with_limit(2); - let mut count_collector = CountCollector::new(); + let mut count_collector = CountCollector::default(); { let mut collectors = chain() - .add(&mut top_collector) - .add(&mut count_collector); + .push(&mut top_collector) + .push(&mut count_collector); collectors.collect(ScoredDoc(0.2, 1)); collectors.collect(ScoredDoc(0.1, 2)); collectors.collect(ScoredDoc(0.5, 3)); diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index 2d28da863..4cbd709e5 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -11,13 +11,6 @@ pub struct CountCollector { } impl CountCollector { - pub fn new() -> CountCollector { - CountCollector { - count: 0, - } - } - - // Returns the count of documents that were // collected. pub fn count(&self,) -> usize { @@ -25,6 +18,14 @@ impl CountCollector { } } +impl Default for CountCollector { + fn default() -> CountCollector { + CountCollector { + count: 0, + } + } +} + impl Collector for CountCollector { fn set_segment(&mut self, _: SegmentLocalId, _: &SegmentReader) -> io::Result<()> { @@ -47,7 +48,7 @@ mod tests { #[bench] fn build_collector(b: &mut Bencher) { b.iter(|| { - let mut count_collector = CountCollector::new(); + let mut count_collector = CountCollector::default(); let docs: Vec = (0..1_000_000).collect(); for doc in docs { count_collector.collect(ScoredDoc(1f32, doc)); diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 7e459686d..792ddc908 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -29,17 +29,19 @@ pub struct TestCollector { } impl TestCollector { - pub fn new() -> TestCollector { + pub fn docs(self,) -> Vec { + self.docs + } +} + +impl Default for TestCollector { + fn default() -> TestCollector { TestCollector { docs: Vec::new(), offset: 0, segment_max_doc: 0, } } - - pub fn docs(self,) -> Vec { - self.docs - } } impl Collector for TestCollector { @@ -101,7 +103,7 @@ mod tests { #[bench] fn build_collector(b: &mut Bencher) { b.iter(|| { - let mut count_collector = CountCollector::new(); + let mut count_collector = CountCollector::default(); let docs: Vec = (0..1_000_000).collect(); for doc in docs { count_collector.collect(ScoredDoc(1f32, doc)); diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 11bbd53eb..1a393d5a4 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -19,14 +19,14 @@ impl<'a> MultiCollector<'a> { impl<'a> Collector for MultiCollector<'a> { fn set_segment(&mut self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> io::Result<()> { - for collector in self.collectors.iter_mut() { + for
collector in &mut self.collectors { try!(collector.set_segment(segment_local_id, segment)); } Ok(()) } fn collect(&mut self, scored_doc: ScoredDoc) { - for collector in self.collectors.iter_mut() { + for collector in &mut self.collectors { collector.collect(scored_doc); } } @@ -44,7 +44,7 @@ mod tests { #[test] fn test_multi_collector() { let mut top_collector = TopCollector::with_limit(2); - let mut count_collector = CountCollector::new(); + let mut count_collector = CountCollector::default(); { let mut collectors = MultiCollector::from(vec!(&mut top_collector, &mut count_collector)); collectors.collect(ScoredDoc(0.2, 1)); diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 4ac17886f..45f79f82a 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -19,7 +19,7 @@ impl PartialOrd for GlobalScoredDoc { } impl Ord for GlobalScoredDoc { - #[inline(always)] + #[inline] fn cmp(&self, other: &GlobalScoredDoc) -> Ordering { other.0.partial_cmp(&self.0) .unwrap_or( @@ -30,7 +30,7 @@ impl Ord for GlobalScoredDoc { impl PartialEq for GlobalScoredDoc { fn eq(&self, other: &GlobalScoredDoc) -> bool { - self.cmp(&other) == Ordering::Equal + self.cmp(other) == Ordering::Equal } } @@ -77,7 +77,7 @@ impl TopCollector { .collect() } - #[inline(always)] + #[inline] pub fn at_capacity(&self, ) -> bool { self.heap.len() >= self.limit } diff --git a/src/common/serialize.rs b/src/common/serialize.rs index be0b59437..13ec26584 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -59,7 +59,7 @@ impl BinarySerializable for impl BinarySerializable for u32 { fn serialize(&self, writer: &mut Write) -> io::Result { - writer.write_u32::(self.clone()) + writer.write_u32::(*self) .map(|_| 4) .map_err(convert_byte_order_error) } @@ -73,7 +73,7 @@ impl BinarySerializable for u32 { impl BinarySerializable for u64 { fn serialize(&self, writer: &mut Write) -> io::Result { - writer.write_u64::(self.clone()) + writer.write_u64::(*self) .map(|_| 8) .map_err(convert_byte_order_error) } @@ -87,7 +87,7 @@ impl BinarySerializable for u64 { impl BinarySerializable for u8 { fn serialize(&self, writer: &mut Write) -> io::Result { // TODO error - try!(writer.write_u8(self.clone()).map_err(convert_byte_order_error)); + try!(writer.write_u8(*self).map_err(convert_byte_order_error)); Ok(1) } fn deserialize(reader: &mut Read) -> io::Result { diff --git a/src/common/timer.rs b/src/common/timer.rs index 0ee3ddf82..ae1d3959e 100644 --- a/src/common/timer.rs +++ b/src/common/timer.rs @@ -41,11 +41,6 @@ pub struct TimerTree { } impl TimerTree { - pub fn new() -> TimerTree { - TimerTree { - timings: Vec::new(), - } - } pub fn total_time(&self,) -> i64 { self.timings.last().unwrap().duration @@ -61,6 +56,13 @@ impl TimerTree { } } +impl Default for TimerTree { + fn default() -> TimerTree { + TimerTree { + timings: Vec::new(), + } + } +} #[cfg(test)] @@ -70,7 +72,7 @@ mod tests { #[test] fn test_timer() { - let mut timer_tree = TimerTree::new(); + let mut timer_tree = TimerTree::default(); { let mut a = timer_tree.open("a"); { diff --git a/src/common/vint.rs b/src/common/vint.rs index 6803b435f..70b1f9589 100644 --- a/src/common/vint.rs +++ b/src/common/vint.rs @@ -11,13 +11,13 @@ pub struct VInt(pub u64); impl VInt { pub fn val(&self,) -> u64 { - self.0.clone() + self.0 } } impl BinarySerializable for VInt { fn serialize(&self, writer: &mut Write) -> io::Result { - let mut remaining = self.0.clone(); + let mut remaining = self.0; let mut written: usize = 0; let mut buffer 
= [0u8; 10]; loop { diff --git a/src/compression/composite.rs b/src/compression/composite.rs index 7d3e01ffc..e338131cf 100644 --- a/src/compression/composite.rs +++ b/src/compression/composite.rs @@ -22,7 +22,7 @@ impl CompositeEncoder { let mut offset = 0u32; for i in 0..num_blocks { let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK]; - let block_compressed = self.block_encoder.compress_block_sorted(&vals_slice, offset); + let block_compressed = self.block_encoder.compress_block_sorted(vals_slice, offset); offset = vals_slice[NUM_DOCS_PER_BLOCK - 1]; self.output.extend_from_slice(block_compressed); } @@ -36,7 +36,7 @@ impl CompositeEncoder { let num_blocks = vals.len() / NUM_DOCS_PER_BLOCK; for i in 0..num_blocks { let vals_slice = &vals[i * NUM_DOCS_PER_BLOCK .. (i + 1) * NUM_DOCS_PER_BLOCK]; - let block_compressed = self.block_encoder.compress_block_unsorted(&vals_slice); + let block_compressed = self.block_encoder.compress_block_unsorted(vals_slice); self.output.extend_from_slice(block_compressed); } let vint_compressed = self.block_encoder.compress_vint_unsorted(&vals[num_blocks * NUM_DOCS_PER_BLOCK..]); diff --git a/src/compression/simdcomp.rs b/src/compression/simdcomp.rs index fdf049c90..e2b6a7da3 100644 --- a/src/compression/simdcomp.rs +++ b/src/compression/simdcomp.rs @@ -67,7 +67,7 @@ impl SIMDBlockEncoder { } } } - return &self.output[..byte_written]; + &self.output[..byte_written] } pub fn compress_vint_unsorted(&mut self, input: &[u32]) -> &[u8] { @@ -88,7 +88,7 @@ impl SIMDBlockEncoder { } } } - return &self.output[..byte_written]; + &self.output[..byte_written] } } @@ -170,12 +170,12 @@ impl SIMDBlockDecoder { &compressed_data[read_byte..] } - #[inline(always)] + #[inline] pub fn output_array(&self,) -> &[u32] { &self.output[..self.output_len] } - #[inline(always)] + #[inline] pub fn output(&self, idx: usize) -> u32 { self.output[idx] } } diff --git a/src/core/index.rs b/src/core/index.rs index cdd25c608..ae042380e 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -131,7 +131,7 @@ impl Index { docstamp: u64) -> Result<()> { { let mut meta_write = try!(self.metas.write()); - meta_write.segments.extend(segment_ids); + meta_write.segments.extend_from_slice(segment_ids); meta_write.docstamp = docstamp; } try!(self.save_metas()); @@ -167,7 +167,7 @@ impl Index { } - pub fn segment(&self, segment_id: SegmentId) -> Segment { + fn segment(&self, segment_id: SegmentId) -> Segment { Segment::new(self.clone(), segment_id) } @@ -193,7 +193,7 @@ impl Index { } pub fn new_segment(&self,) -> Segment { - self.segment(SegmentId::new()) + self.segment(SegmentId::generate_random()) } pub fn save_metas(&mut self,) -> Result<()> { diff --git a/src/core/segment_id.rs b/src/core/segment_id.rs index aa30379a0..70ab88788 100644 --- a/src/core/segment_id.rs +++ b/src/core/segment_id.rs @@ -8,7 +8,7 @@ use std::path::PathBuf; pub struct SegmentId(Uuid); impl SegmentId { - pub fn new() -> SegmentId { + pub fn generate_random() -> SegmentId { SegmentId(Uuid::new_v4()) } diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 4afd2edb5..2859d65a0 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -53,11 +53,11 @@ impl SegmentReader { pub fn get_fast_field_reader(&self, field: Field) -> io::Result { let field_entry = self.schema.get_field_entry(field); - match field_entry.field_type() { - &FieldType::Str(_) => { + match *field_entry.field_type() { + FieldType::Str(_) => { Err(io::Error::new(io::ErrorKind::Other, "fast fields
are not yet supported for text fields.")) }, - &FieldType::U32(_) => { + FieldType::U32(_) => { // TODO check that the schema allows that //Err(io::Error::new(io::ErrorKind::Other, "fast field are not yet supported for text fields.")) self.fast_fields_reader.get_field(field) @@ -106,8 +106,8 @@ impl SegmentReader { let fieldnorms_reader = try!(U32FastFieldsReader::open(fieldnorms_data)); let positions_data = segment - .open_read(SegmentComponent::POSITIONS) - .unwrap_or(ReadOnlySource::empty()); + .open_read(SegmentComponent::POSITIONS) + .unwrap_or_else(|_| ReadOnlySource::empty()); let schema = segment.schema(); Ok(SegmentReader { @@ -145,19 +145,19 @@ impl SegmentReader { let term_info = get!(self.get_term_info(&term)); let offset = term_info.postings_offset as usize; let postings_data = &self.postings_data[offset..]; - let freq_handler = match field_entry.field_type() { - &FieldType::Str(ref options) => { + let freq_handler = match *field_entry.field_type() { + FieldType::Str(ref options) => { let indexing_options = options.get_indexing_options(); match option { SegmentPostingsOption::NoFreq => { - FreqHandler::new() + FreqHandler::new_without_freq() } SegmentPostingsOption::Freq => { if indexing_options.is_termfreq_enabled() { FreqHandler::new_with_freq() } else { - FreqHandler::new() + FreqHandler::new_without_freq() } } SegmentPostingsOption::FreqAndPositions => { @@ -170,34 +170,34 @@ impl SegmentReader { FreqHandler::new_with_freq() } else { - FreqHandler::new() + FreqHandler::new_without_freq() } } } } _ => { - FreqHandler::new() + FreqHandler::new_without_freq() } }; - Some(SegmentPostings::from_data(term_info.doc_freq, &postings_data, freq_handler)) + Some(SegmentPostings::from_data(term_info.doc_freq, postings_data, freq_handler)) } pub fn read_postings_all_info(&self, term: &Term) -> SegmentPostings { let field_entry = self.schema.get_field_entry(term.get_field()); - let segment_posting_option = match field_entry.field_type() { - &FieldType::Str(ref text_options) => { + let segment_posting_option = match *field_entry.field_type() { + FieldType::Str(ref text_options) => { match text_options.get_indexing_options() { TextIndexingOptions::TokenizedWithFreq => SegmentPostingsOption::Freq, TextIndexingOptions::TokenizedWithFreqAndPosition => SegmentPostingsOption::FreqAndPositions, _ => SegmentPostingsOption::NoFreq, } } - &FieldType::U32(_) => SegmentPostingsOption::NoFreq + FieldType::U32(_) => SegmentPostingsOption::NoFreq }; self.read_postings(term, segment_posting_option).expect("Read postings all info should not return None") } - pub fn get_term_info<'a>(&'a self, term: &Term) -> Option { + pub fn get_term_info(&self, term: &Term) -> Option { self.term_infos.get(term.as_slice()) } } diff --git a/src/datastruct/fstmap.rs b/src/datastruct/fstmap.rs index 9adf33458..ecc141672 100644 --- a/src/datastruct/fstmap.rs +++ b/src/datastruct/fstmap.rs @@ -1,3 +1,5 @@ +#![allow(should_implement_trait)] + use std::io; use std::io::Seek; use std::io::Write; @@ -79,9 +81,10 @@ impl<'a, V: 'static + BinarySerializable> FstKeyIter<'a, V> { } } + impl FstMap { - pub fn keys<'a>(&'a self,) -> FstKeyIter<'a, V> { + pub fn keys(&self,) -> FstKeyIter { FstKeyIter { streamer: self.fst_index.stream(), __phantom__: PhantomData, diff --git a/src/datastruct/skip/skiplist.rs b/src/datastruct/skip/skiplist.rs index 61b9f8bf8..a34dede89 100644 --- a/src/datastruct/skip/skiplist.rs +++ b/src/datastruct/skip/skiplist.rs @@ -106,16 +106,14 @@ impl<'a, T: BinarySerializable> SkipList<'a, T> { let mut 
next_layer_skip: Option<(DocId, u32)> = None; for skip_layer_id in 0..self.skip_layers.len() { let mut skip_layer: &mut Layer<'a, u32> = &mut self.skip_layers[skip_layer_id]; - match next_layer_skip { - Some((_, offset)) => { skip_layer.seek_offset(offset as usize); }, - None => {} - }; - next_layer_skip = skip_layer.seek(doc_id); + if let Some((_, offset)) = next_layer_skip { + skip_layer.seek_offset(offset as usize); + } + next_layer_skip = skip_layer.seek(doc_id); + } + if let Some((_, offset)) = next_layer_skip { + self.data_layer.seek_offset(offset as usize); } - match next_layer_skip { - Some((_, offset)) => { self.data_layer.seek_offset(offset as usize); }, - None => {} - }; self.data_layer.seek(doc_id) } diff --git a/src/datastruct/skip/skiplist_builder.rs b/src/datastruct/skip/skiplist_builder.rs index 41969ff68..e12444f11 100644 --- a/src/datastruct/skip/skiplist_builder.rs +++ b/src/datastruct/skip/skiplist_builder.rs @@ -37,14 +37,14 @@ impl LayerBuilder { self.remaining -= 1; self.len += 1; let offset = self.written_size() as u32; // TODO not sure if we want after or here - let res; - if self.remaining == 0 { - self.remaining = self.period; - res = Some((doc_id, offset)); - } - else { - res = None; - } + let res = + if self.remaining == 0 { + self.remaining = self.period; + Some((doc_id, offset)) + } + else { + None + }; try!(doc_id.serialize(&mut self.buffer)); try!(value.serialize(&mut self.buffer)); Ok(res) @@ -69,7 +69,7 @@ impl SkipListBuilder { } } - fn get_skip_layer<'a>(&'a mut self, layer_id: usize) -> &mut LayerBuilder { + fn get_skip_layer(&mut self, layer_id: usize) -> &mut LayerBuilder { if layer_id == self.skip_layers.len() { let layer_builder = LayerBuilder::with_period(self.period); self.skip_layers.push(layer_builder); diff --git a/src/datastruct/stacker/expull.rs b/src/datastruct/stacker/expull.rs index e38088fab..96f3a766a 100644 --- a/src/datastruct/stacker/expull.rs +++ b/src/datastruct/stacker/expull.rs @@ -43,7 +43,7 @@ impl ExpUnrolledLinkedList { // and we need to add 1u32 to store the pointer // to the next element. 
let new_block_size: usize = (self.len as usize + 1) * mem::size_of::(); - let new_block_addr: u32 = heap.allocate(new_block_size); + let new_block_addr: u32 = heap.allocate_space(new_block_size); heap.set(self.end, &new_block_addr); self.end = new_block_addr; } @@ -130,7 +130,7 @@ mod tests { #[test] fn test_stack() { let heap = Heap::with_capacity(1_000_000); - let (addr, stack) = heap.new::(); + let (addr, stack) = heap.allocate_object::(); stack.push(1u32, &heap); stack.push(2u32, &heap); stack.push(4u32, &heap); @@ -167,7 +167,7 @@ mod tests { bench.iter(|| { let mut stacks = Vec::with_capacity(100); for _ in 0..NUM_STACK { - let (_, stack) = heap.new::(); + let (_, stack) = heap.allocate_object::(); stacks.push(stack); } for s in 0..NUM_STACK { diff --git a/src/datastruct/stacker/hashmap.rs b/src/datastruct/stacker/hashmap.rs index b07374a20..4ebb53927 100644 --- a/src/datastruct/stacker/hashmap.rs +++ b/src/datastruct/stacker/hashmap.rs @@ -61,7 +61,7 @@ impl<'a, V> HashMap<'a, V> where V: From { } } - #[inline(always)] + #[inline] fn bucket(&self, key: &[u8]) -> usize { let hash: u64 = djb2(key); (hash as usize) & self.mask } @@ -81,7 +81,7 @@ impl<'a, V> HashMap<'a, V> where V: From { } pub fn iter<'b: 'a>(&'b self,) -> impl Iterator + 'b { - let heap: &'a Heap = &self.heap; + let heap: &'a Heap = self.heap; let table: &'b [KeyValue] = &self.table; self.occupied .iter() @@ -96,7 +96,7 @@ impl<'a, V> HashMap<'a, V> where V: From { } pub fn values_mut<'b: 'a>(&'b self,) -> impl Iterator + 'b { - let heap: &'a Heap = &self.heap; + let heap: &'a Heap = self.heap; let table: &'b [KeyValue] = &self.table; self.occupied .iter() @@ -112,7 +112,7 @@ impl<'a, V> HashMap<'a, V> where V: From { self.heap.get_mut_ref(addr) } Entry::Vacant(bucket) => { - let (addr, val): (u32, &mut V) = self.heap.new(); + let (addr, val): (u32, &mut V) = self.heap.allocate_object(); self.set_bucket(key.as_ref(), bucket, addr); val } diff --git a/src/datastruct/stacker/heap.rs b/src/datastruct/stacker/heap.rs index 18142e584..ead18c054 100644 --- a/src/datastruct/stacker/heap.rs +++ b/src/datastruct/stacker/heap.rs @@ -44,12 +44,12 @@ impl Heap { self.inner().num_free_bytes() } - pub fn allocate(&self, num_bytes: usize) -> u32 { - self.inner().allocate(num_bytes) + pub fn allocate_space(&self, num_bytes: usize) -> u32 { + self.inner().allocate_space(num_bytes) } - pub fn new>(&self,) -> (u32, &mut V) { - let addr = self.inner().allocate(mem::size_of::()); + pub fn allocate_object>(&self,) -> (u32, &mut V) { + let addr = self.inner().allocate_space(mem::size_of::()); let v: V = V::from(addr); self.inner().set(addr, &v); (addr, self.inner().get_mut_ref(addr)) @@ -115,7 +115,7 @@ impl InnerHeap { } } - pub fn allocate(&mut self, num_bytes: usize) -> u32 { + pub fn allocate_space(&mut self, num_bytes: usize) -> u32 { let addr = self.used; self.used += num_bytes as u32; if self.used <= self.buffer_len { @@ -126,7 +126,7 @@ impl InnerHeap { warn!("Exceeded heap size. The margin was apparently insufficient. 
The segment will be committed right after indexing this very last document."); self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize))); } - self.next_heap.as_mut().unwrap().allocate(num_bytes) + self.buffer_len + self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len } @@ -151,7 +151,7 @@ impl InnerHeap { } fn allocate_and_set(&mut self, data: &[u8]) -> BytesRef { - let start = self.allocate(data.len()); + let start = self.allocate_space(data.len()); let stop = start + data.len() as u32; self.get_mut_slice(start, stop).clone_from_slice(data); BytesRef { diff --git a/src/directory/directory.rs b/src/directory/directory.rs index 99396ee90..4fed52a04 100644 --- a/src/directory/directory.rs +++ b/src/directory/directory.rs @@ -11,9 +11,9 @@ use std::marker::Sync; /// /// There are currently two implementations of `Directory` /// -/// - The [MMapDirectory](struct.MmapDirectory.html), this /// should be your default choice. -/// - The [RAMDirectory](struct.RAMDirectory.html), which +/// - The [`MmapDirectory`](struct.MmapDirectory.html), this +/// should be your default choice. +/// - The [`RAMDirectory`](struct.RAMDirectory.html), which /// should be used mostly for tests. /// pub trait Directory: fmt::Debug + Send + Sync + 'static { diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index b97f7c77e..1ee6aacd1 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -23,7 +23,7 @@ use std::fs; use directory::shared_vec_slice::SharedVecSlice; -/// Directory storing data in files, read via MMap. +/// Directory storing data in files, read via mmap. /// /// The Mmap objects are cached to limit the /// system calls. @@ -98,7 +98,7 @@ impl MmapDirectory { } /// This Write wraps a File, but has the specificity of -/// call sync_all on flush. +/// calling `sync_all` on flush. struct SafeFileWriter { writer: BufWriter, } diff --git a/src/directory/mod.rs b/src/directory/mod.rs index 73091c2ed..ec07239f5 100644 --- a/src/directory/mod.rs +++ b/src/directory/mod.rs @@ -17,7 +17,7 @@ impl SeekableWrite for T {} /// Write object for Directory. /// -/// WritePtr are required to implement both Write +/// `WritePtr` are required to implement both Write /// and Seek. pub type WritePtr = Box; diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index b429dae34..e355ad771 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -78,13 +78,13 @@ impl InnerDirectory { InnerDirectory(Arc::new(RwLock::new(HashMap::new()))) } - fn write(&self, path: PathBuf, data: &Vec) -> io::Result { + fn write(&self, path: PathBuf, data: &[u8]) -> io::Result { let mut map = try!( self.0 .write() .map_err(|_| make_io_err(format!("Failed to lock the directory, when trying to write {:?}", path))) ); - let prev_value = map.insert(path, Arc::new(data.clone())); + let prev_value = map.insert(path, Arc::new(Vec::from(data))); Ok(prev_value.is_some()) } diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs index f56356268..ee1078d6a 100644 --- a/src/directory/read_only_source.rs +++ b/src/directory/read_only_source.rs @@ -32,6 +32,10 @@ impl ReadOnlySource { pub fn len(&self,) -> usize { self.as_slice().len() } + + pub fn is_empty(&self,) -> bool { + self.len() == 0 + } /// Creates an empty ReadOnlySource pub fn empty() -> ReadOnlySource { @@ -52,7 +56,7 @@ impl ReadOnlySource { /// Creates a cursor over the data. 
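The `is_empty` method added to `ReadOnlySource` above follows the standard `len`/`is_empty` pairing: a type exposing `len` is expected to expose `is_empty` defined as `len() == 0`. A minimal sketch of the convention, using a hypothetical byte wrapper rather than the real `ReadOnlySource`:

```rust
// Hypothetical wrapper over owned bytes, standing in for a read-only source.
struct Bytes(Vec<u8>);

impl Bytes {
    fn len(&self) -> usize {
        self.0.len()
    }

    // By convention, `is_empty` is exactly `len() == 0`.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

fn main() {
    assert!(Bytes(Vec::new()).is_empty());
    assert!(!Bytes(vec![1u8, 2u8]).is_empty());
}
```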
- pub fn cursor<'a>(&'a self) -> Cursor<&'a [u8]> { + pub fn cursor(&self) -> Cursor<&[u8]> { Cursor::new(&*self) } diff --git a/src/error.rs b/src/error.rs index 7dc50a88c..532bd25fb 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,3 +1,5 @@ +#![allow(enum_variant_names)] + use std::io; use std::result; use std::path::PathBuf; diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 8137d7fd5..0ecbf1f30 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -15,7 +15,7 @@ fn count_leading_zeros(mut val: u32) -> u8 { val <<= 1; result += 1; } - return result; + result } @@ -44,7 +44,7 @@ mod tests { lazy_static! { static ref SCHEMA: Schema = { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); schema_builder.add_u32_field("field", FAST); schema_builder.build() }; @@ -65,7 +65,7 @@ mod tests { } fn add_single_field_doc(fast_field_writers: &mut U32FastFieldsWriter, field: Field, value: u32) { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_u32(field, value); fast_field_writers.add_document(&doc); } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 7731fd140..ade459789 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -58,7 +58,8 @@ impl U32FastFieldReader { let bit_shift = (doc * self.num_bits) - addr * 8; //doc - long_addr * self.num_in_pack; let val_unshifted_unmasked: u64 = unsafe { * (self.data_ptr.offset(addr as isize) as *const u64) }; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u32; - return self.min_val + (val_shifted & self.mask); + self.min_val + (val_shifted & self.mask) + } } @@ -79,13 +80,13 @@ impl U32FastFieldsReader { } let mut end_offsets: Vec = field_offsets .iter() - .map(|&(_, offset)| offset.clone()) + .map(|&(_, offset)| offset) .collect(); end_offsets.push(header_offset); let mut field_offsets_map: HashMap = HashMap::new(); for (field_start_offsets, stop_offset) in field_offsets.iter().zip(end_offsets.iter().skip(1)) { - let (field, start_offset) = field_start_offsets.clone(); - field_offsets_map.insert(field.clone(), (start_offset.clone(), stop_offset.clone())); + let (field, start_offset) = *field_start_offsets; + field_offsets_map.insert(field, (start_offset, *stop_offset)); } Ok(U32FastFieldsReader { field_offsets: field_offsets_map, diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index c1a360aad..4527533df 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -30,14 +30,13 @@ impl U32FastFieldsWriter { } pub fn get_field_writer(&mut self, field: Field) -> Option<&mut U32FastFieldWriter> { - self.field_writers + self.field_writers .iter_mut() - .filter(|field_writer| field_writer.field == field) - .next() + .find(|field_writer| field_writer.field == field) } pub fn add_document(&mut self, doc: &Document) { - for field_writer in self.field_writers.iter_mut() { + for field_writer in &mut self.field_writers { field_writer.add_document(doc); } } @@ -69,7 +68,7 @@ pub struct U32FastFieldWriter { impl U32FastFieldWriter { pub fn new(field: Field) -> U32FastFieldWriter { U32FastFieldWriter { - field: field.clone(), + field: field, vals: Vec::new(), } } @@ -94,13 +93,13 @@ impl U32FastFieldWriter { match doc.get_first(self.field) { Some(v) => { match *v { - Value::U32(ref val) => { return *val; } + Value::U32(ref val) => { *val } _ => { panic!("Expected a u32field, got {:?} ", v) } } }, None => { // TODO make default value configurable - return 0u32; + 0u32 } } } diff --git 
a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 6c688c08a..0b9271567 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -29,14 +29,21 @@ pub const HEAP_SIZE_LIMIT: u32 = MARGIN_IN_BYTES * 3u32; // Add document will block if the number of docs waiting in the queue to be indexed reaches PIPELINE_MAX_SIZE_IN_DOCS const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000; + +type DocumentSender = chan::Sender; +type DocumentReceiver = chan::Receiver; + +type NewSegmentSender = chan::Sender>; +type NewSegmentReceiver = chan::Receiver>; + pub struct IndexWriter { index: Index, heap_size_in_bytes_per_thread: usize, workers_join_handle: Vec>, - segment_ready_sender: chan::Sender>, - segment_ready_receiver: chan::Receiver>, - document_receiver: chan::Receiver, - document_sender: chan::Sender, + segment_ready_sender: NewSegmentSender, + segment_ready_receiver: NewSegmentReceiver, + document_receiver: DocumentReceiver, + document_sender: DocumentSender, num_threads: usize, docstamp: u64, } @@ -109,8 +116,8 @@ impl IndexWriter { if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize { panic!(format!("The heap size per thread needs to be at least {}.", HEAP_SIZE_LIMIT)); } - let (document_sender, document_receiver): (chan::Sender, chan::Receiver) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS); - let (segment_ready_sender, segment_ready_receiver): (chan::Sender>, chan::Receiver>) = chan::async(); + let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS); + let (segment_ready_sender, segment_ready_receiver): (NewSegmentSender, NewSegmentReceiver) = chan::async(); let mut index_writer = IndexWriter { heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread, index: index.clone(), @@ -133,7 +140,7 @@ impl IndexWriter { Ok(()) } - pub fn merge(&mut self, segments: &Vec) -> Result<()> { + pub fn merge(&mut self, segments: &[Segment]) -> Result<()> { let schema = self.index.schema(); let merger = try!(IndexMerger::open(schema, segments)); let mut merged_segment = self.index.new_segment(); @@ -152,9 +159,9 @@ impl IndexWriter { /// when no documents are remaining. /// /// Returns the former segment_ready channel. 
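The `DocumentSender`/`DocumentReceiver` and `NewSegmentSender`/`NewSegmentReceiver` aliases introduced above exist to shorten the channel types threaded through `IndexWriter`. A sketch of the same pattern, substituting std's `mpsc` channels for the `chan` crate so the example is self-contained, and a `String` for `Document`:

```rust
use std::sync::mpsc::{sync_channel, Receiver, SyncSender};

// Stand-in for tantivy's Document type.
type Document = String;

// The aliases keep struct fields and signatures readable.
type DocumentSender = SyncSender<Document>;
type DocumentReceiver = Receiver<Document>;

fn document_channel(capacity: usize) -> (DocumentSender, DocumentReceiver) {
    // A bounded channel mirrors `chan::sync(PIPELINE_MAX_SIZE_IN_DOCS)`:
    // senders block once `capacity` documents are queued.
    sync_channel(capacity)
}

fn main() {
    let (sender, receiver) = document_channel(10_000);
    sender.send("a small document".to_string()).unwrap();
    assert_eq!(receiver.recv().unwrap(), "a small document");
}
```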
- fn recreate_channels(&mut self,) -> (chan::Receiver, chan::Receiver>) { - let (mut document_sender, mut document_receiver): (chan::Sender, chan::Receiver) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS); - let (mut segment_ready_sender, mut segment_ready_receiver): (chan::Sender>, chan::Receiver>) = chan::async(); + fn recreate_channels(&mut self,) -> (DocumentReceiver, chan::Receiver>) { + let (mut document_sender, mut document_receiver): (DocumentSender, DocumentReceiver) = chan::sync(PIPELINE_MAX_SIZE_IN_DOCS); + let (mut segment_ready_sender, mut segment_ready_receiver): (NewSegmentSender, NewSegmentReceiver) = chan::async(); swap(&mut self.document_sender, &mut document_sender); swap(&mut self.document_receiver, &mut document_receiver); swap(&mut self.segment_ready_sender, &mut segment_ready_sender); @@ -282,7 +289,7 @@ mod tests { #[test] fn test_commit_and_rollback() { - let mut schema_builder = schema::SchemaBuilder::new(); + let mut schema_builder = schema::SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", schema::TEXT); let index = Index::create_in_ram(schema_builder.build()); @@ -297,7 +304,7 @@ mod tests { // writing the segment let mut index_writer = index.writer_with_num_threads(3, 40_000_000).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a"); index_writer.add_document(doc).unwrap(); } @@ -305,12 +312,12 @@ mod tests { assert_eq!(num_docs_containing("a"), 0); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "b"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "c"); index_writer.add_document(doc).unwrap(); } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index d81bee5c2..b05f48eda 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -26,7 +26,7 @@ struct PostingsMerger<'a> { doc_offsets: Vec, heap: BinaryHeap, term_streams: Vec>, - readers: &'a Vec, + readers: &'a [SegmentReader], } #[derive(PartialEq, Eq, Debug)] @@ -43,12 +43,12 @@ impl PartialOrd for HeapItem { impl Ord for HeapItem { fn cmp(&self, other: &HeapItem) -> Ordering { - return (&other.term, &other.segment_ord).cmp(&(&self.term, &self.segment_ord)) + (&other.term, &other.segment_ord).cmp(&(&self.term, &self.segment_ord)) } } impl<'a> PostingsMerger<'a> { - fn new(readers: &'a Vec) -> PostingsMerger<'a> { + fn new(readers: &'a [SegmentReader]) -> PostingsMerger<'a> { let mut doc_offsets: Vec = Vec::new(); let mut max_doc = 0; for reader in readers { @@ -74,15 +74,12 @@ impl<'a> PostingsMerger<'a> { // pushes the term_reader associated with the given segment ordinal // into the heap. 
fn push_next_segment_el(&mut self, segment_ord: usize) { - match self.term_streams[segment_ord].next() { - Some(term) => { - let it = HeapItem { - term: Term::from(term), - segment_ord: segment_ord, - }; - self.heap.push(it); - } - None => {} + if let Some(term) = self.term_streams[segment_ord].next() { + let it = HeapItem { + term: Term::from(term), + segment_ord: segment_ord, + }; + self.heap.push(it); } } @@ -100,6 +97,12 @@ impl<'a> PostingsMerger<'a> { self.push_next_segment_el(heap_item.segment_ord); } +} + +impl<'a> Iterator for PostingsMerger<'a> { + + type Item = (Term, ChainedPostings<'a>); + fn next(&mut self,) -> Option<(Term, ChainedPostings<'a>)> { // TODO remove the Vec allocations match self.heap.pop() { @@ -122,6 +125,7 @@ impl<'a> PostingsMerger<'a> { } } + pub struct IndexMerger { schema: Schema, readers: Vec, @@ -145,20 +149,18 @@ impl DeltaPositionComputer { self.buffer.resize(positions.len(), 0u32); } let mut last_pos = 0u32; - let num_positions = positions.len(); - for i in 0..num_positions { - let position = positions[i]; + for (i, position) in positions.iter().cloned().enumerate() { self.buffer[i] = position - last_pos; last_pos = position; } - &self.buffer[..num_positions] + &self.buffer[..positions.len()] } } impl IndexMerger { - pub fn open(schema: Schema, segments: &Vec) -> Result { + pub fn open(schema: Schema, segments: &[Segment]) -> Result { let mut readers = Vec::new(); let mut max_doc = 0; for segment in segments { @@ -232,26 +234,21 @@ impl IndexMerger { } fn write_postings(&self, postings_serializer: &mut PostingsSerializer) -> Result<()> { - let mut postings_merger = PostingsMerger::new(&self.readers); + let postings_merger = PostingsMerger::new(&self.readers); let mut delta_position_computer = DeltaPositionComputer::new(); - loop { - match postings_merger.next() { - Some((term, mut merged_doc_ids)) => { - try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId)); - while merged_doc_ids.advance() { - let delta_positions: &[u32] = delta_position_computer.compute_delta_positions(merged_doc_ids.positions()); - try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), delta_positions)); - } - try!(postings_serializer.close_term()); - } - None => { break; } + for (term, mut merged_doc_ids) in postings_merger { + try!(postings_serializer.new_term(&term, merged_doc_ids.len() as DocId)); + while merged_doc_ids.advance() { + let delta_positions: &[u32] = delta_position_computer.compute_delta_positions(merged_doc_ids.positions()); + try!(postings_serializer.write_doc(merged_doc_ids.doc(), merged_doc_ids.term_freq(), delta_positions)); } + try!(postings_serializer.close_term()); } Ok(()) } fn write_storable_fields(&self, store_writer: &mut StoreWriter) -> Result<()> { - for reader in self.readers.iter() { + for reader in &self.readers { let store_reader = reader.get_store_reader(); try!(store_writer.stack_reader(store_reader)); } @@ -284,10 +281,10 @@ mod tests { #[test] fn test_index_merger() { - let mut schema_builder = schema::SchemaBuilder::new(); - let text_fieldtype = schema::TextOptions::new().set_indexing_options(TextIndexingOptions::TokenizedWithFreq).set_stored(); + let mut schema_builder = schema::SchemaBuilder::default(); + let text_fieldtype = schema::TextOptions::default().set_indexing_options(TextIndexingOptions::TokenizedWithFreq).set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); - let score_fieldtype = schema::U32Options::new().set_fast(); + let score_fieldtype = 
schema::U32Options::default().set_fast(); let score_field = schema_builder.add_u32_field("score", score_fieldtype); let index = Index::create_in_ram(schema_builder.build()); @@ -296,19 +293,19 @@ mod tests { { // writing the segment { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "af b"); doc.add_u32(score_field, 3); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c"); doc.add_u32(score_field, 5); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c d"); doc.add_u32(score_field, 7); index_writer.add_document(doc).unwrap(); @@ -319,13 +316,13 @@ mod tests { { // writing the segment { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "af b"); doc.add_u32(score_field, 11); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c g"); doc.add_u32(score_field, 13); index_writer.add_document(doc).unwrap(); @@ -341,7 +338,7 @@ mod tests { { let searcher = index.searcher(); let get_doc_ids = |terms: Vec| { - let mut collector = TestCollector::new(); + let mut collector = TestCollector::default(); let query = MultiTermQuery::from(terms); assert!(searcher.search(&query, &mut collector).is_ok()); collector.docs() diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 13b9aa179..07af613cc 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -40,8 +40,8 @@ fn create_fieldnorms_writer(schema: &Schema) -> U32FastFieldsWriter { } fn posting_from_field_entry<'a>(field_entry: &FieldEntry, heap: &'a Heap) -> Box { - match field_entry.field_type() { - &FieldType::Str(ref text_options) => { + match *field_entry.field_type() { + FieldType::Str(ref text_options) => { match text_options.get_indexing_options() { TextIndexingOptions::TokenizedWithFreq => { SpecializedPostingsWriter::::new_boxed(heap) @@ -54,7 +54,7 @@ fn posting_from_field_entry<'a>(field_entry: &FieldEntry, heap: &'a Heap) -> Box } } } - &FieldType::U32(_) => { + FieldType::U32(_) => { SpecializedPostingsWriter::::new_boxed(heap) } } @@ -90,7 +90,7 @@ impl<'a> SegmentWriter<'a> { // enforced by the fact that "self" is moved. 
pub fn finalize(mut self,) -> Result<()> { let segment_info = self.segment_info(); - for per_field_postings_writer in self.per_field_postings_writers.iter_mut() { + for per_field_postings_writer in &mut self.per_field_postings_writers { per_field_postings_writer.close(self.heap); } write(&self.per_field_postings_writers, @@ -112,17 +112,18 @@ impl<'a> SegmentWriter<'a> { let field_options = schema.get_field_entry(field); match *field_options.field_type() { FieldType::Str(ref text_options) => { - let mut num_tokens = 0; - if text_options.get_indexing_options().is_tokenized() { - num_tokens = field_posting_writer.index_text(doc_id, field, &field_values, self.heap); - } - else { - for field_value in field_values { - let term = Term::from_field_text(field, field_value.value().text()); - field_posting_writer.suscribe(doc_id, 0, &term, self.heap); - num_tokens += 1u32; + let num_tokens: u32 = + if text_options.get_indexing_options().is_tokenized() { + field_posting_writer.index_text(doc_id, field, &field_values, self.heap) } - } + else { + let num_field_values = field_values.len() as u32; + for field_value in field_values { + let term = Term::from_field_text(field, field_value.value().text()); + field_posting_writer.suscribe(doc_id, 0, &term, self.heap); + } + num_field_values + }; self.fieldnorms_writer .get_field_writer(field) .map(|field_norms_writer| { @@ -141,7 +142,7 @@ impl<'a> SegmentWriter<'a> { } self.fieldnorms_writer.fill_val_up_to(doc_id); - self.fast_field_writers.add_document(&doc); + self.fast_field_writers.add_document(doc); let stored_fieldvalues: Vec<&FieldValue> = doc .get_fields() .iter() @@ -166,7 +167,7 @@ impl<'a> SegmentWriter<'a> { } -fn write<'a>(per_field_postings_writers: &Vec>, +fn write<'a>(per_field_postings_writers: &[Box], fast_field_writers: &U32FastFieldsWriter, fieldnorms_writer: &U32FastFieldsWriter, segment_info: SegmentInfo, diff --git a/src/lib.rs b/src/lib.rs index 257897112..4ab767806 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,8 @@ +#![allow(unknown_lints)] +#![allow(module_inception)] + + + /*! Tantivy is a search engine library. 
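The `match *field_entry.field_type()` rewrites in segment_writer above (and in segment_reader, the serializer, and the query parser elsewhere in this diff) all apply one idiom: dereference once and match unprefixed patterns, rather than matching a reference against `&Variant` arms. A minimal sketch with a stand-in enum modeled on `FieldType` (payload types are placeholders):

```rust
// Stand-in enum modeled on tantivy's FieldType.
enum FieldType {
    Str(String),
    U32(u32),
}

fn type_name(field_type: &FieldType) -> &'static str {
    // `match *expr` with plain patterns; `ref` (or `_`) bindings keep
    // the match from trying to move data out of the reference.
    match *field_type {
        FieldType::Str(ref _options) => "str",
        FieldType::U32(_) => "u32",
    }
}

fn main() {
    assert_eq!(type_name(&FieldType::Str(String::new())), "str");
    assert_eq!(type_name(&FieldType::U32(7)), "u32");
}
```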
@@ -138,7 +143,7 @@ mod tests { #[test] fn test_indexing() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_from_tempdir(schema).unwrap(); @@ -146,17 +151,17 @@ mod tests { // writing the segment let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "af b"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c d"); index_writer.add_document(doc).unwrap(); } @@ -167,31 +172,31 @@ mod tests { #[test] fn test_docfreq() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); index_writer.commit().unwrap(); } { { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a a"); index_writer.add_document(doc).unwrap(); } index_writer.commit().unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "c"); index_writer.add_document(doc).unwrap(); index_writer.commit().unwrap(); @@ -212,22 +217,22 @@ mod tests { #[test] fn test_fieldnorm() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); { let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); } { - let doc = Document::new(); + let doc = Document::default(); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b"); index_writer.add_document(doc).unwrap(); } @@ -246,7 +251,7 @@ mod tests { #[test] fn test_termfreq() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -254,7 +259,7 @@ mod tests { // writing the segment let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "af af af bc bc"); index_writer.add_document(doc).unwrap(); } @@ -273,7 +278,7 @@ mod tests { #[test] fn test_searcher() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let 
schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -282,17 +287,17 @@ mod tests { // writing the segment let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "af af af b"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c d"); index_writer.add_document(doc).unwrap(); } @@ -302,7 +307,7 @@ mod tests { let searcher = index.searcher(); let get_doc_ids = |terms: Vec| { let query = MultiTermQuery::from(terms); - let mut collector = TestCollector::new(); + let mut collector = TestCollector::default(); assert!(searcher.search(&query, &mut collector).is_ok()); collector.docs() }; @@ -342,7 +347,7 @@ mod tests { #[test] fn test_searcher_2() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -351,17 +356,17 @@ mod tests { // writing the segment let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "af b"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c"); index_writer.add_document(doc).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b c d"); index_writer.add_document(doc).unwrap(); } diff --git a/src/postings/chained_postings.rs b/src/postings/chained_postings.rs index debaa9567..9b1408c64 100644 --- a/src/postings/chained_postings.rs +++ b/src/postings/chained_postings.rs @@ -37,7 +37,7 @@ impl<'a> DocSet for ChainedPostings<'a> { return false; } } - return true + true } fn doc(&self,) -> DocId { diff --git a/src/postings/freq_handler.rs b/src/postings/freq_handler.rs index 71a785eb5..b02ec8d1f 100644 --- a/src/postings/freq_handler.rs +++ b/src/postings/freq_handler.rs @@ -28,7 +28,7 @@ fn read_positions(data: &[u8]) -> Vec { impl FreqHandler { - pub fn new() -> FreqHandler { + pub fn new_without_freq() -> FreqHandler { FreqHandler { freq_decoder: SIMDBlockDecoder::with_val(1u32), positions: Vec::new(), @@ -110,7 +110,7 @@ impl FreqHandler { } } - #[inline(always)] + #[inline] pub fn freq(&self, idx: usize)-> u32 { self.freq_decoder.output(idx) } diff --git a/src/postings/intersection.rs b/src/postings/intersection.rs index 77319d53e..4069a602b 100644 --- a/src/postings/intersection.rs +++ b/src/postings/intersection.rs @@ -21,13 +21,13 @@ impl<'a> IntersectionDocSet<'a> { pub fn new(mut postings: Vec>) -> IntersectionDocSet<'a> { let left = postings.pop().unwrap(); - let right; - if postings.len() == 1 { - right = postings.pop().unwrap(); - } - else { - right = Box::new(IntersectionDocSet::new(postings)); - } + let right = + if postings.len() == 1 { + postings.pop().unwrap() + } + else { + Box::new(IntersectionDocSet::new(postings)) + }; IntersectionDocSet::from_pair(left, right) } } @@ -74,13 +74,11 @@ impl<'a> DocSet for IntersectionDocSet<'a> { } } -#[inline(never)] pub fn intersection<'a, TDocSet: DocSet + 'a>(postings: Vec) -> 
IntersectionDocSet<'a> { let boxed_postings: Vec> = postings .into_iter() - .map(|postings| { - let boxed_p: Box = Box::new(postings); - boxed_p + .map(|postings: TDocSet| { + Box::new(postings) as Box }) .collect(); IntersectionDocSet::new(boxed_postings) diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 8dd90457f..6f65a1878 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -46,7 +46,7 @@ mod tests { #[test] pub fn test_position_write() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -66,7 +66,7 @@ mod tests { #[test] pub fn test_position_and_fieldnorm_write_fullstack() { - let mut schema_builder = SchemaBuilder::new(); + let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema.clone()); @@ -75,18 +75,18 @@ mod tests { { let mut segment_writer = SegmentWriter::for_segment(&heap, segment.clone(), &schema).unwrap(); { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "a b a c a d a a."); doc.add_text(text_field, "d d d d a"); // checking that position works if the field has two values. segment_writer.add_document(&doc, &schema).unwrap(); } { - let mut doc = Document::new(); + let mut doc = Document::default(); doc.add_text(text_field, "b a"); segment_writer.add_document(&doc, &schema).unwrap(); } for i in 2..1000 { - let mut doc = Document::new(); + let mut doc = Document::default(); let mut text = iter::repeat("e ").take(i).collect::(); text.push_str(" a"); doc.add_text(text_field, &text); diff --git a/src/postings/postings.rs b/src/postings/postings.rs index e18894d83..5af6a859b 100644 --- a/src/postings/postings.rs +++ b/src/postings/postings.rs @@ -47,6 +47,9 @@ impl<'a, TPostings: Postings> Postings for &'a mut TPostings { pub trait HasLen { fn len(&self,) -> usize; + fn is_empty(&self,) -> bool { + self.len() == 0 + } } impl HasLen for Box { @@ -56,7 +59,6 @@ impl HasLen for Box { } } - impl<'a> HasLen for &'a HasLen { fn len(&self,) -> usize { let unref: &HasLen = *self; diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 2a204ecda..2ccaaa5fc 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -17,7 +17,7 @@ pub trait PostingsWriter { fn serialize(&self, serializer: &mut PostingsSerializer, heap: &Heap) -> io::Result<()>; - fn index_text<'a>(&mut self, doc_id: DocId, field: Field, field_values: &Vec<&'a FieldValue>, heap: &Heap) -> u32 { + fn index_text<'a>(&mut self, doc_id: DocId, field: Field, field_values: &[&'a FieldValue], heap: &Heap) -> u32 { let mut pos = 0u32; let mut num_tokens: u32 = 0u32; let mut term = Term::allocate(field, 100); @@ -25,16 +25,11 @@ pub trait PostingsWriter { let mut tokens = SimpleTokenizer.tokenize(field_value.value().text()); // right now num_tokens and pos are redundant, but it should // change when we get proper analyzers - loop { - match tokens.next() { - Some(token) => { - term.set_text(token); - self.suscribe(doc_id, pos, &term, heap); - pos += 1u32; - num_tokens += 1u32; - }, - None => { break; } - } + while let Some(token) = tokens.next() { + term.set_text(token); + self.suscribe(doc_id, pos, &term, heap); + pos += 1u32; + num_tokens += 1u32; } pos += 1; // THIS is to avoid phrase 
query across field repetition. @@ -52,13 +47,13 @@ fn hashmap_size_in_bits(heap_capacity: u32) -> usize { let num_buckets_usable = heap_capacity / 100; let hash_table_size = num_buckets_usable * 2; let mut pow = 512; - for num_bit in 10 .. 32 { - pow = pow << 1; + for num_bits in 10 .. 32 { + pow <<= 1; if pow > hash_table_size { - return num_bit; + return num_bits; } } - return 32 + 32 } impl<'a, Rec: Recorder + 'static> SpecializedPostingsWriter<'a, Rec> { @@ -86,7 +81,7 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<' } } - #[inline(always)] + #[inline] fn suscribe(&mut self, doc: DocId, position: u32, term: &Term, heap: &Heap) { let mut recorder = self.term_index.get_or_create(term); let current_doc = recorder.current_doc(); diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs index c4c354b6c..31d384aeb 100644 --- a/src/postings/recorder.rs +++ b/src/postings/recorder.rs @@ -168,31 +168,26 @@ impl Recorder for TFAndPositionRecorder { fn serialize(&self, self_addr: u32, serializer: &mut PostingsSerializer, heap: &Heap) -> io::Result<()> { let mut doc_positions = Vec::with_capacity(100); let mut positions_iter = self.stack.iter(self_addr, heap); - loop { - if let Some(doc) = positions_iter.next() { - let mut prev_position = 0; - doc_positions.clear(); - loop { - match positions_iter.next() { - Some(position) => { - if position == POSITION_END { - break; - } - else { - doc_positions.push(position - prev_position); - prev_position = position; - } + while let Some(doc) = positions_iter.next() { + let mut prev_position = 0; + doc_positions.clear(); + loop { + match positions_iter.next() { + Some(position) => { + if position == POSITION_END { + break; } - None => { - panic!("This should never happen. Pleasee report the bug."); + else { + doc_positions.push(position - prev_position); + prev_position = position; } } + None => { + panic!("This should never happen. Please report the bug."); + } } - try!(serializer.write_doc(doc, doc_positions.len() as u32, &doc_positions)); - } - else { - break; } + try!(serializer.write_doc(doc, doc_positions.len() as u32, &doc_positions)); } Ok(()) } diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 61e034562..99b2aa65c 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -25,7 +25,7 @@ impl<'a> SegmentPostings<'a> { len: 0, doc_offset: 0, block_decoder: SIMDBlockDecoder::new(), - freq_handler: FreqHandler::new(), + freq_handler: FreqHandler::new_without_freq(), remaining_data: &EMPTY_ARRAY, cur: Wrapping(usize::max_value()), } @@ -55,7 +55,6 @@ impl<'a> SegmentPostings<'a> { } } - #[inline(always)] fn index_within_block(&self,) -> usize { self.cur.0 % NUM_DOCS_PER_BLOCK } @@ -67,7 +66,7 @@ impl<'a> DocSet for SegmentPostings<'a> { // goes to the next element. // next needs to be called a first time to point to the correct element. 
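The recorder rewrite above is the same `loop { match iter.next() { ... None => break } }` to `while let` conversion applied in postings_writer and the query parser. A minimal, self-contained sketch of the shape:

```rust
fn main() {
    let mut tokens = ["old", "man", "sea"].iter();
    let mut collected: Vec<&str> = Vec::new();

    // `while let` replaces the explicit loop/match/break dance while
    // preserving the manual `next()` calls the original code relies on.
    while let Some(token) = tokens.next() {
        collected.push(*token);
    }

    assert_eq!(collected, vec!["old", "man", "sea"]);
}
```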
- #[inline(always)] + #[inline] fn advance(&mut self,) -> bool { self.cur += Wrapping(1); if self.cur.0 >= self.len { @@ -76,10 +75,10 @@ impl<'a> DocSet for SegmentPostings<'a> { if self.index_within_block() == 0 { self.load_next_block(); } - return true; + true } - #[inline(always)] + #[inline] fn doc(&self,) -> DocId { self.block_decoder.output(self.index_within_block()) } diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 2e8233dd4..a390e8b70 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -62,11 +62,11 @@ impl PostingsSerializer { pub fn load_indexing_options(&mut self, field: Field) { let field_entry: &FieldEntry = self.schema.get_field_entry(field); - self.text_indexing_options = match field_entry.field_type() { - &FieldType::Str(ref text_options) => { + self.text_indexing_options = match *field_entry.field_type() { + FieldType::Str(ref text_options) => { text_options.get_indexing_options() } - &FieldType::U32(ref u32_options) => { + FieldType::U32(ref u32_options) => { if u32_options.is_indexed() { TextIndexingOptions::Unindexed } diff --git a/src/postings/vec_postings.rs b/src/postings/vec_postings.rs index 773efec9b..c980bc4ac 100644 --- a/src/postings/vec_postings.rs +++ b/src/postings/vec_postings.rs @@ -80,10 +80,10 @@ impl DocSet for VecPostings { } self.cursor = Wrapping(start + 1); if self.cursor.0 < self.doc_ids.len() { - return SkipResult::OverStep; + SkipResult::OverStep } else { - return SkipResult::End; + SkipResult::End } } } diff --git a/src/query/daat_multiterm_scorer.rs b/src/query/daat_multiterm_scorer.rs index 798942e4a..cd00bcbee 100644 --- a/src/query/daat_multiterm_scorer.rs +++ b/src/query/daat_multiterm_scorer.rs @@ -14,8 +14,8 @@ use Score; struct HeapItem(DocId, u32); impl PartialOrd for HeapItem { - fn partial_cmp(&self, other:&Self) -> Option { - Some(self.cmp(&other)) + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) } } @@ -35,7 +35,7 @@ impl Filter { (self.and_mask & ord_set) == self.result } - fn new(occurs: &Vec) -> Filter { + fn new(occurs: &[Occur]) -> Filter { let mut and_mask = 0u64; let mut result = 0u64; for (i, occur) in occurs.iter().enumerate() { @@ -167,24 +167,19 @@ impl DocSet for DAATMul } } self.advance_head(); - loop { - match self.queue.peek() { - Some(&HeapItem(peek_doc, peek_ord)) => { - if peek_doc != self.doc { - break; - } - else { - let peek_ord: usize = peek_ord as usize; - let peek_tf = self.term_frequencies[peek_ord]; - let peek_fieldnorm = self.get_field_norm(peek_ord, peek_doc); - self.similarity.update(peek_ord, peek_tf, peek_fieldnorm); - ord_bitset |= 1 << peek_ord; - } - } - None => { break; } + while let Some(&HeapItem(peek_doc, peek_ord)) = self.queue.peek() { + if peek_doc == self.doc { + let peek_ord: usize = peek_ord as usize; + let peek_tf = self.term_frequencies[peek_ord]; + let peek_fieldnorm = self.get_field_norm(peek_ord, peek_doc); + self.similarity.update(peek_ord, peek_tf, peek_fieldnorm); + ord_bitset |= 1 << peek_ord; + } + else { + break; } self.advance_head(); - } + } if self.filter.accept(ord_bitset) { return true; } diff --git a/src/query/multi_term_query.rs b/src/query/multi_term_query.rs index 12a5c8c7d..eaca20fa0 100644 --- a/src/query/multi_term_query.rs +++ b/src/query/multi_term_query.rs @@ -40,7 +40,7 @@ impl MultiTermQuery { let num_docs = searcher.num_docs() as f32; let idfs: Vec = self.occur_terms .iter() - .map(|&(_, ref term)| searcher.doc_freq(&term)) + .map(|&(_, ref term)| searcher.doc_freq(term)) 
@@ -73,13 +73,10 @@ impl MultiTermQuery {
             let mut decode_timer = timer.open("decode_all");
             for &(occur, ref term) in &self.occur_terms {
                 let _decode_one_timer = decode_timer.open("decode_one");
-                match reader.read_postings(&term, SegmentPostingsOption::Freq) {
-                    Some(postings) => {
-                        let field = term.get_field();
-                        let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
-                        postings_and_fieldnorms.push((occur, postings, fieldnorm_reader));
-                    }
-                    None => {}
+                if let Some(postings) = reader.read_postings(term, SegmentPostingsOption::Freq) {
+                    let field = term.get_field();
+                    let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
+                    postings_and_fieldnorms.push((occur, postings, fieldnorm_reader));
                 }
             }
         }
@@ -120,7 +117,7 @@ impl Query for MultiTermQuery {
               doc_address: &DocAddress) -> Result<Explanation> {
         let segment_reader = searcher.segment_reader(doc_address.segment_ord() as usize);
         let similitude = SimilarityExplainer::from(self.similitude(searcher));
-        let mut timer_tree = TimerTree::new();
+        let mut timer_tree = TimerTree::default();
         let mut postings = try!(
             self.search_segment(
                 segment_reader,
@@ -144,7 +141,7 @@ impl Query for MultiTermQuery {
         &self,
         searcher: &Searcher,
         collector: &mut C) -> Result<TimerTree> {
-        let mut timer_tree = TimerTree::new();
+        let mut timer_tree = TimerTree::default();
         {
             let mut search_timer = timer_tree.open("search");
             for (segment_ord, segment_reader) in searcher.segment_readers().iter().enumerate() {
diff --git a/src/query/phrase_query.rs b/src/query/phrase_query.rs
index b4ec5b21e..a0ce11748 100644
--- a/src/query/phrase_query.rs
+++ b/src/query/phrase_query.rs
@@ -18,7 +18,7 @@ pub struct PhraseQuery {
 impl Query for PhraseQuery {

     fn search(&self, searcher: &Searcher, collector: &mut C) -> io::Result<TimerTree> {
-        let mut timer_tree = TimerTree::new();
+        let mut timer_tree = TimerTree::default();
         {
             let mut search_timer = timer_tree.open("search");
             for (segment_ord, segment_reader) in searcher.segments().iter().enumerate() {
diff --git a/src/query/query_parser.rs b/src/query/query_parser.rs
index 4c45a536f..2a24731e4 100644
--- a/src/query/query_parser.rs
+++ b/src/query/query_parser.rs
@@ -30,8 +30,8 @@ pub enum StandardQuery {

 impl StandardQuery {
     pub fn num_terms(&self,) -> usize {
-        match self {
-            &StandardQuery::MultiTerm(ref q) => {
+        match *self {
+            StandardQuery::MultiTerm(ref q) => {
                 q.num_terms()
             }
         }
@@ -51,8 +51,8 @@ impl Query for StandardQuery {
         &self,
         searcher: &Searcher,
         doc_address: &DocAddress) -> tantivy_Error {
-        match self {
-            &StandardQuery::MultiTerm(ref q) => q.explain(searcher, doc_address)
+        match *self {
+            StandardQuery::MultiTerm(ref q) => q.explain(searcher, doc_address)
         }
     }
 }
@@ -62,13 +62,8 @@ fn compute_terms(field: Field, text: &str) -> Vec<Term> {
     let tokenizer = SimpleTokenizer::new();
     let mut tokens = Vec::new();
     let mut token_it = tokenizer.tokenize(text);
-    loop {
-        match token_it.next() {
-            Some(token_str) => {
-                tokens.push(Term::from_field_text(field, token_str));
-            }
-            None => { break; }
-        }
+    while let Some(token_str) = token_it.next() {
+        tokens.push(Term::from_field_text(field, token_str));
     }
     tokens
 }
@@ -86,11 +81,11 @@ impl QueryParser {

     fn transform_field_and_value(&self, field: Field, val: &str) -> Result<Vec<Term>, ParsingError> {
         let field_entry = self.schema.get_field_entry(field);
-        Ok(match field_entry.field_type() {
-            &FieldType::Str(_) => {
+        Ok(match *field_entry.field_type() {
+            FieldType::Str(_) => {
                 compute_terms(field, val)
             },
-            &FieldType::U32(_) => {
+            FieldType::U32(_) => {
                 let u32_parsed: u32 = try!(val
                     .parse::<u32>()
                     .map_err(|_| {
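Several hunks here (`query_parser.rs`, and `serializer.rs`/`field_entry.rs` earlier) switch from `match self { &Variant(..) => .. }` to `match *self { Variant(..) => .. }`: dereference once in the scrutinee instead of prefixing every arm with `&`. A toy illustration with a stand-in enum, not the real `StandardQuery`:

```rust
// Sketch of the `match self { &X => .. }` -> `match *self { X => .. }` cleanup.
enum StandardQuery {
    MultiTerm(Vec<String>),
}

impl StandardQuery {
    fn num_terms(&self) -> usize {
        // `ref` keeps the binding a borrow, so matching on `*self`
        // does not try to move out of `&self`.
        match *self {
            StandardQuery::MultiTerm(ref terms) => terms.len(),
        }
    }
}

fn main() {
    let q = StandardQuery::MultiTerm(vec!["old".into(), "man".into()]);
    assert_eq!(q.num_terms(), 2);
}
```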
@@ -282,7 +277,7 @@ mod tests {

     #[test]
     pub fn test_query_parser() {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = SchemaBuilder::default();
         let text_field = schema_builder.add_text_field("text", STRING);
         let title_field = schema_builder.add_text_field("title", STRING);
         let author_field = schema_builder.add_text_field("author", STRING);
diff --git a/src/query/similarity.rs b/src/query/similarity.rs
index b911cd435..f3fbb4409 100644
--- a/src/query/similarity.rs
+++ b/src/query/similarity.rs
@@ -4,5 +4,5 @@ use query::MultiTermAccumulator;

 pub trait Similarity: MultiTermAccumulator {
     fn score(&self, ) -> Score;
-    fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation;
+    fn explain(&self, vals: &[(usize, u32, u32)]) -> Explanation;
 }
\ No newline at end of file
diff --git a/src/query/similarity_explainer.rs b/src/query/similarity_explainer.rs
index 19588e329..679cad103 100644
--- a/src/query/similarity_explainer.rs
+++ b/src/query/similarity_explainer.rs
@@ -42,7 +42,7 @@ impl Similarity for SimilarityExplainer
-    fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation {
+    fn explain(&self, vals: &[(usize, u32, u32)]) -> Explanation {
         self.scorer.explain(vals)
     }
 }
diff --git a/src/query/tfidf.rs b/src/query/tfidf.rs
index 3f978ac78..e5fcd7981 100644
--- a/src/query/tfidf.rs
+++ b/src/query/tfidf.rs
@@ -14,14 +14,14 @@ pub struct TfIdf {

 impl MultiTermAccumulator for TfIdf {

-    #[inline(always)]
+    #[inline]
     fn update(&mut self, term_ord: usize, term_freq: u32, fieldnorm: u32) {
         assert!(term_freq != 0u32);
         self.score += self.term_score(term_ord, term_freq, fieldnorm);
         self.num_fields += 1;
     }

-    #[inline(always)]
+    #[inline]
     fn clear(&mut self,) {
         self.score = 0f32;
         self.num_fields = 0;
@@ -39,7 +39,6 @@ impl TfIdf {
         }
     }

-    #[inline(always)]
     fn coord(&self,) -> f32 {
         self.coords[self.num_fields]
     }
@@ -49,25 +48,25 @@ impl TfIdf {
     }

     fn term_name(&self, ord: usize) -> String {
-        match &self.term_names {
-            &Some(ref term_names_vec) => term_names_vec[ord].clone(),
-            &None => format!("Field({})", ord)
+        match self.term_names {
+            Some(ref term_names_vec) => term_names_vec[ord].clone(),
+            None => format!("Field({})", ord)
         }
     }

-    #[inline(always)]
+    #[inline]
     fn term_score(&self, term_ord: usize, term_freq: u32, field_norm: u32) -> f32 {
         (term_freq as f32 / field_norm as f32).sqrt() * self.idf[term_ord]
     }
 }

 impl Similarity for TfIdf {

-    #[inline(always)]
+    #[inline]
     fn score(&self, ) -> Score {
         self.score * self.coord()
     }

-    fn explain(&self, vals: &Vec<(usize, u32, u32)>) -> Explanation {
+    fn explain(&self, vals: &[(usize, u32, u32)]) -> Explanation {
         let score = self.score();
         let mut explanation = Explanation::with_val(score);
         let formula_components: Vec<String> = vals.iter()
@@ -76,7 +75,7 @@ impl Similarity for TfIdf {
             .collect();
         let formula = format!(" * ({})", formula_components.join(" + "));
         explanation.set_formula(&formula);
-        for &(ord, term_freq, field_norm) in vals.iter() {
+        for &(ord, term_freq, field_norm) in vals {
             let term_score = self.term_score(ord, term_freq, field_norm);
             let term_explanation = explanation.add_child(&self.term_name(ord), term_score);
             term_explanation.set_formula(" sqrt( / ) * ");
diff --git a/src/schema/document.rs b/src/schema/document.rs
index c86ebea25..eaa9b83da 100644
--- a/src/schema/document.rs
+++ b/src/schema/document.rs
@@ -31,18 +31,17 @@ impl PartialEq for Document {

 impl Eq for Document {}

 impl Document {
-
-    pub fn new() -> Document {
-        Document {
-            field_values: Vec::new(),
-        }
-    }

     /// Returns the number of `(field, value)` pairs.
     pub fn len(&self,) -> usize {
         self.field_values.len()
     }
-
+
+    /// Returns true iff the document contains no fields.
+    pub fn is_empty(&self,) -> bool {
+        self.field_values.is_empty()
+    }
+
     /// Add a text field.
     pub fn add_text(&mut self, field: Field, text: &str) {
         self.add(FieldValue {
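`similarity.rs` and `tfidf.rs` above change `&Vec<(usize, u32, u32)>` parameters to `&[(usize, u32, u32)]`. Thanks to deref coercion, call sites passing `&some_vec` keep compiling, while arrays and sub-slices become acceptable too. A sketch of why that matters (the scoring math below is invented, not tantivy's):

```rust
// `&[T]` beats `&Vec<T>` in an API: callers can pass a Vec, an array,
// or a sub-slice. Mirrors the `explain(&self, vals: &[(usize, u32, u32)])`
// signature change.
fn explain(vals: &[(usize, u32, u32)]) -> f32 {
    vals.iter()
        .map(|&(_, term_freq, field_norm)| term_freq as f32 / field_norm as f32)
        .sum()
}

fn main() {
    let vals = vec![(0, 2, 4), (1, 1, 2)];
    // Deref coercion: &Vec<T> -> &[T], so existing call sites still work.
    let from_vec = explain(&vals);
    // But now sub-slices are accepted as well.
    let from_slice = explain(&vals[..1]);
    println!("{} {}", from_vec, from_slice);
}
```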
@@ -70,18 +69,17 @@ impl Document {
     pub fn get_sorted_fields(&self) -> Vec<(Field, Vec<&FieldValue>)> {
         let mut field_values: Vec<&FieldValue> = self.get_fields().iter().collect();
         field_values.sort_by_key(|field_value| field_value.field());
-        let sorted_fields: Vec<(Field, Vec<&FieldValue>)> = field_values
+        field_values
             .into_iter()
             .group_by(|field_value| field_value.field())
             .into_iter()
             .map(|(key, group)| {
                 (key, group.into_iter().collect())
             })
-            .collect();
-        sorted_fields
-    }
+            .collect::<Vec<(Field, Vec<&FieldValue>)>>()
+    }

-    pub fn get_all<'a>(&'a self, field: Field) -> Vec<&'a Value> {
+    pub fn get_all(&self, field: Field) -> Vec<&Value> {
         self.field_values
             .iter()
             .filter(|field_value| field_value.field() == field)
@@ -89,7 +87,7 @@ impl Document {
             .collect()
     }

-    pub fn get_first<'a>(&'a self, field: Field) -> Option<&'a Value> {
+    pub fn get_first(&self, field: Field) -> Option<&Value> {
         self.field_values
             .iter()
             .filter(|field_value| field_value.field() == field)
@@ -98,6 +96,15 @@ impl Document {
     }
 }

+impl Default for Document {
+
+    fn default() -> Document {
+        Document {
+            field_values: Vec::new(),
+        }
+    }
+}
+
 impl From<Vec<FieldValue>> for Document {
     fn from(field_values: Vec<FieldValue>) -> Document {
         Document {
@@ -114,9 +121,9 @@ mod tests {

     #[test]
     fn test_doc() {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = SchemaBuilder::default();
         let text_field = schema_builder.add_text_field("title", TEXT);
-        let mut doc = Document::new();
+        let mut doc = Document::default();
         doc.add_text(text_field, "My title");
         assert_eq!(doc.get_fields().len(), 1);
     }
diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs
index 746f05f59..e05ff5a78 100644
--- a/src/schema/field_entry.rs
+++ b/src/schema/field_entry.rs
@@ -16,20 +16,20 @@ pub enum FieldType {
 impl FieldType {

     pub fn value_from_json(&self, json: &Json) -> Result<Value, ValueParsingError> {
-        match json {
-            &Json::String(ref field_text) => {
-                match self {
-                    &FieldType::Str(_) => {
+        match *json {
+            Json::String(ref field_text) => {
+                match *self {
+                    FieldType::Str(_) => {
                         Ok(Value::Str(field_text.clone()))
                     }
-                    &FieldType::U32(_) => {
+                    FieldType::U32(_) => {
                         Err(ValueParsingError::TypeError(format!("Expected a u32 int, got {:?}", json)))
                     }
                 }
             }
-            &Json::U64(ref field_val_u64) => {
-                match self {
-                    &FieldType::U32(_) => {
+            Json::U64(ref field_val_u64) => {
+                match *self {
+                    FieldType::U32(_) => {
                         if *field_val_u64 > (u32::max_value() as u64) {
                             Err(ValueParsingError::OverflowError(format!("Expected u32, but value {:?} overflows.", field_val_u64)))
                         }
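`document.rs` above trades `Document::new()` for an `impl Default`. When every field already has a usable default, `#[derive(Default)]` produces the same thing; the struct below is a simplified stand-in, not the real `Document`:

```rust
// Hand-rolled `new()` constructors replaced by the `Default` trait.
#[derive(Default)]
struct Document {
    field_values: Vec<(u32, String)>, // (field id, value) -- simplified
}

impl Document {
    fn len(&self) -> usize {
        self.field_values.len()
    }
    // Companion to `len()`, as clippy's `len_without_is_empty` lint suggests.
    fn is_empty(&self) -> bool {
        self.field_values.is_empty()
    }
}

fn main() {
    let doc = Document::default(); // instead of Document::new()
    assert!(doc.is_empty());
    assert_eq!(doc.len(), 0);
}
```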
diff --git a/src/schema/mod.rs b/src/schema/mod.rs
index 460b90198..e58f19e21 100644
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -27,8 +27,8 @@ directory.
 ```
 use tantivy::schema::*;
-let mut schema_builder = SchemaBuilder::new();
-let title_options = TextOptions::new()
+let mut schema_builder = SchemaBuilder::default();
+let title_options = TextOptions::default()
     .set_stored()
     .set_indexing_options(TextIndexingOptions::TokenizedWithFreqAndPosition);
 schema_builder.add_text_field("title_options", title_options);
@@ -57,7 +57,7 @@ The example can be rewritten :
 ```
 use tantivy::schema::*;
-let mut schema_builder = SchemaBuilder::new();
+let mut schema_builder = SchemaBuilder::default();
 schema_builder.add_text_field("title_options", TEXT | STORED);
 let schema = schema_builder.build();
 ```
@@ -70,8 +70,8 @@ let schema = schema_builder.build();
 ```
 use tantivy::schema::*;
-let mut schema_builder = SchemaBuilder::new();
-let num_stars_options = U32Options::new()
+let mut schema_builder = SchemaBuilder::default();
+let num_stars_options = U32Options::default()
     .set_stored()
     .set_indexed();
 schema_builder.add_u32_field("num_stars", num_stars_options);
diff --git a/src/schema/schema.rs b/src/schema/schema.rs
index efc16c0d3..9c6e49088 100644
--- a/src/schema/schema.rs
+++ b/src/schema/schema.rs
@@ -14,9 +14,6 @@ use std::fmt;

-
-
-
 /// Tantivy has a very strict schema.
 /// You need to specify in advance, whether a field is indexed or not,
 /// stored or not, and RAM-based or not.
@@ -30,7 +27,7 @@ use std::fmt;
 /// ```
 /// use tantivy::schema::*;
 ///
-/// let mut schema_builder = SchemaBuilder::new();
+/// let mut schema_builder = SchemaBuilder::default();
 /// let id_field = schema_builder.add_text_field("id", STRING);
 /// let title_field = schema_builder.add_text_field("title", TEXT);
 /// let body_field = schema_builder.add_text_field("body", TEXT);
@@ -39,19 +36,11 @@ use std::fmt;
 /// ```
 pub struct SchemaBuilder {
     fields: Vec<FieldEntry>,
-    fields_map: HashMap<String, Field>,  // transient
+    fields_map: HashMap<String, Field>,
 }

 impl SchemaBuilder {
-
-    pub fn new() -> SchemaBuilder {
-        SchemaBuilder {
-            fields: Vec::new(),
-            fields_map: HashMap::new(),
-        }
-    }

     /// Adds a new u32 field.
     /// Returns the associated field handle
@@ -108,6 +97,15 @@ impl SchemaBuilder {

 }

+impl Default for SchemaBuilder {
+    fn default() -> SchemaBuilder {
+        SchemaBuilder {
+            fields: Vec::new(),
+            fields_map: HashMap::new(),
+        }
+    }
+}
+
 #[derive(Debug)]
 struct InnerSchema {
     fields: Vec<FieldEntry>,
@@ -129,7 +127,7 @@ struct InnerSchema {
 /// ```
 /// use tantivy::schema::*;
 ///
-/// let mut schema_builder = SchemaBuilder::new();
+/// let mut schema_builder = SchemaBuilder::default();
 /// let id_field = schema_builder.add_text_field("id", STRING);
 /// let title_field = schema_builder.add_text_field("title", TEXT);
 /// let body_field = schema_builder.add_text_field("body", TEXT);
@@ -163,7 +161,7 @@ impl Schema {
     /// If panicking is not an option for you,
     /// you may use `get(&self, field_name: &str)`.
     pub fn get_field(&self, field_name: &str) -> Option<Field> {
-        self.0.fields_map.get(field_name).map(|field| field.clone())
+        self.0.fields_map.get(field_name).cloned()
     }

     pub fn to_named_doc(&self, doc: &Document) -> NamedFieldDocument {
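The `get_field` hunk above replaces `.map(|field| field.clone())` with `Option::cloned()`, which converts an `Option<&T>` into an `Option<T>`. The same idea in isolation, with `Field` as a stand-in newtype:

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Debug)]
struct Field(u32);

// `Option::cloned()` replaces `.map(|x| x.clone())` when looking up a
// Clone value behind a reference, as in `Schema::get_field`.
fn get_field(map: &HashMap<String, Field>, name: &str) -> Option<Field> {
    map.get(name).cloned() // Option<&Field> -> Option<Field>
}

fn main() {
    let mut map = HashMap::new();
    map.insert("title".to_string(), Field(0));
    assert_eq!(get_field(&map, "title"), Some(Field(0)));
    assert_eq!(get_field(&map, "body"), None);
}
```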
@@ -190,24 +188,24 @@ impl Schema {
         let json_node = try!(Json::from_str(doc_json));
         let some_json_obj = json_node.as_object();
         if !some_json_obj.is_some() {
-            let doc_json_sample: String;
-            if doc_json.len() < 20 {
-                doc_json_sample = String::from(doc_json);
-            }
-            else {
-                doc_json_sample = format!("{:?}...", &doc_json[0..20]);
-            }
+            let doc_json_sample: String =
+                if doc_json.len() < 20 {
+                    String::from(doc_json)
+                }
+                else {
+                    format!("{:?}...", &doc_json[0..20])
+                };
             return Err(DocParsingError::NotJSONObject(doc_json_sample))
         }
         let json_obj = some_json_obj.unwrap();
-        let mut doc = Document::new();
+        let mut doc = Document::default();
         for (field_name, json_value) in json_obj.iter() {
             match self.get_field(field_name) {
                 Some(field) => {
                     let field_entry = self.get_field_entry(field);
                     let field_type = field_entry.field_type();
-                    match json_value {
-                        &Json::Array(ref json_items) => {
+                    match *json_value {
+                        Json::Array(ref json_items) => {
                             for json_item in json_items {
                                 let value = try!(
                                     field_type
@@ -251,7 +249,7 @@ impl fmt::Debug for Schema {

 impl Decodable for Schema {
     fn decode<D: Decoder>(d: &mut D) -> Result<Schema, D::Error> {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = SchemaBuilder::default();
         try!(d.read_seq(|d, num_fields| {
             for _ in 0..num_fields {
                 let field_entry = try!(FieldEntry::decode(d));
@@ -312,8 +310,8 @@ mod tests {

     #[test]
     pub fn test_schema_serialization() {
-        let mut schema_builder = SchemaBuilder::new();
-        let count_options = U32Options::new().set_stored().set_fast();
+        let mut schema_builder = SchemaBuilder::default();
+        let count_options = U32Options::default().set_stored().set_fast();
         schema_builder.add_text_field("title", TEXT);
         schema_builder.add_text_field("author", STRING);
         schema_builder.add_u32_field("count", count_options);
@@ -355,8 +353,8 @@ mod tests {

     #[test]
     pub fn test_document_to_json() {
-        let mut schema_builder = SchemaBuilder::new();
-        let count_options = U32Options::new().set_stored().set_fast();
+        let mut schema_builder = SchemaBuilder::default();
+        let count_options = U32Options::default().set_stored().set_fast();
         schema_builder.add_text_field("title", TEXT);
         schema_builder.add_text_field("author", STRING);
         schema_builder.add_u32_field("count", count_options);
@@ -373,8 +371,8 @@ mod tests {

     #[test]
     pub fn test_parse_document() {
-        let mut schema_builder = SchemaBuilder::new();
-        let count_options = U32Options::new().set_stored().set_fast();
+        let mut schema_builder = SchemaBuilder::default();
+        let count_options = U32Options::default().set_stored().set_fast();
         let title_field = schema_builder.add_text_field("title", TEXT);
         let author_field = schema_builder.add_text_field("author", STRING);
         let count_field = schema_builder.add_u32_field("count", count_options);
diff --git a/src/schema/term.rs b/src/schema/term.rs
index 08d5b78c5..66c50ef2b 100644
--- a/src/schema/term.rs
+++ b/src/schema/term.rs
@@ -83,7 +83,7 @@ mod tests {

     #[test]
     pub fn test_term() {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = SchemaBuilder::default();
         schema_builder.add_text_field("text", STRING);
         let title_field = schema_builder.add_text_field("title", STRING);
         let count_field = schema_builder.add_text_field("count", STRING);
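The `doc_json_sample` hunk above turns "declare, then assign in each branch" into a single `let` bound to an `if` expression, so the binding is initialized exactly once. A standalone sketch with made-up input:

```rust
fn main() {
    let doc_json = r#"{"title": "The Old Man and the Sea"}"#;

    // `if` is an expression in Rust, so both branches can produce the
    // value directly; no deferred assignment needed.
    let doc_json_sample: String = if doc_json.len() < 20 {
        String::from(doc_json)
    } else {
        format!("{:?}...", &doc_json[0..20])
    };

    println!("{}", doc_json_sample);
}
```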
diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs
index 579a8149e..6e3b16664 100644
--- a/src/schema/text_options.rs
+++ b/src/schema/text_options.rs
@@ -30,7 +30,10 @@ impl TextOptions {
         self
     }

-    pub fn new() -> TextOptions {
+}
+
+impl Default for TextOptions {
+    fn default() -> TextOptions {
         TextOptions {
             indexing: TextIndexingOptions::Unindexed,
             stored: false,
@@ -90,17 +93,16 @@ impl Decodable for TextIndexingOptions {
 impl TextIndexingOptions {
     pub fn is_termfreq_enabled(&self) -> bool {
         match *self {
-            TextIndexingOptions::TokenizedWithFreq => true,
-            TextIndexingOptions::TokenizedWithFreqAndPosition => true,
+            TextIndexingOptions::TokenizedWithFreq | TextIndexingOptions::TokenizedWithFreqAndPosition => true,
             _ => false,
         }
     }

     pub fn is_tokenized(&self,) -> bool {
         match *self {
-            TextIndexingOptions::TokenizedNoFreq => true,
-            TextIndexingOptions::TokenizedWithFreq => true,
-            TextIndexingOptions::TokenizedWithFreqAndPosition => true,
+            TextIndexingOptions::TokenizedNoFreq
+            | TextIndexingOptions::TokenizedWithFreq
+            | TextIndexingOptions::TokenizedWithFreqAndPosition => true,
             _ => false,
         }
     }
@@ -129,10 +131,7 @@ impl BitOr for TextIndexingOptions {
         if self == Unindexed {
             other
         }
-        else if other == Unindexed {
-            self
-        }
-        else if self == other {
+        else if other == Unindexed || self == other {
             self
         }
         else {
@@ -156,7 +155,7 @@ pub const TEXT: TextOptions = TextOptions {
     stored: false,
 };

-/// A stored fields of a document can be retrieved given its DocId.
+/// The stored fields of a document can be retrieved given its `DocId`.
 /// Stored field are stored together and LZ4 compressed.
 /// Reading the stored fields of a document is relatively slow.
 /// (100 microsecs)
@@ -171,7 +170,7 @@ impl BitOr for TextOptions {
     type Output = TextOptions;

     fn bitor(self, other: TextOptions) -> TextOptions {
-        let mut res = TextOptions::new();
+        let mut res = TextOptions::default();
         res.indexing = self.indexing | other.indexing;
         res.stored = self.stored || other.stored;
         res
@@ -191,7 +190,7 @@ mod tests {
         assert!(field_options.get_indexing_options().is_tokenized());
     }
     {
-        let mut schema_builder = SchemaBuilder::new();
+        let mut schema_builder = SchemaBuilder::default();
         schema_builder.add_text_field("body", TEXT);
         let schema = schema_builder.build();
         let field = schema.get_field("body").unwrap();
diff --git a/src/schema/u32_options.rs b/src/schema/u32_options.rs
index 6c3bd0366..a1a8e5a92 100644
--- a/src/schema/u32_options.rs
+++ b/src/schema/u32_options.rs
@@ -6,15 +6,7 @@ pub struct U32Options {
 }

 impl U32Options {
-
-    pub fn new() -> U32Options {
-        U32Options {
-            fast: false,
-            indexed: false,
-            stored: false,
-        }
-    }
-
+
     pub fn is_stored(&self,) -> bool {
         self.stored
     }
@@ -43,6 +35,17 @@ impl U32Options {
     }
 }

+impl Default for U32Options {
+    fn default() -> U32Options {
+        U32Options {
+            fast: false,
+            indexed: false,
+            stored: false,
+        }
+    }
+}
+
+
 /// The field will be tokenized and indexed
 pub const FAST: U32Options = U32Options {
     indexed: false,
diff --git a/src/store/mod.rs b/src/store/mod.rs
index ffaad44b8..4572e9d4a 100644
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -20,9 +20,9 @@ mod tests {
     use directory::{RAMDirectory, Directory, MmapDirectory, WritePtr};

     fn write_lorem_ipsum_store(writer: WritePtr) -> Schema {
-        let mut schema_builder = SchemaBuilder::new();
-        let field_body = schema_builder.add_text_field("body", TextOptions::new().set_stored());
-        let field_title = schema_builder.add_text_field("title", TextOptions::new().set_stored());
+        let mut schema_builder = SchemaBuilder::default();
+        let field_body = schema_builder.add_text_field("body", TextOptions::default().set_stored());
+        let field_title = schema_builder.add_text_field("title", TextOptions::default().set_stored());
         let schema = schema_builder.build();
         let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
         {
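`TEXT | STORED` works because the options types implement `std::ops::BitOr`; the `text_options.rs` hunks above simplify those impls. A reduced model of the same flag-merging design (fields and names simplified from the real `TextOptions`):

```rust
use std::ops::BitOr;

// A small value type whose `BitOr` impl merges two option sets,
// enabling the `TEXT | STORED` style of schema declaration.
#[derive(Clone, Copy, Debug, PartialEq)]
struct TextOptions {
    tokenized: bool,
    stored: bool,
}

const TEXT: TextOptions = TextOptions { tokenized: true, stored: false };
const STORED: TextOptions = TextOptions { tokenized: false, stored: true };

impl BitOr for TextOptions {
    type Output = TextOptions;
    fn bitor(self, other: TextOptions) -> TextOptions {
        TextOptions {
            tokenized: self.tokenized || other.tokenized,
            stored: self.stored || other.stored,
        }
    }
}

fn main() {
    let opts = TEXT | STORED;
    assert!(opts.tokenized && opts.stored);
}
```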
diff --git a/src/store/writer.rs b/src/store/writer.rs
index 9fbc0faba..032e65c71 100644
--- a/src/store/writer.rs
+++ b/src/store/writer.rs
@@ -46,7 +46,7 @@ impl StoreWriter {
     }

     pub fn stack_reader(&mut self, reader: &StoreReader) -> io::Result<()> {
-        if self.current_block.len() > 0 {
+        if !self.current_block.is_empty() {
             try!(self.write_and_compress_block());
         }
         match reader.offsets.last() {
@@ -65,7 +65,7 @@ impl StoreWriter {
         }
     }

-    pub fn store<'a>(&mut self, field_values: &Vec<&'a FieldValue>) -> io::Result<()> {
+    pub fn store<'a>(&mut self, field_values: &[&'a FieldValue]) -> io::Result<()> {
         self.intermediary_buffer.clear();
         try!((field_values.len() as u32).serialize(&mut self.intermediary_buffer));
         for field_value in field_values {
@@ -98,7 +98,7 @@ impl StoreWriter {
     }

     pub fn close(&mut self,) -> io::Result<()> {
-        if self.current_block.len() > 0 {
+        if !self.current_block.is_empty() {
             try!(self.write_and_compress_block());
         }
         let header_offset: u64 = self.written;
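`store/writer.rs` swaps `len() > 0` for `!is_empty()`, the form clippy's `len_zero` lint recommends. The same change in a standalone toy (the buffer-flushing logic is invented, not the real `StoreWriter`):

```rust
// `!is_empty()` states intent more directly than `len() > 0`
// and works for any container that provides it.
fn flush_if_needed(current_block: &mut Vec<u8>) -> usize {
    if !current_block.is_empty() {
        let written = current_block.len(); // pretend we compress and write here
        current_block.clear();
        written
    } else {
        0
    }
}

fn main() {
    let mut block = vec![1u8, 2, 3];
    assert_eq!(flush_if_needed(&mut block), 3);
    assert_eq!(flush_if_needed(&mut block), 0); // nothing left to flush
}
```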