From f8710bd4b0b914d37be2671d92d67a4e5fbd941b Mon Sep 17 00:00:00 2001
From: Paul Masurel
Date: Mon, 28 Aug 2017 17:42:26 +0900
Subject: [PATCH] Format

---
 examples/simple_search.rs | 8 +-
 src/collector/chained_collector.rs | 9 +-
 src/collector/count_collector.rs | 12 +-
 src/collector/facet_collector.rs | 19 +-
 src/collector/mod.rs | 32 +-
 src/collector/multi_collector.rs | 13 +-
 src/collector/top_collector.rs | 24 +-
 src/common/bitpacker.rs | 12 +-
 src/common/composite_file.rs | 27 +-
 src/common/serialize.rs | 6 +-
 src/common/timer.rs | 18 +-
 src/common/vint.rs | 7 +-
 src/compression/mod.rs | 47 +-
 .../pack/compression_pack_nosimd.rs | 28 +-
 src/compression/pack/compression_pack_simd.rs | 14 +-
 src/compression/stream.rs | 24 +-
 .../vint/compression_vint_nosimd.rs | 15 +-
 src/compression/vint/compression_vint_simd.rs | 60 +--
 src/core/index.rs | 46 +-
 src/core/index_meta.rs | 2 +-
 src/core/inverted_index_reader.rs | 62 +--
 src/core/pool.rs | 30 +-
 src/core/searcher.rs | 27 +-
 src/core/segment.rs | 18 +-
 src/core/segment_component.rs | 16 +-
 src/core/segment_meta.rs | 24 +-
 src/core/segment_reader.rs | 92 ++--
 src/datastruct/skip/skiplist_builder.rs | 16 +-
 src/datastruct/stacker/hashmap.rs | 40 +-
 src/datastruct/stacker/heap.rs | 40 +-
 src/directory/error.rs | 16 +-
 src/directory/managed_directory.rs | 94 ++--
 src/directory/mmap_directory.rs | 174 ++++----
 src/directory/ram_directory.rs | 64 +--
 src/directory/read_only_source.rs | 3 +-
 src/error.rs | 9 +-
 src/fastfield/mod.rs | 45 +-
 src/fastfield/reader.rs | 30 +-
 src/fastfield/serializer.rs | 35 +-
 src/fastfield/writer.rs | 12 +-
 src/indexer/delete_queue.rs | 36 +-
 src/indexer/doc_opstamp_mapping.rs | 6 +-
 src/indexer/index_writer.rs | 182 ++++----
 src/indexer/log_merge_policy.rs | 50 ++-
 src/indexer/merger.rs | 406 +++++++++++-------
 src/indexer/segment_entry.rs | 9 +-
 src/indexer/segment_manager.rs | 105 +++--
 src/indexer/segment_register.rs | 69 +--
 src/indexer/segment_serializer.rs | 10 +-
 src/indexer/segment_updater.rs | 218 +++++-----
 src/indexer/segment_writer.rs | 108 +++--
 src/lib.rs | 113 +++--
 src/postings/docset.rs | 3 +-
 src/postings/mod.rs | 78 ++--
 src/postings/postings_writer.rs | 83 ++--
 src/postings/recorder.rs | 63 +--
 src/postings/segment_postings.rs | 100 ++---
 src/postings/segment_postings_option.rs | 1 -
 src/postings/serializer.rs | 101 ++---
 src/postings/term_info.rs | 2 +-
 src/query/boolean_query/boolean_query.rs | 17 +-
 src/query/boolean_query/boolean_scorer.rs | 10 +-
 src/query/boolean_query/boolean_weight.rs | 11 +-
 src/query/boolean_query/mod.rs | 26 +-
 src/query/phrase_query/mod.rs | 6 +-
 src/query/phrase_query/phrase_weight.rs | 6 +-
 src/query/query.rs | 5 +-
 src/query/query_parser/query_grammar.rs | 53 ++-
 src/query/query_parser/query_parser.rs | 237 ++++++----
 src/query/term_query/mod.rs | 6 +-
 src/query/term_query/term_scorer.rs | 12 +-
 src/query/term_query/term_weight.rs | 15 +-
 src/schema/field.rs | 2 +-
 src/schema/field_entry.rs | 20 +-
 src/schema/field_type.rs | 13 +-
 src/schema/schema.rs | 87 ++--
 src/schema/term.rs | 10 +-
 src/schema/text_options.rs | 14 +-
 src/schema/value.rs | 16 +-
 src/store/mod.rs | 22 +-
 src/store/reader.rs | 8 +-
 src/store/writer.rs | 28 +-
 src/termdict/fstdict/streamer.rs | 26 +-
 src/termdict/fstdict/termdict.rs | 60 ++-
 src/termdict/merger.rs | 33 +-
 src/termdict/mod.rs | 91 ++--
 src/termdict/streamdict/delta_encoder.rs | 45 +-
 src/termdict/streamdict/mod.rs | 3 +-
 src/termdict/streamdict/streamer.rs | 69 +--
 src/termdict/streamdict/termdict.rs | 122 +++---
 90 files
changed, 2291 insertions(+), 1795 deletions(-) diff --git a/examples/simple_search.rs b/examples/simple_search.rs index 0d35f0e42..3cc82ae4d 100644 --- a/examples/simple_search.rs +++ b/examples/simple_search.rs @@ -91,9 +91,11 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { let mut old_man_doc = Document::default(); old_man_doc.add_text(title, "The Old Man and the Sea"); - old_man_doc.add_text(body, - "He was an old man who fished alone in a skiff in the Gulf Stream and \ - he had gone eighty-four days now without taking a fish."); + old_man_doc.add_text( + body, + "He was an old man who fished alone in a skiff in the Gulf Stream and \ + he had gone eighty-four days now without taking a fish.", + ); // ... and add it to the `IndexWriter`. index_writer.add_document(old_man_doc); diff --git a/src/collector/chained_collector.rs b/src/collector/chained_collector.rs index 6cc5785b4..1dff3e3c6 100644 --- a/src/collector/chained_collector.rs +++ b/src/collector/chained_collector.rs @@ -38,10 +38,11 @@ impl ChainedCollector { } impl Collector for ChainedCollector { - fn set_segment(&mut self, - segment_local_id: SegmentLocalId, - segment: &SegmentReader) - -> Result<()> { + fn set_segment( + &mut self, + segment_local_id: SegmentLocalId, + segment: &SegmentReader, + ) -> Result<()> { try!(self.left.set_segment(segment_local_id, segment)); try!(self.right.set_segment(segment_local_id, segment)); Ok(()) diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index bfb17eb3c..1fd9613ec 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -45,11 +45,11 @@ mod tests { #[bench] fn build_collector(b: &mut Bencher) { b.iter(|| { - let mut count_collector = CountCollector::default(); - for doc in 0..1_000_000 { - count_collector.collect(doc, 1f32); - } - count_collector.count() - }); + let mut count_collector = CountCollector::default(); + for doc in 0..1_000_000 { + count_collector.collect(doc, 1f32); + } + count_collector.count() + }); } } diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 2d760dfc6..b99822089 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -15,8 +15,9 @@ use SegmentLocalId; /// Facet collector for i64/u64 fast field pub struct FacetCollector - where T: FastFieldReader, - T::ValueType: Eq + Hash +where + T: FastFieldReader, + T::ValueType: Eq + Hash, { counters: HashMap, field: Field, @@ -25,8 +26,9 @@ pub struct FacetCollector impl FacetCollector - where T: FastFieldReader, - T::ValueType: Eq + Hash +where + T: FastFieldReader, + T::ValueType: Eq + Hash, { /// Creates a new facet collector for aggregating a given field. pub fn new(field: Field) -> FacetCollector { @@ -40,8 +42,9 @@ impl FacetCollector impl Collector for FacetCollector - where T: FastFieldReader, - T::ValueType: Eq + Hash +where + T: FastFieldReader, + T::ValueType: Eq + Hash, { fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> { self.ff_reader = Some(reader.get_fast_field_reader(self.field)?); @@ -51,7 +54,9 @@ impl Collector for FacetCollector fn collect(&mut self, doc: DocId, _: Score) { let val = self.ff_reader .as_ref() - .expect("collect() was called before set_segment. This should never happen.") + .expect( + "collect() was called before set_segment. 
This should never happen.", + ) .get(doc); *(self.counters.entry(val).or_insert(0)) += 1; } diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 27435592d..3832abbd1 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -51,20 +51,22 @@ pub use self::chained_collector::chain; pub trait Collector { /// `set_segment` is called before beginning to enumerate /// on this segment. - fn set_segment(&mut self, - segment_local_id: SegmentLocalId, - segment: &SegmentReader) - -> Result<()>; + fn set_segment( + &mut self, + segment_local_id: SegmentLocalId, + segment: &SegmentReader, + ) -> Result<()>; /// The query pushes the scored document to the collector via this method. fn collect(&mut self, doc: DocId, score: Score); } impl<'a, C: Collector> Collector for &'a mut C { - fn set_segment(&mut self, - segment_local_id: SegmentLocalId, - segment: &SegmentReader) - -> Result<()> { + fn set_segment( + &mut self, + segment_local_id: SegmentLocalId, + segment: &SegmentReader, + ) -> Result<()> { (*self).set_segment(segment_local_id, segment) } /// The query pushes the scored document to the collector via this method. @@ -169,12 +171,12 @@ pub mod tests { #[bench] fn build_collector(b: &mut Bencher) { b.iter(|| { - let mut count_collector = CountCollector::default(); - let docs: Vec = (0..1_000_000).collect(); - for doc in docs { - count_collector.collect(doc, 1f32); - } - count_collector.count() - }); + let mut count_collector = CountCollector::default(); + let docs: Vec = (0..1_000_000).collect(); + for doc in docs { + count_collector.collect(doc, 1f32); + } + count_collector.count() + }); } } diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index c2515782d..2e6bf0628 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -23,10 +23,11 @@ impl<'a> MultiCollector<'a> { impl<'a> Collector for MultiCollector<'a> { - fn set_segment(&mut self, - segment_local_id: SegmentLocalId, - segment: &SegmentReader) - -> Result<()> { + fn set_segment( + &mut self, + segment_local_id: SegmentLocalId, + segment: &SegmentReader, + ) -> Result<()> { for collector in &mut self.collectors { try!(collector.set_segment(segment_local_id, segment)); } @@ -53,8 +54,8 @@ mod tests { let mut top_collector = TopCollector::with_limit(2); let mut count_collector = CountCollector::default(); { - let mut collectors = MultiCollector::from(vec![&mut top_collector, - &mut count_collector]); + let mut collectors = + MultiCollector::from(vec![&mut top_collector, &mut count_collector]); collectors.collect(1, 0.2); collectors.collect(2, 0.1); collectors.collect(3, 0.5); diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 7d3c33c9e..e022c4ba9 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -24,10 +24,9 @@ impl PartialOrd for GlobalScoredDoc { impl Ord for GlobalScoredDoc { #[inline] fn cmp(&self, other: &GlobalScoredDoc) -> Ordering { - other - .score - .partial_cmp(&self.score) - .unwrap_or_else(|| other.doc_address.cmp(&self.doc_address)) + other.score.partial_cmp(&self.score).unwrap_or_else(|| { + other.doc_address.cmp(&self.doc_address) + }) } } @@ -87,7 +86,9 @@ impl TopCollector { scored_docs.sort(); scored_docs .into_iter() - .map(|GlobalScoredDoc { score, doc_address }| (score, doc_address)) + .map(|GlobalScoredDoc { score, doc_address }| { + (score, doc_address) + }) .collect() } @@ -108,14 +109,13 @@ impl Collector for TopCollector { fn collect(&mut self, doc: DocId, score: Score) { 
if self.at_capacity() { // It's ok to unwrap as long as a limit of 0 is forbidden. - let limit_doc: GlobalScoredDoc = - *self.heap - .peek() - .expect("Top collector with size 0 is forbidden"); + let limit_doc: GlobalScoredDoc = *self.heap.peek().expect( + "Top collector with size 0 is forbidden", + ); if limit_doc.score < score { - let mut mut_head = self.heap - .peek_mut() - .expect("Top collector with size 0 is forbidden"); + let mut mut_head = self.heap.peek_mut().expect( + "Top collector with size 0 is forbidden", + ); mut_head.score = score; mut_head.doc_address = DocAddress(self.segment_id, doc); } diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs index 7d7aeb23c..a900ae92a 100644 --- a/src/common/bitpacker.rs +++ b/src/common/bitpacker.rs @@ -88,7 +88,8 @@ impl BitPacker { pub struct BitUnpacker - where Data: Deref +where + Data: Deref, { num_bits: usize, mask: u64, @@ -96,7 +97,8 @@ pub struct BitUnpacker } impl BitUnpacker - where Data: Deref +where + Data: Deref, { pub fn new(data: Data, num_bits: usize) -> BitUnpacker { let mask: u64 = if num_bits == 64 { @@ -121,8 +123,10 @@ impl BitUnpacker let addr_in_bits = idx * num_bits; let addr = addr_in_bits >> 3; let bit_shift = addr_in_bits & 7; - debug_assert!(addr + 8 <= data.len(), - "The fast field field should have been padded with 7 bytes."); + debug_assert!( + addr + 8 <= data.len(), + "The fast field field should have been padded with 7 bytes." + ); let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) }; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; (val_shifted & mask) diff --git a/src/common/composite_file.rs b/src/common/composite_file.rs index bc0d40786..4ab843d38 100644 --- a/src/common/composite_file.rs +++ b/src/common/composite_file.rs @@ -10,13 +10,12 @@ use common::BinarySerializable; /// A `CompositeWrite` is used to write a `CompositeFile`. -pub struct CompositeWrite { +pub struct CompositeWrite { write: CountingWriter, offsets: HashMap, } impl CompositeWrite { - /// Crate a new API writer that writes a composite file /// in a given write. pub fn wrap(w: W) -> CompositeWrite { @@ -43,7 +42,8 @@ impl CompositeWrite { let footer_offset = self.write.written_bytes(); VInt(self.offsets.len() as u64).serialize(&mut self.write)?; - let mut offset_fields: Vec<_> = self.offsets.iter() + let mut offset_fields: Vec<_> = self.offsets + .iter() .map(|(field, offset)| (offset, field)) .collect(); @@ -51,7 +51,9 @@ impl CompositeWrite { let mut prev_offset = 0; for (offset, field) in offset_fields { - VInt( (offset - prev_offset) as u64).serialize(&mut self.write)?; + VInt((offset - prev_offset) as u64).serialize( + &mut self.write, + )?; field.serialize(&mut self.write)?; prev_offset = *offset; } @@ -77,7 +79,6 @@ pub struct CompositeFile { } impl CompositeFile { - /// Opens a composite file stored in a given /// `ReadOnlySource`. 
pub fn open(data: ReadOnlySource) -> io::Result { @@ -90,8 +91,8 @@ impl CompositeFile { let mut footer_buffer = footer_data.as_slice(); let num_fields = VInt::deserialize(&mut footer_buffer)?.0 as usize; - let mut fields = vec!(); - let mut offsets = vec!(); + let mut fields = vec![]; + let mut offsets = vec![]; let mut field_index = HashMap::new(); @@ -106,7 +107,7 @@ impl CompositeFile { for i in 0..num_fields { let field = fields[i]; let start_offset = offsets[i]; - let end_offset = offsets[i+1]; + let end_offset = offsets[i + 1]; field_index.insert(field, (start_offset, end_offset)); } @@ -128,11 +129,9 @@ impl CompositeFile { /// Returns the `ReadOnlySource` associated /// to a given `Field` and stored in a `CompositeFile`. pub fn open_read(&self, field: Field) -> Option { - self.offsets_index - .get(&field) - .map(|&(from, to)| { - self.data.slice(from, to) - }) + self.offsets_index.get(&field).map(|&(from, to)| { + self.data.slice(from, to) + }) } } @@ -189,4 +188,4 @@ mod test { } -} \ No newline at end of file +} diff --git a/src/common/serialize.rs b/src/common/serialize.rs index ee86247c5..87b735769 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -101,9 +101,9 @@ impl BinarySerializable for String { fn deserialize(reader: &mut R) -> io::Result { let string_length = VInt::deserialize(reader)?.val() as usize; let mut result = String::with_capacity(string_length); - reader - .take(string_length as u64) - .read_to_string(&mut result)?; + reader.take(string_length as u64).read_to_string( + &mut result, + )?; Ok(result) } } diff --git a/src/common/timer.rs b/src/common/timer.rs index 035bd65de..84e0f8c3a 100644 --- a/src/common/timer.rs +++ b/src/common/timer.rs @@ -24,16 +24,14 @@ impl<'a> OpenTimer<'a> { impl<'a> Drop for OpenTimer<'a> { fn drop(&mut self) { - self.timer_tree - .timings - .push(Timing { - name: self.name, - duration: self.start - .to(PreciseTime::now()) - .num_microseconds() - .unwrap(), - depth: self.depth, - }); + self.timer_tree.timings.push(Timing { + name: self.name, + duration: self.start + .to(PreciseTime::now()) + .num_microseconds() + .unwrap(), + depth: self.depth, + }); } } diff --git a/src/common/vint.rs b/src/common/vint.rs index 39653e8a7..07cdfa24c 100644 --- a/src/common/vint.rs +++ b/src/common/vint.rs @@ -47,7 +47,12 @@ impl BinarySerializable for VInt { } shift += 7; } - _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "Reach end of buffer")), + _ => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Reach end of buffer", + )) + } } } Ok(VInt(result)) diff --git a/src/compression/mod.rs b/src/compression/mod.rs index 03750074b..cd40e4f1a 100644 --- a/src/compression/mod.rs +++ b/src/compression/mod.rs @@ -5,13 +5,13 @@ mod stream; pub use self::stream::CompressedIntStream; -#[cfg(not(feature="simdcompression"))] +#[cfg(not(feature = "simdcompression"))] mod pack { mod compression_pack_nosimd; pub use self::compression_pack_nosimd::{BlockEncoder, BlockDecoder}; } -#[cfg(feature="simdcompression")] +#[cfg(feature = "simdcompression")] mod pack { mod compression_pack_simd; pub use self::compression_pack_simd::{BlockEncoder, BlockDecoder}; @@ -19,13 +19,13 @@ mod pack { pub use self::pack::{BlockEncoder, BlockDecoder}; -#[cfg( any(not(feature="simdcompression"), target_env="msvc") )] +#[cfg(any(not(feature = "simdcompression"), target_env = "msvc"))] mod vint { mod compression_vint_nosimd; pub(crate) use self::compression_vint_nosimd::*; } -#[cfg( all(feature="simdcompression", not(target_env="msvc")) )] 
+#[cfg(all(feature = "simdcompression", not(target_env = "msvc")))] mod vint { mod compression_vint_simd; pub(crate) use self::compression_vint_simd::*; @@ -70,21 +70,19 @@ pub trait VIntDecoder { /// For instance, if delta encoded are `1, 3, 9`, and the /// `offset` is 5, then the output will be: /// `5 + 1 = 6, 6 + 3= 9, 9 + 9 = 18` - fn uncompress_vint_sorted<'a>(&mut self, - compressed_data: &'a [u8], - offset: u32, - num_els: usize) - -> usize; + fn uncompress_vint_sorted<'a>( + &mut self, + compressed_data: &'a [u8], + offset: u32, + num_els: usize, + ) -> usize; /// Uncompress an array of `u32s`, compressed using variable /// byte encoding. /// /// The method takes a number of int to decompress, and returns /// the amount of bytes that were read to decompress them. - fn uncompress_vint_unsorted<'a>(&mut self, - compressed_data: &'a [u8], - num_els: usize) - -> usize; + fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> usize; } impl VIntEncoder for BlockEncoder { @@ -98,19 +96,17 @@ impl VIntEncoder for BlockEncoder { } impl VIntDecoder for BlockDecoder { - fn uncompress_vint_sorted<'a>(&mut self, - compressed_data: &'a [u8], - offset: u32, - num_els: usize) - -> usize { + fn uncompress_vint_sorted<'a>( + &mut self, + compressed_data: &'a [u8], + offset: u32, + num_els: usize, + ) -> usize { self.output_len = num_els; vint::uncompress_sorted(compressed_data, &mut self.output[..num_els], offset) } - fn uncompress_vint_unsorted<'a>(&mut self, - compressed_data: &'a [u8], - num_els: usize) - -> usize { + fn uncompress_vint_unsorted<'a>(&mut self, compressed_data: &'a [u8], num_els: usize) -> usize { self.output_len = num_els; vint::uncompress_unsorted(compressed_data, &mut self.output[..num_els]) } @@ -125,7 +121,6 @@ pub mod tests { use super::*; use tests; use test::Bencher; - use std::iter; #[test] fn test_encode_sorted_block() { @@ -236,7 +231,7 @@ pub mod tests { #[test] fn test_all_docs_compression_numbits() { for num_bits in 0..33 { - let mut data: Vec = iter::repeat(0u32).take(128).collect(); + let mut data = [0u32; 128]; if num_bits > 0 { data[0] = 1 << (num_bits - 1); } @@ -262,7 +257,9 @@ pub mod tests { let data = tests::generate_array(NUM_INTS_BENCH_VINT, 0.001); let compressed = encoder.compress_vint_sorted(&data, 0u32); let mut decoder = BlockDecoder::new(); - b.iter(|| { decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT); }); + b.iter(|| { + decoder.uncompress_vint_sorted(compressed, 0u32, NUM_INTS_BENCH_VINT); + }); } } diff --git a/src/compression/pack/compression_pack_nosimd.rs b/src/compression/pack/compression_pack_nosimd.rs index 24379b9a4..7780d63b9 100644 --- a/src/compression/pack/compression_pack_nosimd.rs +++ b/src/compression/pack/compression_pack_nosimd.rs @@ -25,9 +25,9 @@ pub fn compress_sorted(vals: &mut [u32], mut output: &mut [u8], offset: u32) -> bit_packer.write(*val, &mut output).unwrap(); } 1 + - bit_packer - .close(&mut output) - .expect("packing in memory should never fail") + bit_packer.close(&mut output).expect( + "packing in memory should never fail", + ) } @@ -56,10 +56,9 @@ impl BlockEncoder { pub fn compress_block_unsorted(&mut self, vals: &[u32]) -> &[u8] { let compressed_size: usize = { let mut output: &mut [u8] = &mut self.output; - let max = vals.iter() - .cloned() - .max() - .expect("compress unsorted called with an empty array"); + let max = vals.iter().cloned().max().expect( + "compress unsorted called with an empty array", + ); let num_bits = compute_num_bits(max); 
output.write_all(&[num_bits]).unwrap(); let mut bit_packer = BitPacker::new(num_bits as usize); @@ -67,9 +66,9 @@ impl BlockEncoder { bit_packer.write(*val, &mut output).unwrap(); } 1 + - bit_packer - .close(&mut output) - .expect("packing in memory should never fail") + bit_packer.close(&mut output).expect( + "packing in memory should never fail", + ) }; &self.output[..compressed_size] } @@ -93,10 +92,11 @@ impl BlockDecoder { } } - pub fn uncompress_block_sorted<'a>(&mut self, - compressed_data: &'a [u8], - mut offset: u32) - -> &'a [u8] { + pub fn uncompress_block_sorted<'a>( + &mut self, + compressed_data: &'a [u8], + mut offset: u32, + ) -> &'a [u8] { let consumed_size = { let num_bits = compressed_data[0]; let bit_unpacker = BitUnpacker::new(&compressed_data[1..], num_bits as usize); diff --git a/src/compression/pack/compression_pack_simd.rs b/src/compression/pack/compression_pack_simd.rs index d24d0f65b..498eb7852 100644 --- a/src/compression/pack/compression_pack_simd.rs +++ b/src/compression/pack/compression_pack_simd.rs @@ -8,10 +8,11 @@ mod simdcomp { extern "C" { pub fn compress_sorted(data: *const u32, output: *mut u8, offset: u32) -> size_t; - pub fn uncompress_sorted(compressed_data: *const u8, - output: *mut u32, - offset: u32) - -> size_t; + pub fn uncompress_sorted( + compressed_data: *const u8, + output: *mut u32, + offset: u32, + ) -> size_t; pub fn compress_unsorted(data: *const u32, output: *mut u8) -> size_t; @@ -78,10 +79,7 @@ impl BlockDecoder { } } - pub fn uncompress_block_sorted(&mut self, - compressed_data: &[u8], - offset: u32) - -> usize { + pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize { let consumed_size = uncompress_sorted(compressed_data, &mut self.output, offset); self.output_len = COMPRESSION_BLOCK_SIZE; consumed_size diff --git a/src/compression/stream.rs b/src/compression/stream.rs index a4c4422c5..cd1771759 100644 --- a/src/compression/stream.rs +++ b/src/compression/stream.rs @@ -16,7 +16,6 @@ pub struct CompressedIntStream { } impl CompressedIntStream { - /// Opens a compressed int stream. pub(crate) fn wrap(source: ReadOnlySource) -> CompressedIntStream { CompressedIntStream { @@ -35,17 +34,21 @@ impl CompressedIntStream { let available = COMPRESSION_BLOCK_SIZE - self.inner_offset; if num_els >= available { if available > 0 { - let uncompressed_block = &self.block_decoder.output_array()[self.inner_offset..]; + let uncompressed_block = &self.block_decoder.output_array() + [self.inner_offset..]; &mut output[start..start + available].clone_from_slice(uncompressed_block); } num_els -= available; start += available; - let num_consumed_bytes = self.block_decoder.uncompress_block_unsorted(self.buffer.as_ref()); + let num_consumed_bytes = self.block_decoder.uncompress_block_unsorted( + self.buffer.as_ref(), + ); self.buffer.advance(num_consumed_bytes); self.inner_offset = 0; - } - else { - let uncompressed_block = &self.block_decoder.output_array()[self.inner_offset..self.inner_offset + num_els]; + } else { + let uncompressed_block = &self.block_decoder.output_array()[self.inner_offset.. + self.inner_offset + + num_els]; &output[start..start + num_els].clone_from_slice(uncompressed_block); self.inner_offset += num_els; break; @@ -62,8 +65,7 @@ impl CompressedIntStream { let available = COMPRESSION_BLOCK_SIZE - self.inner_offset; if available >= skip_len { self.inner_offset += skip_len; - } - else { + } else { skip_len -= available; // entirely skip decompressing some blocks. 
while skip_len >= COMPRESSION_BLOCK_SIZE { @@ -72,7 +74,9 @@ impl CompressedIntStream { let block_len = compressed_block_size(num_bits); self.buffer.advance(block_len); } - let num_consumed_bytes = self.block_decoder.uncompress_block_unsorted(self.buffer.as_ref()); + let num_consumed_bytes = self.block_decoder.uncompress_block_unsorted( + self.buffer.as_ref(), + ); self.buffer.advance(num_consumed_bytes); self.inner_offset = skip_len; } @@ -90,7 +94,7 @@ pub mod tests { use directory::ReadOnlySource; fn create_stream_buffer() -> ReadOnlySource { - let mut buffer: Vec = vec!(); + let mut buffer: Vec = vec![]; let mut encoder = BlockEncoder::new(); let vals: Vec = (0u32..1_025u32).collect(); for chunk in vals.chunks(COMPRESSION_BLOCK_SIZE) { diff --git a/src/compression/vint/compression_vint_nosimd.rs b/src/compression/vint/compression_vint_nosimd.rs index 4b5e6ec3d..0e0e272d4 100644 --- a/src/compression/vint/compression_vint_nosimd.rs +++ b/src/compression/vint/compression_vint_nosimd.rs @@ -1,6 +1,10 @@ #[inline(always)] -pub(crate) fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], mut offset: u32) -> &'a [u8] { +pub(crate) fn compress_sorted<'a>( + input: &[u32], + output: &'a mut [u8], + mut offset: u32, +) -> &'a [u8] { let mut byte_written = 0; for &v in input { let mut to_encode: u32 = v - offset; @@ -43,10 +47,11 @@ pub(crate) fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a } #[inline(always)] -pub(crate) fn uncompress_sorted<'a>(compressed_data: &'a [u8], - output: &mut [u32], - offset: u32) - -> &'a [u8] { +pub(crate) fn uncompress_sorted<'a>( + compressed_data: &'a [u8], + output: &mut [u32], + offset: u32, +) -> &'a [u8] { let mut read_byte = 0; let mut result = offset; let num_els = output.len(); diff --git a/src/compression/vint/compression_vint_simd.rs b/src/compression/vint/compression_vint_simd.rs index 7c4cd9fe0..0b508a812 100644 --- a/src/compression/vint/compression_vint_simd.rs +++ b/src/compression/vint/compression_vint_simd.rs @@ -4,24 +4,27 @@ mod streamvbyte { use libc::size_t; extern "C" { - pub fn streamvbyte_delta_encode(data: *const u32, - num_els: u32, - output: *mut u8, - offset: u32) - -> size_t; + pub fn streamvbyte_delta_encode( + data: *const u32, + num_els: u32, + output: *mut u8, + offset: u32, + ) -> size_t; - pub fn streamvbyte_delta_decode(compressed_data: *const u8, - output: *mut u32, - num_els: u32, - offset: u32) - -> size_t; + pub fn streamvbyte_delta_decode( + compressed_data: *const u8, + output: *mut u32, + num_els: u32, + offset: u32, + ) -> size_t; pub fn streamvbyte_encode(data: *const u32, num_els: u32, output: *mut u8) -> size_t; - pub fn streamvbyte_decode(compressed_data: *const u8, - output: *mut u32, - num_els: usize) - -> size_t; + pub fn streamvbyte_decode( + compressed_data: *const u8, + output: *mut u32, + num_els: usize, + ) -> size_t; } } @@ -29,10 +32,12 @@ mod streamvbyte { #[inline(always)] pub(crate) fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], offset: u32) -> &'a [u8] { let compress_length = unsafe { - streamvbyte::streamvbyte_delta_encode(input.as_ptr(), - input.len() as u32, - output.as_mut_ptr(), - offset) + streamvbyte::streamvbyte_delta_encode( + input.as_ptr(), + input.len() as u32, + output.as_mut_ptr(), + offset, + ) }; &output[..compress_length] } @@ -47,15 +52,18 @@ pub(crate) fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a } #[inline(always)] -pub(crate) fn uncompress_sorted<'a>(compressed_data: &'a [u8], - output: &mut [u32], - offset: u32) - -> 
usize { +pub(crate) fn uncompress_sorted<'a>( + compressed_data: &'a [u8], + output: &mut [u32], + offset: u32, +) -> usize { unsafe { - streamvbyte::streamvbyte_delta_decode(compressed_data.as_ptr(), - output.as_mut_ptr(), - output.len() as u32, - offset) + streamvbyte::streamvbyte_delta_decode( + compressed_data.as_ptr(), + output.as_mut_ptr(), + output.len() as u32, + offset, + ) } } diff --git a/src/core/index.rs b/src/core/index.rs index 01a0abe54..e4acb8a07 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -48,9 +48,10 @@ impl Index { pub fn create_in_ram(schema: Schema) -> Index { let ram_directory = RAMDirectory::create(); // unwrap is ok here - let directory = ManagedDirectory::new(ram_directory) - .expect("Creating a managed directory from a brand new RAM directory \ - should never fail."); + let directory = ManagedDirectory::new(ram_directory).expect( + "Creating a managed directory from a brand new RAM directory \ + should never fail.", + ); Index::from_directory(directory, schema).expect("Creating a RAMDirectory should never fail") } @@ -127,10 +128,11 @@ impl Index { /// If the lockfile already exists, returns `Error::FileAlreadyExists`. /// # Panics /// If the heap size per thread is too small, panics. - pub fn writer_with_num_threads(&self, - num_threads: usize, - heap_size_in_bytes: usize) - -> Result { + pub fn writer_with_num_threads( + &self, + num_threads: usize, + heap_size_in_bytes: usize, + ) -> Result { open_index_writer(self, num_threads, heap_size_in_bytes) } @@ -155,10 +157,12 @@ impl Index { /// Returns the list of segments that are searchable pub fn searchable_segments(&self) -> Result> { - Ok(self.searchable_segment_metas()? - .into_iter() - .map(|segment_meta| self.segment(segment_meta)) - .collect()) + Ok( + self.searchable_segment_metas()? + .into_iter() + .map(|segment_meta| self.segment(segment_meta)) + .collect(), + ) } #[doc(hidden)] @@ -190,10 +194,12 @@ impl Index { /// Returns the list of segment ids that are searchable. pub fn searchable_segment_ids(&self) -> Result> { - Ok(self.searchable_segment_metas()? - .iter() - .map(|segment_meta| segment_meta.id()) - .collect()) + Ok( + self.searchable_segment_metas()? + .iter() + .map(|segment_meta| segment_meta.id()) + .collect(), + ) } /// Creates a new generation of searchers after @@ -203,10 +209,12 @@ impl Index { /// published or after a merge. 
pub fn load_searchers(&self) -> Result<()> { let searchable_segments = self.searchable_segments()?; - let segment_readers: Vec = try!(searchable_segments - .into_iter() - .map(SegmentReader::open) - .collect()); + let segment_readers: Vec = try!( + searchable_segments + .into_iter() + .map(SegmentReader::open) + .collect() + ); let searchers = (0..NUM_SEARCHERS) .map(|_| Searcher::from(segment_readers.clone())) .collect(); diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs index 785846a0d..6eafddf77 100644 --- a/src/core/index_meta.rs +++ b/src/core/index_meta.rs @@ -9,7 +9,7 @@ use core::SegmentMeta; /// * the index docstamp /// * the schema /// -#[derive(Clone,Debug,Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct IndexMeta { pub segments: Vec, pub schema: Schema, diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index b44cc176c..06816f361 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -1,6 +1,5 @@ use directory::{SourceRead, ReadOnlySource}; use termdict::{TermDictionary, TermDictionaryImpl}; -use std::io; use postings::{SegmentPostings, BlockSegmentPostings}; use postings::TermInfo; use postings::SegmentPostingsOption; @@ -33,22 +32,21 @@ pub struct InvertedIndexReader { } impl InvertedIndexReader { - pub(crate) fn new( termdict_source: ReadOnlySource, postings_source: ReadOnlySource, positions_source: ReadOnlySource, delete_bitset: DeleteBitSet, schema: Schema, - ) -> io::Result { + ) -> InvertedIndexReader { - Ok(InvertedIndexReader { - termdict: TermDictionaryImpl::from_source(termdict_source)?, + InvertedIndexReader { + termdict: TermDictionaryImpl::from_source(termdict_source), postings_source: postings_source, positions_source: positions_source, delete_bitset: delete_bitset, schema: schema, - }) + } } /// Returns the term info associated with the term. @@ -72,9 +70,11 @@ impl InvertedIndexReader { /// # Warning /// /// This does not reset the positions list. - pub fn reset_block_postings_from_terminfo(&self, - term_info: &TermInfo, - block_postings: &mut BlockSegmentPostings) { + pub fn reset_block_postings_from_terminfo( + &self, + term_info: &TermInfo, + block_postings: &mut BlockSegmentPostings, + ) { let offset = term_info.postings_offset as usize; let end_source = self.postings_source.len(); let postings_slice = self.postings_source.slice(offset, end_source); @@ -88,27 +88,30 @@ impl InvertedIndexReader { /// This method is for an advanced usage only. /// /// Most user should prefer using `read_postings` instead. - pub fn read_block_postings_from_terminfo(&self, - term_info: &TermInfo, - option: SegmentPostingsOption) - -> BlockSegmentPostings { + pub fn read_block_postings_from_terminfo( + &self, + term_info: &TermInfo, + option: SegmentPostingsOption, + ) -> BlockSegmentPostings { let offset = term_info.postings_offset as usize; let postings_data = self.postings_source.slice_from(offset); let has_freq = option.has_freq(); BlockSegmentPostings::from_data( term_info.doc_freq as usize, SourceRead::from(postings_data), - has_freq) + has_freq, + ) } /// Returns a posting object given a `term_info`. /// This method is for an advanced usage only. /// /// Most user should prefer using `read_postings` instead. 
- pub fn read_postings_from_terminfo(&self, - term_info: &TermInfo, - option: SegmentPostingsOption) - -> SegmentPostings { + pub fn read_postings_from_terminfo( + &self, + term_info: &TermInfo, + option: SegmentPostingsOption, + ) -> SegmentPostings { let block_postings = self.read_block_postings_from_terminfo(term_info, option); let delete_bitset = self.delete_bitset.clone(); let position_stream = { @@ -118,16 +121,11 @@ impl InvertedIndexReader { let mut stream = CompressedIntStream::wrap(positions_source); stream.skip(term_info.positions_inner_offset as usize); Some(stream) - } - else { + } else { None } }; - SegmentPostings::from_block_postings( - block_postings, - delete_bitset, - position_stream - ) + SegmentPostings::from_block_postings(block_postings, delete_bitset, position_stream) } /// Returns the segment postings associated with the term, and with the given option, @@ -140,16 +138,20 @@ impl InvertedIndexReader { /// For instance, requesting `SegmentPostingsOption::FreqAndPositions` for a /// `TextIndexingOptions` that does not index position will return a `SegmentPostings` /// with `DocId`s and frequencies. - pub fn read_postings(&self, - term: &Term, - option: SegmentPostingsOption) - -> Option { + pub fn read_postings( + &self, + term: &Term, + option: SegmentPostingsOption, + ) -> Option { let field = term.field(); let field_entry = self.schema.get_field_entry(field); let term_info = get!(self.get_term_info(term)); let maximum_option = get!(field_entry.field_type().get_segment_postings_option()); let best_effort_option = cmp::min(maximum_option, option); - Some(self.read_postings_from_terminfo(&term_info, best_effort_option)) + Some(self.read_postings_from_terminfo( + &term_info, + best_effort_option, + )) } /// Returns the number of documents containing the term. diff --git a/src/core/pool.rs b/src/core/pool.rs index 805ea3467..1796fc32c 100644 --- a/src/core/pool.rs +++ b/src/core/pool.rs @@ -76,8 +76,11 @@ impl Pool { if former_generation >= generation { break; } - self.freshest_generation - .compare_and_swap(former_generation, generation, Ordering::SeqCst); + self.freshest_generation.compare_and_swap( + former_generation, + generation, + Ordering::SeqCst, + ); } } @@ -91,9 +94,9 @@ impl Pool { let gen_item = self.queue.pop(); if gen_item.generation >= generation { return LeasedItem { - gen_item: Some(gen_item), - recycle_queue: self.queue.clone(), - }; + gen_item: Some(gen_item), + recycle_queue: self.queue.clone(), + }; } else { // this searcher is obsolete, // removing it from the pool. 
@@ -113,25 +116,26 @@ impl Deref for LeasedItem { fn deref(&self) -> &T { &self.gen_item - .as_ref() - .expect("Unwrapping a leased item should never fail") - .item // unwrap is safe here + .as_ref() + .expect("Unwrapping a leased item should never fail") + .item // unwrap is safe here } } impl DerefMut for LeasedItem { fn deref_mut(&mut self) -> &mut T { &mut self.gen_item - .as_mut() - .expect("Unwrapping a mut leased item should never fail") - .item // unwrap is safe here + .as_mut() + .expect("Unwrapping a mut leased item should never fail") + .item // unwrap is safe here } } impl Drop for LeasedItem { fn drop(&mut self) { - let gen_item: GenerationItem = mem::replace(&mut self.gen_item, None) - .expect("Unwrapping a leased item should never fail"); + let gen_item: GenerationItem = mem::replace(&mut self.gen_item, None).expect( + "Unwrapping a leased item should never fail", + ); self.recycle_queue.push(gen_item); } } diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 5afdc4684..14f1cb141 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -47,10 +47,7 @@ impl Searcher { self.segment_readers .iter() .map(|segment_reader| { - segment_reader - .inverted_index(term.field()) - .unwrap() // TODO error handling - .doc_freq(term) + segment_reader.inverted_index(term.field()).doc_freq(term) }) .fold(0u32, |acc, val| acc + val) } @@ -70,16 +67,13 @@ impl Searcher { query.search(self, collector) } - - /// - pub fn field(&self, field: Field) -> Result { + /// Return the field searcher associated to a `Field`. + pub fn field(&self, field: Field) -> FieldSearcher { let inv_index_readers = self.segment_readers .iter() - .map(|segment_reader| { - segment_reader.inverted_index(field) - }) - .collect::>>()?; - Ok(FieldSearcher::new(inv_index_readers)) + .map(|segment_reader| segment_reader.inverted_index(field)) + .collect::>(); + FieldSearcher::new(inv_index_readers) } } @@ -92,11 +86,8 @@ pub struct FieldSearcher { impl FieldSearcher { - fn new(inv_index_readers: Vec>) -> FieldSearcher { - FieldSearcher { - inv_index_readers: inv_index_readers, - } + FieldSearcher { inv_index_readers: inv_index_readers } } @@ -105,9 +96,7 @@ impl FieldSearcher { pub fn terms(&self) -> TermMerger { let term_streamers: Vec<_> = self.inv_index_readers .iter() - .map(|inverted_index| { - inverted_index.terms().stream() - }) + .map(|inverted_index| inverted_index.terms().stream()) .collect(); TermMerger::new(term_streamers) } diff --git a/src/core/segment.rs b/src/core/segment.rs index 16cb214d2..59b5eaa13 100644 --- a/src/core/segment.rs +++ b/src/core/segment.rs @@ -76,18 +76,20 @@ impl Segment { } /// Open one of the component file for a *regular* read. - pub fn open_read(&self, - component: SegmentComponent) - -> result::Result { + pub fn open_read( + &self, + component: SegmentComponent, + ) -> result::Result { let path = self.relative_path(component); let source = try!(self.index.directory().open_read(&path)); Ok(source) } /// Open one of the component file for *regular* write. 
- pub fn open_write(&mut self, - component: SegmentComponent) - -> result::Result { + pub fn open_write( + &mut self, + component: SegmentComponent, + ) -> result::Result { let path = self.relative_path(component); let write = try!(self.index.directory_mut().open_write(&path)); Ok(write) @@ -125,11 +127,11 @@ mod tests { { let _file_protection = segment.protect_from_delete(SegmentComponent::POSTINGS); assert!(directory.exists(&*path)); - directory.garbage_collect(|| { living_files.clone() }); + directory.garbage_collect(|| living_files.clone()); assert!(directory.exists(&*path)); } - directory.garbage_collect(|| { living_files }); + directory.garbage_collect(|| living_files); assert!(!directory.exists(&*path)); } diff --git a/src/core/segment_component.rs b/src/core/segment_component.rs index e4cbc0068..b460258c7 100644 --- a/src/core/segment_component.rs +++ b/src/core/segment_component.rs @@ -28,13 +28,15 @@ pub enum SegmentComponent { impl SegmentComponent { /// Iterates through the components. pub fn iterator() -> impl Iterator { - static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [SegmentComponent::POSTINGS, - SegmentComponent::POSITIONS, - SegmentComponent::FASTFIELDS, - SegmentComponent::FIELDNORMS, - SegmentComponent::TERMS, - SegmentComponent::STORE, - SegmentComponent::DELETE]; + static SEGMENT_COMPONENTS: [SegmentComponent; 7] = [ + SegmentComponent::POSTINGS, + SegmentComponent::POSITIONS, + SegmentComponent::FASTFIELDS, + SegmentComponent::FIELDNORMS, + SegmentComponent::TERMS, + SegmentComponent::STORE, + SegmentComponent::DELETE, + ]; SEGMENT_COMPONENTS.into_iter() } } diff --git a/src/core/segment_meta.rs b/src/core/segment_meta.rs index 623b22442..1abe95652 100644 --- a/src/core/segment_meta.rs +++ b/src/core/segment_meta.rs @@ -64,16 +64,14 @@ impl SegmentMeta { pub fn relative_path(&self, component: SegmentComponent) -> PathBuf { let mut path = self.id().uuid_string(); path.push_str(&*match component { - SegmentComponent::POSITIONS => ".pos".to_string(), - SegmentComponent::POSTINGS => ".idx".to_string(), - SegmentComponent::TERMS => ".term".to_string(), - SegmentComponent::STORE => ".store".to_string(), - SegmentComponent::FASTFIELDS => ".fast".to_string(), - SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(), - SegmentComponent::DELETE => { - format!(".{}.del", self.delete_opstamp().unwrap_or(0)) - } - }); + SegmentComponent::POSITIONS => ".pos".to_string(), + SegmentComponent::POSTINGS => ".idx".to_string(), + SegmentComponent::TERMS => ".term".to_string(), + SegmentComponent::STORE => ".store".to_string(), + SegmentComponent::FASTFIELDS => ".fast".to_string(), + SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(), + SegmentComponent::DELETE => format!(".{}.del", self.delete_opstamp().unwrap_or(0)), + }); PathBuf::from(path) } @@ -111,8 +109,8 @@ impl SegmentMeta { #[doc(hidden)] pub fn set_delete_meta(&mut self, num_deleted_docs: u32, opstamp: u64) { self.deletes = Some(DeleteMeta { - num_deleted_docs: num_deleted_docs, - opstamp: opstamp, - }); + num_deleted_docs: num_deleted_docs, + opstamp: opstamp, + }); } } diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 77195304e..c77c71a7b 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -4,7 +4,6 @@ use core::SegmentId; use core::SegmentComponent; use std::sync::RwLock; use common::HasLen; -use error::ErrorKind; use core::SegmentMeta; use fastfield::{self, FastFieldNotAvailableError}; use fastfield::DeleteBitSet; @@ -87,17 +86,17 @@ impl SegmentReader { /// /// 
# Panics /// May panic if the index is corrupted. - pub fn get_fast_field_reader - (&self, field: Field) -> fastfield::Result { + pub fn get_fast_field_reader( + &self, + field: Field, + ) -> fastfield::Result { let field_entry = self.schema.get_field_entry(field); if !TFastFieldReader::is_enabled(field_entry.field_type()) { Err(FastFieldNotAvailableError::new(field_entry)) } else { self.fast_fields_composite .open_read(field) - .ok_or_else(|| { - FastFieldNotAvailableError::new(field_entry) - }) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) .map(TFastFieldReader::open) } } @@ -111,9 +110,9 @@ impl SegmentReader { /// They are simply stored as a fast field, serialized in /// the `.fieldnorm` file of the segment. pub fn get_fieldnorms_reader(&self, field: Field) -> Option { - self.fieldnorms_composite - .open_read(field) - .map(U64FastFieldReader::open) + self.fieldnorms_composite.open_read(field).map( + U64FastFieldReader::open, + ) } /// Accessor to the segment's `StoreReader`. @@ -131,13 +130,12 @@ impl SegmentReader { let store_reader = StoreReader::from_source(store_source); let postings_source = segment.open_read(SegmentComponent::POSTINGS)?; - let postings_composite = CompositeFile::open(postings_source)?; + let postings_composite = CompositeFile::open(postings_source)?; let positions_composite = { if let Ok(source) = segment.open_read(SegmentComponent::POSITIONS) { CompositeFile::open(source)? - } - else { + } else { CompositeFile::empty() } }; @@ -159,17 +157,17 @@ impl SegmentReader { let schema = segment.schema(); Ok(SegmentReader { - inv_idx_reader_cache: Arc::new(RwLock::new(HashMap::new())), - segment_meta: segment.meta().clone(), - termdict_composite: termdict_composite, - postings_composite: postings_composite, - fast_fields_composite: fast_fields_composite, - fieldnorms_composite: fieldnorms_composite, - segment_id: segment.id(), - store_reader: store_reader, - delete_bitset: delete_bitset, - positions_composite: positions_composite, - schema: schema, + inv_idx_reader_cache: Arc::new(RwLock::new(HashMap::new())), + segment_meta: segment.meta().clone(), + termdict_composite: termdict_composite, + postings_composite: postings_composite, + fast_fields_composite: fast_fields_composite, + fieldnorms_composite: fieldnorms_composite, + segment_id: segment.id(), + store_reader: store_reader, + delete_bitset: delete_bitset, + positions_composite: positions_composite, + schema: schema, }) } @@ -179,32 +177,27 @@ impl SegmentReader { /// The field reader is in charge of iterating through the /// term dictionary associated to a specific field, /// and opening the posting list associated to any term. - pub fn inverted_index(&self, field: Field) -> Result> { - if let Some(inv_idx_reader) = self.inv_idx_reader_cache.read() - .expect("Lock poisoned. This should never happen") - .get(&field) { - return Ok(inv_idx_reader.clone()); + pub fn inverted_index(&self, field: Field) -> Arc { + if let Some(inv_idx_reader) = + self.inv_idx_reader_cache + .read() + .expect("Lock poisoned. This should never happen") + .get(&field) + { + inv_idx_reader.clone(); } - let termdict_source: ReadOnlySource = self.termdict_composite - .open_read(field) - .ok_or_else(|| { - ErrorKind::SchemaError( - format!("Could not find {:?} term dictionary", field) - ) - })?; + let termdict_source: ReadOnlySource = self.termdict_composite.open_read(field).expect( + "Index corrupted. 
Failed to open field term dictionary in composite file.", + ); - let postings_source = self.postings_composite - .open_read(field) - .ok_or_else(|| { - ErrorKind::SchemaError(format!("Could not find {:?} postings", field)) - })?; + let postings_source = self.postings_composite.open_read(field).expect( + "Index corrupted. Failed to open field postings in composite file.", + ); - let positions_source = self.positions_composite - .open_read(field) - .ok_or_else(|| { - ErrorKind::SchemaError(format!("Could not find {:?} positions", field)) - })?; + let positions_source = self.positions_composite.open_read(field).expect( + "Index corrupted. Failed to open field positions in composite file.", + ); let inv_idx_reader = Arc::new(InvertedIndexReader::new( termdict_source, @@ -212,15 +205,18 @@ impl SegmentReader { positions_source, self.delete_bitset.clone(), self.schema.clone(), - )?); + )); // by releasing the lock in between, we may end up opening the inverting index // twice, but this is fine. self.inv_idx_reader_cache .write() - .expect("Field reader cache lock poisoned. This should never happen.") + .expect( + "Field reader cache lock poisoned. This should never happen.", + ) .insert(field, inv_idx_reader.clone()); - Ok(inv_idx_reader) + + inv_idx_reader } /// Returns the document (or to be accurate, its stored field) diff --git a/src/datastruct/skip/skiplist_builder.rs b/src/datastruct/skip/skiplist_builder.rs index eaa439d08..af665ab3c 100644 --- a/src/datastruct/skip/skiplist_builder.rs +++ b/src/datastruct/skip/skiplist_builder.rs @@ -39,11 +39,11 @@ impl LayerBuilder { doc_id.serialize(&mut self.buffer)?; value.serialize(&mut self.buffer)?; Ok(if self.remaining == 0 { - self.remaining = self.period; - Some((doc_id, offset)) - } else { - None - }) + self.remaining = self.period; + Some((doc_id, offset)) + } else { + None + }) } } @@ -78,8 +78,10 @@ impl SkipListBuilder { loop { skip_pointer = match skip_pointer { Some((skip_doc_id, skip_offset)) => { - try!(self.get_skip_layer(layer_id) - .insert(skip_doc_id, &skip_offset)) + try!(self.get_skip_layer(layer_id).insert( + skip_doc_id, + &skip_offset, + )) } None => { return Ok(()); diff --git a/src/datastruct/stacker/hashmap.rs b/src/datastruct/stacker/hashmap.rs index 03f18ed51..c9054dff2 100644 --- a/src/datastruct/stacker/hashmap.rs +++ b/src/datastruct/stacker/hashmap.rs @@ -68,9 +68,14 @@ pub(crate) fn split_memory(per_thread_memory_budget: usize) -> (usize, usize) { }; let table_num_bits: usize = (1..) 
.into_iter() - .take_while(|num_bits: &usize| compute_table_size(*num_bits) < table_size_limit) + .take_while(|num_bits: &usize| { + compute_table_size(*num_bits) < table_size_limit + }) .last() - .expect(&format!("Per thread memory is too small: {}", per_thread_memory_budget)); + .expect(&format!( + "Per thread memory is too small: {}", + per_thread_memory_budget + )); let table_size = compute_table_size(table_num_bits); let heap_size = per_thread_memory_budget - table_size; (heap_size, table_num_bits) @@ -174,13 +179,10 @@ impl<'a> HashMap<'a> { } pub fn iter<'b: 'a>(&'b self) -> impl Iterator + 'b { - self.occupied - .iter() - .cloned() - .map(move |bucket: usize| { - let kv = self.table[bucket]; - self.get_key_value(kv.key_value_addr) - }) + self.occupied.iter().cloned().map(move |bucket: usize| { + let kv = self.table[bucket]; + self.get_key_value(kv.key_value_addr) + }) } @@ -282,8 +284,10 @@ mod tests { let s1 = "abcdef"; let s2 = "abcdeg"; for i in 0..5 { - assert_eq!(murmurhash2(&s1[i..5].as_bytes()), - murmurhash2(&s2[i..5].as_bytes())); + assert_eq!( + murmurhash2(&s1[i..5].as_bytes()), + murmurhash2(&s2[i..5].as_bytes()) + ); } } @@ -303,13 +307,13 @@ mod tests { let keys: Vec<&'static str> = vec!["wer qwe qwe qwe ", "werbq weqweqwe2 ", "weraq weqweqwe3 "]; b.iter(|| { - keys.iter() - .map(|&s| s.as_bytes()) - .map(murmurhash2::murmurhash2) - .map(|h| h as u64) - .last() - .unwrap() - }); + keys.iter() + .map(|&s| s.as_bytes()) + .map(murmurhash2::murmurhash2) + .map(|h| h as u64) + .last() + .unwrap() + }); } diff --git a/src/datastruct/stacker/heap.rs b/src/datastruct/stacker/heap.rs index 9d7a8f885..0bfd01fc2 100644 --- a/src/datastruct/stacker/heap.rs +++ b/src/datastruct/stacker/heap.rs @@ -144,7 +144,8 @@ impl InnerHeap { addr } else { if self.next_heap.is_none() { - info!(r#"Exceeded heap size. The segment will be committed right after indexing this document."#,); + info!(r#"Exceeded heap size. 
The segment will be committed right + after indexing this document."#,); self.next_heap = Some(Box::new(InnerHeap::with_capacity(self.buffer_len as usize))); } self.next_heap.as_mut().unwrap().allocate_space(num_bytes) + self.buffer_len @@ -154,10 +155,9 @@ impl InnerHeap { fn get_slice(&self, bytes_ref: BytesRef) -> &[u8] { let start = bytes_ref.0; if start >= self.buffer_len { - self.next_heap - .as_ref() - .unwrap() - .get_slice(BytesRef(start - self.buffer_len)) + self.next_heap.as_ref().unwrap().get_slice(BytesRef( + start - self.buffer_len, + )) } else { let start = start as usize; let len = NativeEndian::read_u16(&self.buffer[start..start + 2]) as usize; @@ -167,10 +167,10 @@ impl InnerHeap { fn get_mut_slice(&mut self, start: u32, stop: u32) -> &mut [u8] { if start >= self.buffer_len { - self.next_heap - .as_mut() - .unwrap() - .get_mut_slice(start - self.buffer_len, stop - self.buffer_len) + self.next_heap.as_mut().unwrap().get_mut_slice( + start - self.buffer_len, + stop - self.buffer_len, + ) } else { &mut self.buffer[start as usize..stop as usize] } @@ -188,10 +188,9 @@ impl InnerHeap { fn get_mut(&mut self, addr: u32) -> *mut u8 { if addr >= self.buffer_len { - self.next_heap - .as_mut() - .unwrap() - .get_mut(addr - self.buffer_len) + self.next_heap.as_mut().unwrap().get_mut( + addr - self.buffer_len, + ) } else { let addr_isize = addr as isize; unsafe { self.buffer.as_mut_ptr().offset(addr_isize) } @@ -200,10 +199,9 @@ impl InnerHeap { fn get_mut_ref(&mut self, addr: u32) -> &mut Item { if addr >= self.buffer_len { - self.next_heap - .as_mut() - .unwrap() - .get_mut_ref(addr - self.buffer_len) + self.next_heap.as_mut().unwrap().get_mut_ref( + addr - self.buffer_len, + ) } else { let v_ptr_u8 = self.get_mut(addr) as *mut u8; let v_ptr = v_ptr_u8 as *mut Item; @@ -213,10 +211,10 @@ impl InnerHeap { pub fn set(&mut self, addr: u32, val: &Item) { if addr >= self.buffer_len { - self.next_heap - .as_mut() - .unwrap() - .set(addr - self.buffer_len, val); + self.next_heap.as_mut().unwrap().set( + addr - self.buffer_len, + val, + ); } else { let v_ptr: *const Item = val as *const Item; let v_ptr_u8: *const u8 = v_ptr as *const u8; diff --git a/src/directory/error.rs b/src/directory/error.rs index d864012ea..73424f2e0 100644 --- a/src/directory/error.rs +++ b/src/directory/error.rs @@ -103,9 +103,11 @@ impl fmt::Display for OpenWriteError { write!(f, "the file '{:?}' already exists", path) } OpenWriteError::IOError(ref err) => { - write!(f, - "an io error occurred while opening a file for writing: '{}'", - err) + write!( + f, + "an io error occurred while opening a file for writing: '{}'", + err + ) } } } @@ -147,9 +149,11 @@ impl fmt::Display for OpenReadError { write!(f, "the file '{:?}' does not exist", path) } OpenReadError::IOError(ref err) => { - write!(f, - "an io error occurred while opening a file for reading: '{}'", - err) + write!( + f, + "an io error occurred while opening a file for reading: '{}'", + err + ) } } } diff --git a/src/directory/managed_directory.rs b/src/directory/managed_directory.rs index 5f4e7e773..8005c62b4 100644 --- a/src/directory/managed_directory.rs +++ b/src/directory/managed_directory.rs @@ -45,10 +45,9 @@ pub struct FileProtection { } fn unprotect_file_from_delete(directory: &ManagedDirectory, path: &Path) { - let mut meta_informations_wlock = directory - .meta_informations - .write() - .expect("Managed file lock poisoned"); + let mut meta_informations_wlock = directory.meta_informations.write().expect( + "Managed file lock poisoned", + ); if let 
Some(counter_ref_mut) = meta_informations_wlock.protected_files.get_mut(path) { (*counter_ref_mut) -= 1; } @@ -68,9 +67,10 @@ impl Drop for FileProtection { /// Saves the file containing the list of existing files /// that were created by tantivy. -fn save_managed_paths(directory: &mut Directory, - wlock: &RwLockWriteGuard) - -> io::Result<()> { +fn save_managed_paths( + directory: &mut Directory, + wlock: &RwLockWriteGuard, +) -> io::Result<()> { let mut w = serde_json::to_vec(&wlock.managed_paths)?; write!(&mut w, "\n")?; directory.atomic_write(&MANAGED_FILEPATH, &w[..])?; @@ -84,22 +84,22 @@ impl ManagedDirectory { Ok(data) => { let managed_files_json = String::from_utf8_lossy(&data); let managed_files: HashSet = - serde_json::from_str(&managed_files_json) - .chain_err(|| ErrorKind::CorruptedFile(MANAGED_FILEPATH.clone()))?; + serde_json::from_str(&managed_files_json).chain_err(|| { + ErrorKind::CorruptedFile(MANAGED_FILEPATH.clone()) + })?; Ok(ManagedDirectory { - directory: box directory, - meta_informations: Arc::new(RwLock::new(MetaInformation { - managed_paths: managed_files, - protected_files: - HashMap::default(), - })), - }) + directory: box directory, + meta_informations: Arc::new(RwLock::new(MetaInformation { + managed_paths: managed_files, + protected_files: HashMap::default(), + })), + }) } Err(OpenReadError::FileDoesNotExist(_)) => { Ok(ManagedDirectory { - directory: box directory, - meta_informations: Arc::default(), - }) + directory: box directory, + meta_informations: Arc::default(), + }) } Err(OpenReadError::IOError(e)) => Err(From::from(e)), } @@ -116,15 +116,14 @@ impl ManagedDirectory { /// If a file cannot be deleted (for permission reasons for instance) /// an error is simply logged, and the file remains in the list of managed /// files. - pub fn garbage_collect HashSet >(&mut self, get_living_files: L) { + pub fn garbage_collect HashSet>(&mut self, get_living_files: L) { info!("Garbage collect"); let mut files_to_delete = vec![]; { // releasing the lock as .delete() will use it too. - let meta_informations_rlock = - self.meta_informations - .read() - .expect("Managed directory rlock poisoned in garbage collect."); + let meta_informations_rlock = self.meta_informations.read().expect( + "Managed directory rlock poisoned in garbage collect.", + ); // It is crucial to get the living files after acquiring the // read lock of meta informations. That way, we @@ -177,9 +176,9 @@ impl ManagedDirectory { if !deleted_files.is_empty() { // update the list of managed files by removing // the file that were removed. 
- let mut meta_informations_wlock = self.meta_informations - .write() - .expect("Managed directory wlock poisoned (2)."); + let mut meta_informations_wlock = self.meta_informations.write().expect( + "Managed directory wlock poisoned (2).", + ); { let managed_paths_write = &mut meta_informations_wlock.managed_paths; for delete_file in &deleted_files { @@ -202,13 +201,13 @@ impl ManagedDirectory { pub fn protect_file_from_delete(&self, path: &Path) -> FileProtection { let pathbuf = path.to_owned(); { - let mut meta_informations_wlock = self.meta_informations - .write() - .expect("Managed file lock poisoned on protect"); + let mut meta_informations_wlock = self.meta_informations.write().expect( + "Managed file lock poisoned on protect", + ); *meta_informations_wlock - .protected_files - .entry(pathbuf.clone()) - .or_insert(0) += 1; + .protected_files + .entry(pathbuf.clone()) + .or_insert(0) += 1; } FileProtection { directory: self.clone(), @@ -224,9 +223,9 @@ impl ManagedDirectory { /// will not lead to garbage files that will /// never get removed. fn register_file_as_managed(&mut self, filepath: &Path) -> io::Result<()> { - let mut meta_wlock = self.meta_informations - .write() - .expect("Managed file lock poisoned"); + let mut meta_wlock = self.meta_informations.write().expect( + "Managed file lock poisoned", + ); let has_changed = meta_wlock.managed_paths.insert(filepath.to_owned()); if has_changed { save_managed_paths(self.directory.as_mut(), &meta_wlock)?; @@ -241,8 +240,9 @@ impl Directory for ManagedDirectory { } fn open_write(&mut self, path: &Path) -> result::Result { - self.register_file_as_managed(path) - .map_err(|e| IOError::with_path(path.to_owned(), e))?; + self.register_file_as_managed(path).map_err(|e| { + IOError::with_path(path.to_owned(), e) + })?; self.directory.open_write(path) } @@ -257,9 +257,9 @@ impl Directory for ManagedDirectory { fn delete(&self, path: &Path) -> result::Result<(), DeleteError> { { - let metas_rlock = self.meta_informations - .read() - .expect("poisoned lock in managed directory meta"); + let metas_rlock = self.meta_informations.read().expect( + "poisoned lock in managed directory meta", + ); if let Some(counter) = metas_rlock.protected_files.get(path) { if *counter > 0 { return Err(DeleteError::FileProtected(path.to_owned())); @@ -327,7 +327,7 @@ mod tests { { let living_files: HashSet = [TEST_PATH1.to_owned()].into_iter().cloned().collect(); - managed_directory.garbage_collect(|| { living_files }); + managed_directory.garbage_collect(|| living_files); } { assert!(managed_directory.exists(*TEST_PATH1)); @@ -343,7 +343,7 @@ mod tests { } { let living_files: HashSet = HashSet::new(); - managed_directory.garbage_collect(|| { living_files }); + managed_directory.garbage_collect(|| living_files); } { assert!(!managed_directory.exists(*TEST_PATH1)); @@ -366,7 +366,7 @@ mod tests { assert!(managed_directory.exists(*TEST_PATH1)); let _mmap_read = managed_directory.open_read(*TEST_PATH1).unwrap(); - managed_directory.garbage_collect(|| { living_files.clone() }); + managed_directory.garbage_collect(|| living_files.clone()); if cfg!(target_os = "windows") { // On Windows, gc should try and fail the file as it is mmapped. assert!(managed_directory.exists(*TEST_PATH1)); @@ -374,7 +374,7 @@ mod tests { drop(_mmap_read); // The file should still be in the list of managed file and // eventually be deleted once mmap is released. 
- managed_directory.garbage_collect(|| { living_files }); + managed_directory.garbage_collect(|| living_files); assert!(!managed_directory.exists(*TEST_PATH1)); } else { assert!(!managed_directory.exists(*TEST_PATH1)); @@ -398,11 +398,11 @@ mod tests { { let _file_protection = managed_directory.protect_file_from_delete(*TEST_PATH1); - managed_directory.garbage_collect(|| { living_files.clone() }); + managed_directory.garbage_collect(|| living_files.clone()); assert!(managed_directory.exists(*TEST_PATH1)); } - managed_directory.garbage_collect(|| { living_files.clone() }); + managed_directory.garbage_collect(|| living_files.clone()); assert!(!managed_directory.exists(*TEST_PATH1)); diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index a3d5748b8..970b987cb 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -24,15 +24,17 @@ use std::sync::Weak; use tempdir::TempDir; fn open_mmap(full_path: &PathBuf) -> result::Result>, OpenReadError> { - let file = File::open(&full_path) - .map_err(|e| if e.kind() == io::ErrorKind::NotFound { - OpenReadError::FileDoesNotExist(full_path.clone()) - } else { - OpenReadError::IOError(IOError::with_path(full_path.to_owned(), e)) - })?; + let file = File::open(&full_path).map_err(|e| if e.kind() == + io::ErrorKind::NotFound + { + OpenReadError::FileDoesNotExist(full_path.clone()) + } else { + OpenReadError::IOError(IOError::with_path(full_path.to_owned(), e)) + })?; - let meta_data = file.metadata() - .map_err(|e| IOError::with_path(full_path.to_owned(), e))?; + let meta_data = file.metadata().map_err(|e| { + IOError::with_path(full_path.to_owned(), e) + })?; if meta_data.len() == 0 { // if the file size is 0, it will not be possible // to mmap the file, so we return an anonymous mmap_cache @@ -46,7 +48,7 @@ fn open_mmap(full_path: &PathBuf) -> result::Result>, OpenReadE } -#[derive(Default,Clone,Debug,Serialize,Deserialize)] +#[derive(Default, Clone, Debug, Serialize, Deserialize)] pub struct CacheCounters { // Number of time the cache prevents to call `mmap` pub hit: usize, @@ -58,7 +60,7 @@ pub struct CacheCounters { pub miss_weak: usize, } -#[derive(Clone,Debug,Serialize,Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct CacheInfo { pub counters: CacheCounters, pub mmapped: Vec, @@ -113,31 +115,31 @@ impl MmapCache { self.cleanup(); } Ok(match self.cache.entry(full_path.clone()) { - HashMapEntry::Occupied(mut occupied_entry) => { - if let Some(mmap_arc) = occupied_entry.get().upgrade() { - self.counters.hit += 1; - Some(mmap_arc.clone()) - } else { - // The entry exists but the weak ref has been destroyed. - self.counters.miss_weak += 1; - if let Some(mmap_arc) = open_mmap(&full_path)? { - occupied_entry.insert(Arc::downgrade(&mmap_arc)); - Some(mmap_arc) - } else { - None - } - } - } - HashMapEntry::Vacant(vacant_entry) => { - self.counters.miss_empty += 1; - if let Some(mmap_arc) = open_mmap(&full_path)? { - vacant_entry.insert(Arc::downgrade(&mmap_arc)); - Some(mmap_arc) - } else { - None - } - } - }) + HashMapEntry::Occupied(mut occupied_entry) => { + if let Some(mmap_arc) = occupied_entry.get().upgrade() { + self.counters.hit += 1; + Some(mmap_arc.clone()) + } else { + // The entry exists but the weak ref has been destroyed. + self.counters.miss_weak += 1; + if let Some(mmap_arc) = open_mmap(&full_path)? 
{ + occupied_entry.insert(Arc::downgrade(&mmap_arc)); + Some(mmap_arc) + } else { + None + } + } + } + HashMapEntry::Vacant(vacant_entry) => { + self.counters.miss_empty += 1; + if let Some(mmap_arc) = open_mmap(&full_path)? { + vacant_entry.insert(Arc::downgrade(&mmap_arc)); + Some(mmap_arc) + } else { + None + } + } + }) } } @@ -180,15 +182,19 @@ impl MmapDirectory { /// exist or if it is not a directory. pub fn open(directory_path: &Path) -> Result { if !directory_path.exists() { - Err(OpenDirectoryError::DoesNotExist(PathBuf::from(directory_path))) + Err(OpenDirectoryError::DoesNotExist( + PathBuf::from(directory_path), + )) } else if !directory_path.is_dir() { - Err(OpenDirectoryError::NotADirectory(PathBuf::from(directory_path))) + Err(OpenDirectoryError::NotADirectory( + PathBuf::from(directory_path), + )) } else { Ok(MmapDirectory { - root_path: PathBuf::from(directory_path), - mmap_cache: Arc::new(RwLock::new(MmapCache::default())), - _temp_directory: Arc::new(None), - }) + root_path: PathBuf::from(directory_path), + mmap_cache: Arc::new(RwLock::new(MmapCache::default())), + _temp_directory: Arc::new(None), + }) } } @@ -215,9 +221,9 @@ impl MmapDirectory { use std::os::windows::fs::OpenOptionsExt; use winapi::winbase; - open_opts - .write(true) - .custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS); + open_opts.write(true).custom_flags( + winbase::FILE_FLAG_BACKUP_SEMANTICS, + ); } let fd = try!(open_opts.open(&self.root_path)); @@ -270,46 +276,50 @@ impl Directory for MmapDirectory { debug!("Open Read {:?}", path); let full_path = self.resolve_path(path); - let mut mmap_cache = self.mmap_cache - .write() - .map_err(|_| { - let msg = format!("Failed to acquired write lock \ + let mut mmap_cache = self.mmap_cache.write().map_err(|_| { + let msg = format!( + "Failed to acquired write lock \ on mmap cache while reading {:?}", - path); - IOError::with_path(path.to_owned(), make_io_err(msg)) - })?; + path + ); + IOError::with_path(path.to_owned(), make_io_err(msg)) + })?; - Ok(mmap_cache - .get_mmap(full_path)? - .map(MmapReadOnly::from) - .map(ReadOnlySource::Mmap) - .unwrap_or_else(|| ReadOnlySource::Anonymous(SharedVecSlice::empty()))) + Ok( + mmap_cache + .get_mmap(full_path)? + .map(MmapReadOnly::from) + .map(ReadOnlySource::Mmap) + .unwrap_or_else(|| ReadOnlySource::Anonymous(SharedVecSlice::empty())), + ) } fn open_write(&mut self, path: &Path) -> Result { debug!("Open Write {:?}", path); let full_path = self.resolve_path(path); - let open_res = OpenOptions::new() - .write(true) - .create_new(true) - .open(full_path); + let open_res = OpenOptions::new().write(true).create_new(true).open( + full_path, + ); - let mut file = open_res - .map_err(|err| if err.kind() == io::ErrorKind::AlreadyExists { - OpenWriteError::FileAlreadyExists(path.to_owned()) - } else { - IOError::with_path(path.to_owned(), err).into() - })?; + let mut file = open_res.map_err(|err| if err.kind() == + io::ErrorKind::AlreadyExists + { + OpenWriteError::FileAlreadyExists(path.to_owned()) + } else { + IOError::with_path(path.to_owned(), err).into() + })?; // making sure the file is created. - file.flush() - .map_err(|e| IOError::with_path(path.to_owned(), e))?; + file.flush().map_err( + |e| IOError::with_path(path.to_owned(), e), + )?; // Apparetntly, on some filesystem syncing the parent // directory is required. 
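// Illustrative sketch (not from the patch): the `MmapCache` reformatted above
// keeps `Weak` handles keyed by path and counts `hit`, `miss_weak` (entry
// present but the mapping was already dropped) and `miss_empty` (no entry at
// all). A simplified std-only cache following the same pattern, with `Vec<u8>`
// standing in for an actual mmap; the names below are hypothetical.
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Weak};

#[derive(Default)]
struct WeakCache {
    entries: HashMap<PathBuf, Weak<Vec<u8>>>,
    hit: usize,
    miss_weak: usize,
    miss_empty: usize,
}

impl WeakCache {
    fn get_or_load<L: FnOnce() -> Vec<u8>>(&mut self, path: PathBuf, load: L) -> Arc<Vec<u8>> {
        if let Some(weak) = self.entries.get(&path) {
            if let Some(data) = weak.upgrade() {
                self.hit += 1; // the Arc is still alive somewhere
                return data;
            }
            self.miss_weak += 1; // entry exists but the data was dropped
        } else {
            self.miss_empty += 1; // never loaded, or cleaned up earlier
        }
        let data = Arc::new(load());
        self.entries.insert(path, Arc::downgrade(&data));
        data
    }
}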
- self.sync_directory() - .map_err(|e| IOError::with_path(path.to_owned(), e))?; + self.sync_directory().map_err(|e| { + IOError::with_path(path.to_owned(), e) + })?; let writer = SafeFileWriter::new(file); Ok(BufWriter::new(Box::new(writer))) @@ -318,22 +328,23 @@ impl Directory for MmapDirectory { fn delete(&self, path: &Path) -> result::Result<(), DeleteError> { debug!("Deleting file {:?}", path); let full_path = self.resolve_path(path); - let mut mmap_cache = self.mmap_cache - .write() - .map_err(|_| { - let msg = format!("Failed to acquired write lock \ + let mut mmap_cache = self.mmap_cache.write().map_err(|_| { + let msg = format!( + "Failed to acquired write lock \ on mmap cache while deleting {:?}", - path); - IOError::with_path(path.to_owned(), make_io_err(msg)) - })?; + path + ); + IOError::with_path(path.to_owned(), make_io_err(msg)) + })?; // Removing the entry in the MMap cache. // The munmap will appear on Drop, // when the last reference is gone. mmap_cache.cache.remove(&full_path); match fs::remove_file(&full_path) { Ok(_) => { - self.sync_directory() - .map_err(|e| IOError::with_path(path.to_owned(), e).into()) + self.sync_directory().map_err(|e| { + IOError::with_path(path.to_owned(), e).into() + }) } Err(e) => { if e.kind() == io::ErrorKind::NotFound { @@ -355,8 +366,9 @@ impl Directory for MmapDirectory { let mut buffer = Vec::new(); match File::open(&full_path) { Ok(mut file) => { - file.read_to_end(&mut buffer) - .map_err(|e| IOError::with_path(path.to_owned(), e))?; + file.read_to_end(&mut buffer).map_err(|e| { + IOError::with_path(path.to_owned(), e) + })?; Ok(buffer) } Err(e) => { diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 0f205c6f1..ca23bc07c 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -41,8 +41,10 @@ impl VecWriter { impl Drop for VecWriter { fn drop(&mut self) { if !self.is_flushed { - panic!("You forgot to flush {:?} before its writter got Drop. Do not rely on drop.", - self.path) + panic!( + "You forgot to flush {:?} before its writter got Drop. 
Do not rely on drop.", + self.path + ) } } } @@ -62,8 +64,10 @@ impl Write for VecWriter { fn flush(&mut self) -> io::Result<()> { self.is_flushed = true; - try!(self.shared_directory - .write(self.path.clone(), self.data.get_ref())); + try!(self.shared_directory.write( + self.path.clone(), + self.data.get_ref(), + )); Ok(()) } } @@ -79,11 +83,11 @@ impl InnerDirectory { } fn write(&self, path: PathBuf, data: &[u8]) -> io::Result { - let mut map = try!(self.0 - .write() - .map_err(|_| { - make_io_err(format!("Failed to lock the directory, when trying to write {:?}", - path)) + let mut map = try!(self.0.write().map_err(|_| { + make_io_err(format!( + "Failed to lock the directory, when trying to write {:?}", + path + )) })); let prev_value = map.insert(path, Arc::new(Vec::from(data))); Ok(prev_value.is_some()) @@ -93,17 +97,21 @@ impl InnerDirectory { self.0 .read() .map_err(|_| { - let msg = format!("Failed to acquire read lock for the \ + let msg = format!( + "Failed to acquire read lock for the \ directory when trying to read {:?}", - path); - let io_err = make_io_err(msg); - OpenReadError::IOError(IOError::with_path(path.to_owned(), io_err)) - }) + path + ); + let io_err = make_io_err(msg); + OpenReadError::IOError(IOError::with_path(path.to_owned(), io_err)) + }) .and_then(|readable_map| { readable_map .get(path) .ok_or_else(|| OpenReadError::FileDoesNotExist(PathBuf::from(path))) - .map(|data| ReadOnlySource::Anonymous(SharedVecSlice::new(data.clone()))) + .map(|data| { + ReadOnlySource::Anonymous(SharedVecSlice::new(data.clone())) + }) }) } @@ -111,16 +119,18 @@ impl InnerDirectory { self.0 .write() .map_err(|_| { - let msg = format!("Failed to acquire write lock for the \ + let msg = format!( + "Failed to acquire write lock for the \ directory when trying to delete {:?}", - path); - let io_err = make_io_err(msg); - DeleteError::IOError(IOError::with_path(path.to_owned(), io_err)) - }) + path + ); + let io_err = make_io_err(msg); + DeleteError::IOError(IOError::with_path(path.to_owned(), io_err)) + }) .and_then(|mut writable_map| match writable_map.remove(path) { - Some(_) => Ok(()), - None => Err(DeleteError::FileDoesNotExist(PathBuf::from(path))), - }) + Some(_) => Ok(()), + None => Err(DeleteError::FileDoesNotExist(PathBuf::from(path))), + }) } fn exists(&self, path: &Path) -> bool { @@ -164,9 +174,11 @@ impl Directory for RAMDirectory { let path_buf = PathBuf::from(path); let vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone()); - let exists = self.fs - .write(path_buf.clone(), &Vec::new()) - .map_err(|err| IOError::with_path(path.to_owned(), err))?; + let exists = self.fs.write(path_buf.clone(), &Vec::new()).map_err( + |err| { + IOError::with_path(path.to_owned(), err) + }, + )?; // force the creation of the file to mimic the MMap directory. 
if exists { diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs index 32423ff96..9b1506217 100644 --- a/src/directory/read_only_source.rs +++ b/src/directory/read_only_source.rs @@ -114,7 +114,7 @@ impl From> for ReadOnlySource { /// Acts as a owning cursor over the data backed up by a ReadOnlySource pub(crate) struct SourceRead { _data_owner: ReadOnlySource, - cursor: &'static [u8] + cursor: &'static [u8], } impl SourceRead { @@ -131,7 +131,6 @@ impl AsRef<[u8]> for SourceRead { } impl From for SourceRead { - // Creates a new `SourceRead` from a given `ReadOnlySource` fn from(source: ReadOnlySource) -> SourceRead { let len = source.len(); diff --git a/src/error.rs b/src/error.rs index 8b345717d..7a2db9d2b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -112,12 +112,9 @@ impl From for Error { impl From for Error { fn from(error: OpenWriteError) -> Error { match error { - OpenWriteError::FileAlreadyExists(filepath) => { - ErrorKind::FileAlreadyExists(filepath) - } - OpenWriteError::IOError(io_error) => ErrorKind::IOError(io_error), - } - .into() + OpenWriteError::FileAlreadyExists(filepath) => ErrorKind::FileAlreadyExists(filepath), + OpenWriteError::IOError(io_error) => ErrorKind::IOError(io_error), + }.into() } } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index cce503f21..3e83f239d 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -100,8 +100,7 @@ mod tests { { let composite_file = CompositeFile::open(source).unwrap(); let field_source = composite_file.open_read(*FIELD).unwrap(); - let fast_field_reader: U64FastFieldReader = - U64FastFieldReader::open(field_source); + let fast_field_reader: U64FastFieldReader = U64FastFieldReader::open(field_source); assert_eq!(fast_field_reader.get(0), 13u64); assert_eq!(fast_field_reader.get(1), 14u64); assert_eq!(fast_field_reader.get(2), 2u64); @@ -190,9 +189,11 @@ mod tests { // forcing the amplitude to be high add_single_field_doc(&mut fast_field_writers, *FIELD, 0u64); for i in 0u64..10_000u64 { - add_single_field_doc(&mut fast_field_writers, - *FIELD, - 5_000_000_000_000_000_000u64 + i); + add_single_field_doc( + &mut fast_field_writers, + *FIELD, + 5_000_000_000_000_000_000u64 + i, + ); } fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); @@ -208,8 +209,10 @@ mod tests { assert_eq!(fast_field_reader.get(0), 0u64); for doc in 1..10_001 { - assert_eq!(fast_field_reader.get(doc), - 5_000_000_000_000_000_000u64 + doc as u64 - 1u64); + assert_eq!( + fast_field_reader.get(doc), + 5_000_000_000_000_000_000u64 + doc as u64 - 1u64 + ); } } } @@ -339,13 +342,13 @@ mod tests { fn bench_intfastfield_veclookup(b: &mut Bencher) { let permutation = generate_permutation(); b.iter(|| { - let n = test::black_box(1000u32); - let mut a = 0u64; - for _ in 0u32..n { - a = permutation[a as usize]; - } - a - }); + let n = test::black_box(1000u32); + let mut a = 0u64; + for _ in 0u32..n { + a = permutation[a as usize]; + } + a + }); } #[bench] @@ -403,13 +406,13 @@ mod tests { U64FastFieldReader::open(fast_fields_composite.open_read(*FIELD).unwrap()); b.iter(|| { - let n = test::black_box(1000u32); - let mut a = 0u32; - for _ in 0u32..n { - a = fast_field_reader.get(a) as u32; - } - a - }); + let n = test::black_box(1000u32); + let mut a = 0u32; + for _ in 0u32..n { + a = fast_field_reader.get(a) as u32; + } + a + }); } } } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 0c59cba05..8e37688e5 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs 
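// Illustrative sketch (not from the patch): the fast field reader and
// serializer hunks around here store a `min_value` and an `amplitude`
// (max - min) in a small header, then bit-pack every value as `val - min`
// using just enough bits for the amplitude. A plausible version of that
// bit-width computation and delta step; the real `compute_num_bits` in
// src/common/bitpacker.rs may differ in detail.
fn compute_num_bits(amplitude: u64) -> u8 {
    // Number of bits needed to represent any value in [0, amplitude].
    (64 - amplitude.leading_zeros()) as u8
}

fn to_header_and_deltas(vals: &[u64]) -> (u64, u8, Vec<u64>) {
    let min = vals.iter().cloned().min().unwrap_or(0);
    let max = vals.iter().cloned().max().unwrap_or(0);
    let num_bits = compute_num_bits(max - min);
    let deltas = vals.iter().map(|&v| v - min).collect();
    (min, num_bits, deltas)
}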
@@ -2,7 +2,7 @@ use directory::ReadOnlySource; use common::{self, BinarySerializable}; use common::bitpacker::{compute_num_bits, BitUnpacker}; use DocId; -use schema::{SchemaBuilder}; +use schema::SchemaBuilder; use std::path::Path; use schema::FAST; use directory::{WritePtr, RAMDirectory, Directory}; @@ -106,10 +106,10 @@ impl FastFieldReader for U64FastFieldReader { let amplitude: u64; { let mut cursor = data.as_slice(); - min_value = u64::deserialize(&mut cursor) - .expect("Failed to read the min_value of fast field."); - amplitude = u64::deserialize(&mut cursor) - .expect("Failed to read the amplitude of fast field."); + min_value = + u64::deserialize(&mut cursor).expect("Failed to read the min_value of fast field."); + amplitude = + u64::deserialize(&mut cursor).expect("Failed to read the amplitude of fast field."); } let max_value = min_value + amplitude; @@ -130,15 +130,14 @@ impl From> for U64FastFieldReader { let mut schema_builder = SchemaBuilder::default(); let field = schema_builder.add_u64_field("field", FAST); let schema = schema_builder.build(); - let path = Path::new("test"); + let path = Path::new("__dummy__"); let mut directory: RAMDirectory = RAMDirectory::create(); { - let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); - let mut serializer = FastFieldSerializer::from_write(write).unwrap(); + let write: WritePtr = directory.open_write(path).expect("With a RAMDirectory, this should never fail."); + let mut serializer = FastFieldSerializer::from_write(write).expect("With a RAMDirectory, this should never fail."); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); - // TODO Error not unwrap { - let fast_field_writer = fast_field_writers.get_field_writer(field).unwrap(); + let fast_field_writer = fast_field_writers.get_field_writer(field).expect("With a RAMDirectory, this should never fail."); for val in vals { fast_field_writer.add_val(val); } @@ -147,13 +146,12 @@ impl From> for U64FastFieldReader { serializer.close().unwrap(); } - let source = directory - .open_read(path) - .expect("Failed to open the file"); - let composite_file = CompositeFile::open(source) - .expect("Failed to read the composite file"); + let source = directory.open_read(path).expect("Failed to open the file"); + let composite_file = + CompositeFile::open(source).expect("Failed to read the composite file"); - let field_source = composite_file.open_read(field) + let field_source = composite_file + .open_read(field) .expect("File component not found"); U64FastFieldReader::open(field_source) } diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index ce2184fde..d26366de0 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -31,30 +31,22 @@ pub struct FastFieldSerializer { } impl FastFieldSerializer { - - /// Constructor pub fn from_write(write: WritePtr) -> io::Result { // just making room for the pointer to header. 
let composite_write = CompositeWrite::wrap(write); - Ok(FastFieldSerializer { - composite_write: composite_write, - }) + Ok(FastFieldSerializer { composite_write: composite_write }) } /// Start serializing a new u64 fast field - pub fn new_u64_fast_field(&mut self, - field: Field, - min_value: u64, - max_value: u64) - -> io::Result>> { - let field_write = self - .composite_write - .for_field(field); - FastSingleFieldSerializer::open( - field_write, - min_value, - max_value) + pub fn new_u64_fast_field( + &mut self, + field: Field, + min_value: u64, + max_value: u64, + ) -> io::Result>> { + let field_write = self.composite_write.for_field(field); + FastSingleFieldSerializer::open(field_write, min_value, max_value) } @@ -73,10 +65,11 @@ pub struct FastSingleFieldSerializer<'a, W: Write + 'a> { } impl<'a, W: Write> FastSingleFieldSerializer<'a, W> { - - fn open(write: &'a mut W, - min_value: u64, - max_value: u64) -> io::Result> { + fn open( + write: &'a mut W, + min_value: u64, + max_value: u64, + ) -> io::Result> { min_value.serialize(write)?; let amplitude = max_value - min_value; amplitude.serialize(write)?; diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 1427a7b36..1750f90ca 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -58,9 +58,9 @@ impl FastFieldsWriter { /// Get the `FastFieldWriter` associated to a field. pub fn get_field_writer(&mut self, field: Field) -> Option<&mut IntFastFieldWriter> { // TODO optimize - self.field_writers - .iter_mut() - .find(|field_writer| field_writer.field == field) + self.field_writers.iter_mut().find(|field_writer| { + field_writer.field == field + }) } @@ -155,9 +155,9 @@ impl IntFastFieldWriter { /// associated to the document with the `DocId` n. /// (Well, `n-1` actually because of 0-indexing) pub fn add_val(&mut self, val: u64) { - VInt(val) - .serialize(&mut self.vals) - .expect("unable to serialize VInt to Vec"); + VInt(val).serialize(&mut self.vals).expect( + "unable to serialize VInt to Vec", + ); if val > self.val_max { self.val_max = val; diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index fc22dbc84..da09c49c9 100644 --- a/src/indexer/delete_queue.rs +++ b/src/indexer/delete_queue.rs @@ -40,9 +40,9 @@ impl DeleteQueue { { let mut delete_queue_wlock = delete_queue.inner.write().unwrap(); delete_queue_wlock.last_block = Some(Arc::new(Block { - operations: Arc::default(), - next: next_block, - })); + operations: Arc::default(), + next: next_block, + })); } delete_queue @@ -59,9 +59,11 @@ impl DeleteQueue { .expect("Read lock poisoned when opening delete queue cursor") .last_block .clone() - .expect("Failed to unwrap last_block. This should never happen + .expect( + "Failed to unwrap last_block. This should never happen as the Option<> is only here to make - initialization possible"); + initialization possible", + ); let operations_len = last_block.operations.len(); DeleteCursor { block: last_block, @@ -92,9 +94,9 @@ impl DeleteQueue { // be some unflushed operations. 
// fn flush(&self) -> Option> { - let mut self_wlock = self.inner - .write() - .expect("Failed to acquire write lock on delete queue writer"); + let mut self_wlock = self.inner.write().expect( + "Failed to acquire write lock on delete queue writer", + ); let delete_operations; { @@ -108,9 +110,9 @@ impl DeleteQueue { let next_block = NextBlock::from(self.clone()); { self_wlock.last_block = Some(Arc::new(Block { - operations: Arc::new(delete_operations), - next: next_block, - })); + operations: Arc::new(delete_operations), + next: next_block, + })); } self_wlock.last_block.clone() } @@ -132,18 +134,18 @@ impl From for NextBlock { impl NextBlock { fn next_block(&self) -> Option> { { - let next_read_lock = self.0 - .read() - .expect("Failed to acquire write lock in delete queue"); + let next_read_lock = self.0.read().expect( + "Failed to acquire write lock in delete queue", + ); if let InnerNextBlock::Closed(ref block) = *next_read_lock { return Some(block.clone()); } } let next_block; { - let mut next_write_lock = self.0 - .write() - .expect("Failed to acquire write lock in delete queue"); + let mut next_write_lock = self.0.write().expect( + "Failed to acquire write lock in delete queue", + ); match *next_write_lock { InnerNextBlock::Closed(ref block) => { return Some(block.clone()); diff --git a/src/indexer/doc_opstamp_mapping.rs b/src/indexer/doc_opstamp_mapping.rs index 03556ef17..e7d277f00 100644 --- a/src/indexer/doc_opstamp_mapping.rs +++ b/src/indexer/doc_opstamp_mapping.rs @@ -56,8 +56,10 @@ mod tests { #[test] fn test_doc_to_opstamp_mapping_none() { let doc_to_opstamp_mapping = DocToOpstampMapping::None; - assert_eq!(doc_to_opstamp_mapping.compute_doc_limit(1), - u32::max_value()); + assert_eq!( + doc_to_opstamp_mapping.compute_doc_limit(1), + u32::max_value() + ); } #[test] diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 26e6c6330..57acc00a7 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -102,14 +102,17 @@ impl !Sync for IndexWriter {} /// If the lockfile already exists, returns `Error::FileAlreadyExists`. /// # Panics /// If the heap size per thread is too small, panics. -pub fn open_index_writer(index: &Index, - num_threads: usize, - heap_size_in_bytes_per_thread: usize) - -> Result { +pub fn open_index_writer( + index: &Index, + num_threads: usize, + heap_size_in_bytes_per_thread: usize, +) -> Result { if heap_size_in_bytes_per_thread <= HEAP_SIZE_LIMIT as usize { - panic!(format!("The heap size per thread needs to be at least {}.", - HEAP_SIZE_LIMIT)); + panic!(format!( + "The heap size per thread needs to be at least {}.", + HEAP_SIZE_LIMIT + )); } let directory_lock = DirectoryLock::lock(index.directory().box_clone())?; @@ -156,12 +159,13 @@ pub fn open_index_writer(index: &Index, -pub fn compute_deleted_bitset(delete_bitset: &mut BitSet, - segment_reader: &SegmentReader, - delete_cursor: &mut DeleteCursor, - doc_opstamps: &DocToOpstampMapping, - target_opstamp: u64) - -> Result { +pub fn compute_deleted_bitset( + delete_bitset: &mut BitSet, + segment_reader: &SegmentReader, + delete_cursor: &mut DeleteCursor, + doc_opstamps: &DocToOpstampMapping, + target_opstamp: u64, +) -> Result { let mut might_have_changed = false; @@ -177,9 +181,12 @@ pub fn compute_deleted_bitset(delete_bitset: &mut BitSet, // Limit doc helps identify the first document // that may be affected by the delete operation. 
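// Sketch of the idea behind the comment above (not part of the patch): doc ids
// are assigned in increasing opstamp order, so the "limit doc" is the first
// document whose opstamp is not smaller than the delete operation's opstamp.
// Documents below that limit existed when the delete was issued and may be
// deleted; documents at or above it were added later and are kept. The free
// function below is a stand-in for DocToOpstampMapping::compute_doc_limit;
// the exact boundary handling in tantivy may differ.
fn compute_doc_limit(doc_opstamps: &[u64], delete_opstamp: u64) -> u32 {
    doc_opstamps
        .iter()
        .position(|&opstamp| opstamp >= delete_opstamp)
        .unwrap_or(doc_opstamps.len()) as u32
}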
let limit_doc = doc_opstamps.compute_doc_limit(delete_op.opstamp); - let inverted_index = segment_reader.inverted_index(delete_op.term.field())?; - if let Some(mut docset) = - inverted_index.read_postings(&delete_op.term, SegmentPostingsOption::NoFreq) { + let inverted_index = segment_reader.inverted_index(delete_op.term.field()); + if let Some(mut docset) = inverted_index.read_postings( + &delete_op.term, + SegmentPostingsOption::NoFreq, + ) + { while docset.advance() { let deleted_doc = docset.doc(); if deleted_doc < limit_doc { @@ -199,10 +206,11 @@ pub fn compute_deleted_bitset(delete_bitset: &mut BitSet, /// Advance delete for the given segment up /// to the target opstamp. -pub fn advance_deletes(mut segment: Segment, - segment_entry: &mut SegmentEntry, - target_opstamp: u64) - -> Result> { +pub fn advance_deletes( + mut segment: Segment, + segment_entry: &mut SegmentEntry, + target_opstamp: u64, +) -> Result> { let mut file_protect: Option = None; @@ -223,11 +231,13 @@ pub fn advance_deletes(mut segment: Segment, let delete_cursor = segment_entry.delete_cursor(); - compute_deleted_bitset(&mut delete_bitset, - &segment_reader, - delete_cursor, - &DocToOpstampMapping::None, - target_opstamp)?; + compute_deleted_bitset( + &mut delete_bitset, + &segment_reader, + delete_cursor, + &DocToOpstampMapping::None, + target_opstamp, + )?; for doc in 0u32..max_doc { if segment_reader.is_deleted(doc) { @@ -248,15 +258,16 @@ pub fn advance_deletes(mut segment: Segment, Ok(file_protect) } -fn index_documents(heap: &mut Heap, - table_size: usize, - segment: Segment, - schema: &Schema, - generation: usize, - document_iterator: &mut Iterator, - segment_updater: &mut SegmentUpdater, - mut delete_cursor: DeleteCursor) - -> Result { +fn index_documents( + heap: &mut Heap, + table_size: usize, + segment: Segment, + schema: &Schema, + generation: usize, + document_iterator: &mut Iterator, + segment_updater: &mut SegmentUpdater, + mut delete_cursor: DeleteCursor, +) -> Result { heap.clear(); let segment_id = segment.id(); let mut segment_writer = SegmentWriter::for_segment(heap, table_size, segment.clone(), schema)?; @@ -266,8 +277,10 @@ fn index_documents(heap: &mut Heap, // One is the memory arena dedicated to the segment is // getting full. if segment_writer.is_buffer_full() { - info!("Buffer limit reached, flushing segment with maxdoc={}.", - segment_writer.max_doc()); + info!( + "Buffer limit reached, flushing segment with maxdoc={}.", + segment_writer.max_doc() + ); break; } // The second is the term dictionary hash table @@ -276,8 +289,10 @@ fn index_documents(heap: &mut Heap, // Tantivy does not resize its hashtable. When it reaches // capacity, we just stop indexing new document. 
if segment_writer.is_term_saturated() { - info!("Term dic saturated, flushing segment with maxdoc={}.", - segment_writer.max_doc()); + info!( + "Term dic saturated, flushing segment with maxdoc={}.", + segment_writer.max_doc() + ); break; } } @@ -297,11 +312,13 @@ fn index_documents(heap: &mut Heap, let doc_to_opstamps = DocToOpstampMapping::from(doc_opstamps); let segment_reader = SegmentReader::open(segment)?; let mut deleted_bitset = BitSet::with_capacity(num_docs as usize); - let may_have_deletes = compute_deleted_bitset(&mut deleted_bitset, - &segment_reader, - &mut delete_cursor, - &doc_to_opstamps, - last_docstamp)?; + let may_have_deletes = compute_deleted_bitset( + &mut deleted_bitset, + &segment_reader, + &mut delete_cursor, + &doc_to_opstamps, + last_docstamp, + )?; let segment_entry = SegmentEntry::new(segment_meta, delete_cursor, { if may_have_deletes { @@ -328,14 +345,15 @@ impl IndexWriter { join_handle .join() .expect("Indexing Worker thread panicked") - .chain_err(|| ErrorKind::ErrorInThread("Error in indexing worker thread.".into()))?; + .chain_err(|| { + ErrorKind::ErrorInThread("Error in indexing worker thread.".into()) + })?; } drop(self.workers_join_handle); - let result = - self.segment_updater - .wait_merging_thread() - .chain_err(|| ErrorKind::ErrorInThread("Failed to join merging thread.".into())); + let result = self.segment_updater.wait_merging_thread().chain_err(|| { + ErrorKind::ErrorInThread("Failed to join merging thread.".into()) + }); if let Err(ref e) = result { error!("Some merging thread failed {:?}", e); @@ -348,8 +366,10 @@ impl IndexWriter { pub fn add_segment(&mut self, segment_meta: SegmentMeta) { let delete_cursor = self.delete_queue.cursor(); let segment_entry = SegmentEntry::new(segment_meta, delete_cursor, None); - self.segment_updater - .add_segment(self.generation, segment_entry); + self.segment_updater.add_segment( + self.generation, + segment_entry, + ); } #[doc(hidden)] @@ -373,7 +393,11 @@ impl IndexWriter { let mut delete_cursor = self.delete_queue.cursor(); let join_handle: JoinHandle> = thread::Builder::new() - .name(format!("indexing thread {} for gen {}", self.worker_id, generation)) + .name(format!( + "indexing thread {} for gen {}", + self.worker_id, + generation + )) .spawn(move || { loop { @@ -397,14 +421,16 @@ impl IndexWriter { return Ok(()); } let segment = segment_updater.new_segment(); - index_documents(&mut heap, - table_size, - segment, - &schema, - generation, - &mut document_iterator, - &mut segment_updater, - delete_cursor.clone())?; + index_documents( + &mut heap, + table_size, + segment, + &schema, + generation, + &mut document_iterator, + &mut segment_updater, + delete_cursor.clone(), + )?; } })?; @@ -437,9 +463,10 @@ impl IndexWriter { } /// Merges a given list of segments - pub fn merge(&mut self, - segment_ids: &[SegmentId]) - -> impl Future { + pub fn merge( + &mut self, + segment_ids: &[SegmentId], + ) -> impl Future { self.segment_updater.start_merge(segment_ids) } @@ -523,14 +550,15 @@ impl IndexWriter { self.recreate_document_channel(); let mut former_workers_join_handle = Vec::new(); - swap(&mut former_workers_join_handle, - &mut self.workers_join_handle); + swap( + &mut former_workers_join_handle, + &mut self.workers_join_handle, + ); for worker_handle in former_workers_join_handle { - let indexing_worker_result = - worker_handle - .join() - .map_err(|e| Error::from_kind(ErrorKind::ErrorInThread(format!("{:?}", e))))?; + let indexing_worker_result = worker_handle.join().map_err(|e| { + 
Error::from_kind(ErrorKind::ErrorInThread(format!("{:?}", e))) + })?; indexing_worker_result?; // add a new worker for the next generation. @@ -624,13 +652,17 @@ mod tests { let schema_builder = schema::SchemaBuilder::default(); let index = Index::create_in_ram(schema_builder.build()); let index_writer = index.writer(40_000_000).unwrap(); - assert_eq!(format!("{:?}", index_writer.get_merge_policy()), - "LogMergePolicy { min_merge_size: 8, min_layer_size: 10000, \ - level_log_size: 0.75 }"); + assert_eq!( + format!("{:?}", index_writer.get_merge_policy()), + "LogMergePolicy { min_merge_size: 8, min_layer_size: 10000, \ + level_log_size: 0.75 }" + ); let merge_policy = box NoMergePolicy::default(); index_writer.set_merge_policy(merge_policy); - assert_eq!(format!("{:?}", index_writer.get_merge_policy()), - "NoMergePolicy"); + assert_eq!( + format!("{:?}", index_writer.get_merge_policy()), + "NoMergePolicy" + ); } #[test] @@ -720,9 +752,9 @@ mod tests { } // this should create 8 segments and trigger a merge. index_writer.commit().expect("commit failed"); - index_writer - .wait_merging_threads() - .expect("waiting merging thread failed"); + index_writer.wait_merging_threads().expect( + "waiting merging thread failed", + ); index.load_searchers().unwrap(); assert_eq!(num_docs_containing("a"), 200); diff --git a/src/indexer/log_merge_policy.rs b/src/indexer/log_merge_policy.rs index 47f496998..c5e55d41c 100644 --- a/src/indexer/log_merge_policy.rs +++ b/src/indexer/log_merge_policy.rs @@ -62,7 +62,9 @@ impl MergePolicy for LogMergePolicy { let size_sorted_log_tuples: Vec<_> = size_sorted_tuples .into_iter() - .map(|(ind, num_docs)| (ind, (self.clip_min_size(num_docs) as f64).log2())) + .map(|(ind, num_docs)| { + (ind, (self.clip_min_size(num_docs) as f64).log2()) + }) .collect(); let (first_ind, first_score) = size_sorted_log_tuples[0]; @@ -79,7 +81,9 @@ impl MergePolicy for LogMergePolicy { levels .iter() .filter(|level| level.len() >= self.min_merge_size) - .map(|ind_vec| MergeCandidate(ind_vec.iter().map(|&ind| segments[ind].id()).collect())) + .map(|ind_vec| { + MergeCandidate(ind_vec.iter().map(|&ind| segments[ind].id()).collect()) + }) .collect() } @@ -134,12 +138,14 @@ mod tests { #[test] fn test_log_merge_policy_levels() { // multiple levels all get merged correctly - let test_input = vec![seg_meta(10), - seg_meta(10), - seg_meta(10), - seg_meta(1000), - seg_meta(1000), - seg_meta(1000)]; + let test_input = vec![ + seg_meta(10), + seg_meta(10), + seg_meta(10), + seg_meta(1000), + seg_meta(1000), + seg_meta(1000), + ]; let result_list = test_merge_policy().compute_merge_candidates(&test_input); assert_eq!(result_list.len(), 2); } @@ -147,24 +153,28 @@ mod tests { #[test] fn test_log_merge_policy_within_levels() { // multiple levels all get merged correctly - let test_input = vec![seg_meta(10), - seg_meta(11), - seg_meta(12), - seg_meta(1000), - seg_meta(1000), - seg_meta(1000)]; + let test_input = vec![ + seg_meta(10), + seg_meta(11), + seg_meta(12), + seg_meta(1000), + seg_meta(1000), + seg_meta(1000), + ]; let result_list = test_merge_policy().compute_merge_candidates(&test_input); assert_eq!(result_list.len(), 2); } #[test] fn test_log_merge_policy_small_segments() { // multiple levels all get merged correctly - let test_input = vec![seg_meta(1), - seg_meta(1), - seg_meta(1), - seg_meta(2), - seg_meta(2), - seg_meta(2)]; + let test_input = vec![ + seg_meta(1), + seg_meta(1), + seg_meta(1), + seg_meta(2), + seg_meta(2), + seg_meta(2), + ]; let result_list = 
test_merge_policy().compute_merge_candidates(&test_input); assert_eq!(result_list.len(), 1); } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 8e26784da..594f952d5 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -28,10 +28,11 @@ pub struct IndexMerger { } -fn compute_min_max_val(u64_reader: &U64FastFieldReader, - max_doc: DocId, - delete_bitset: &DeleteBitSet) - -> Option<(u64, u64)> { +fn compute_min_max_val( + u64_reader: &U64FastFieldReader, + max_doc: DocId, + delete_bitset: &DeleteBitSet, +) -> Option<(u64, u64)> { if max_doc == 0 { None } else if !delete_bitset.has_deletes() { @@ -49,17 +50,18 @@ fn compute_min_max_val(u64_reader: &U64FastFieldReader, } } -fn extract_fieldnorm_reader(segment_reader: &SegmentReader, - field: Field) - -> Option { +fn extract_fieldnorm_reader( + segment_reader: &SegmentReader, + field: Field, +) -> Option { segment_reader.get_fieldnorms_reader(field) } -fn extract_fast_field_reader(segment_reader: &SegmentReader, - field: Field) - -> Option { - segment_reader.get_fast_field_reader(field) - .ok() +fn extract_fast_field_reader( + segment_reader: &SegmentReader, + field: Field, +) -> Option { + segment_reader.get_fast_field_reader(field).ok() } @@ -100,10 +102,10 @@ impl IndexMerger { } } Ok(IndexMerger { - schema: schema, - readers: readers, - max_doc: max_doc, - }) + schema: schema, + readers: readers, + max_doc: max_doc, + }) } fn write_fieldnorms(&self, fast_field_serializer: &mut FastFieldSerializer) -> Result<()> { @@ -114,9 +116,11 @@ impl IndexMerger { .filter(|&(_, field_entry)| field_entry.is_indexed()) .map(|(field_id, _)| Field(field_id as u32)) .collect(); - self.generic_write_fast_field(fieldnorm_fastfields, - &extract_fieldnorm_reader, - fast_field_serializer) + self.generic_write_fast_field( + fieldnorm_fastfields, + &extract_fieldnorm_reader, + fast_field_serializer, + ) } fn write_fast_fields(&self, fast_field_serializer: &mut FastFieldSerializer) -> Result<()> { @@ -127,19 +131,21 @@ impl IndexMerger { .filter(|&(_, field_entry)| field_entry.is_int_fast()) .map(|(field_id, _)| Field(field_id as u32)) .collect(); - self.generic_write_fast_field(fast_fields, - &extract_fast_field_reader, - fast_field_serializer) + self.generic_write_fast_field( + fast_fields, + &extract_fast_field_reader, + fast_field_serializer, + ) } // used both to merge field norms and regular u64 fast fields. 
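// Illustrative sketch (not from the patch): `generic_write_fast_field` below
// merges one u64 fast field across segments by taking the global min/max over
// the per-segment (min, max) pairs and then re-writing the values of the
// surviving (non-deleted) documents. A simplified std-only version of that
// pass, with plain vectors standing in for readers and delete bitsets:
fn merge_fast_field(segments: &[(Vec<u64>, Vec<bool>)]) -> (u64, u64, Vec<u64>) {
    let mut min_val = u64::max_value();
    let mut max_val = u64::min_value();
    let mut merged = Vec::new();
    for (vals, deleted) in segments {
        for (&val, &is_deleted) in vals.iter().zip(deleted.iter()) {
            if !is_deleted {
                min_val = std::cmp::min(min_val, val);
                max_val = std::cmp::max(max_val, val);
                merged.push(val);
            }
        }
    }
    if merged.is_empty() {
        (0, 0, merged)
    } else {
        (min_val, max_val, merged)
    }
}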
- fn generic_write_fast_field(&self, - fields: Vec, - field_reader_extractor: &Fn(&SegmentReader, Field) - -> Option, - fast_field_serializer: &mut FastFieldSerializer) - -> Result<()> { + fn generic_write_fast_field( + &self, + fields: Vec, + field_reader_extractor: &Fn(&SegmentReader, Field) -> Option, + fast_field_serializer: &mut FastFieldSerializer, + ) -> Result<()> { for field in fields { @@ -151,19 +157,25 @@ impl IndexMerger { match field_reader_extractor(reader, field) { Some(u64_reader) => { if let Some((seg_min_val, seg_max_val)) = - compute_min_max_val(&u64_reader, - reader.max_doc(), - reader.delete_bitset()) { + compute_min_max_val( + &u64_reader, + reader.max_doc(), + reader.delete_bitset(), + ) + { // the segment has some non-deleted documents min_val = min(min_val, seg_min_val); max_val = max(max_val, seg_max_val); - u64_readers - .push((reader.max_doc(), u64_reader, reader.delete_bitset())); + u64_readers.push(( + reader.max_doc(), + u64_reader, + reader.delete_bitset(), + )); } } None => { - let error_msg = format!("Failed to find a u64_reader for field {:?}", - field); + let error_msg = + format!("Failed to find a u64_reader for field {:?}", field); error!("{}", error_msg); bail!(ErrorKind::SchemaError(error_msg)); } @@ -179,8 +191,11 @@ impl IndexMerger { assert!(min_val <= max_val); - let mut fast_single_field_serializer = fast_field_serializer - .new_u64_fast_field(field, min_val, max_val)?; + let mut fast_single_field_serializer = fast_field_serializer.new_u64_fast_field( + field, + min_val, + max_val, + )?; for (max_doc, u64_reader, delete_bitset) in u64_readers { for doc_id in 0..max_doc { if !delete_bitset.is_deleted(doc_id) { @@ -199,9 +214,8 @@ impl IndexMerger { let mut delta_computer = DeltaComputer::new(); - let mut indexed_fields = vec!(); + let mut indexed_fields = vec![]; for (field_ord, field_entry) in self.schema.fields().iter().enumerate() { - // if field_entry if field_entry.is_indexed() { indexed_fields.push(Field(field_ord as u32)); } @@ -211,9 +225,8 @@ impl IndexMerger { let field_readers = self.readers .iter() - .map(|reader| - reader.inverted_index(indexed_field)) - .collect::>>()?; + .map(|reader| reader.inverted_index(indexed_field)) + .collect::>(); let field_term_streams = field_readers .iter() @@ -224,7 +237,8 @@ impl IndexMerger { let mut max_doc = 0; // map from segment doc ids to the resulting merged segment doc id. - let mut merged_doc_id_map: Vec>> = Vec::with_capacity(self.readers.len()); + let mut merged_doc_id_map: Vec>> = + Vec::with_capacity(self.readers.len()); for reader in &self.readers { let mut segment_local_map = Vec::with_capacity(reader.max_doc() as usize); @@ -258,8 +272,10 @@ impl IndexMerger { let segment_postings_option = field_entry .field_type() .get_segment_postings_option() - .expect("Encountered a field that is not supposed to be - indexed. Have you modified the schema?"); + .expect( + "Encountered a field that is not supposed to be + indexed. 
Have you modified the schema?", + ); while merged_terms.advance() { @@ -273,9 +289,11 @@ impl IndexMerger { let segment_ord = heap_item.segment_ord; let term_info = heap_item.streamer.value(); let segment_reader = &self.readers[heap_item.segment_ord]; - let inverted_index = segment_reader.inverted_index(term.field()).unwrap(); // TODO fix unwrap - let mut segment_postings = inverted_index - .read_postings_from_terminfo(term_info, segment_postings_option); + let inverted_index = segment_reader.inverted_index(term.field()); + let mut segment_postings = inverted_index.read_postings_from_terminfo( + term_info, + segment_postings_option, + ); if segment_postings.advance() { Some((segment_ord, segment_postings)) } else { @@ -304,14 +322,18 @@ impl IndexMerger { // `.advance()` has been called once before the loop. // Hence we cannot use a `while segment_postings.advance()` loop. if let Some(remapped_doc_id) = - old_to_new_doc_id[segment_postings.doc() as usize] { + old_to_new_doc_id[segment_postings.doc() as usize] + { // we make sure to only write the term iff // there is at least one document. let positions: &[u32] = segment_postings.positions(); let term_freq = segment_postings.term_freq(); let delta_positions = delta_computer.compute_delta(positions); - field_serializer - .write_doc(remapped_doc_id, term_freq, delta_positions)?; + field_serializer.write_doc( + remapped_doc_id, + term_freq, + delta_positions, + )?; } if !segment_postings.advance() { break; @@ -349,8 +371,12 @@ impl IndexMerger { impl SerializableSegment for IndexMerger { fn write(&self, mut serializer: SegmentSerializer) -> Result { self.write_postings(serializer.get_postings_serializer())?; - self.write_fieldnorms(serializer.get_fieldnorms_serializer())?; - self.write_fast_fields(serializer.get_fast_field_serializer())?; + self.write_fieldnorms( + serializer.get_fieldnorms_serializer(), + )?; + self.write_fast_fields( + serializer.get_fast_field_serializer(), + )?; self.write_storable_fields(serializer.get_store_writer())?; serializer.close()?; Ok(self.max_doc) @@ -429,14 +455,13 @@ mod tests { } } { - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); + let segment_ids = index.searchable_segment_ids().expect( + "Searchable segments failed.", + ); let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); - index_writer - .merge(&segment_ids) - .wait() - .expect("Merging failed"); + index_writer.merge(&segment_ids).wait().expect( + "Merging failed", + ); index_writer.wait_merging_threads().unwrap(); } { @@ -449,14 +474,22 @@ mod tests { collector.docs() }; { - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "a")]), - vec![1, 2, 4]); - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "af")]), - vec![0, 3]); - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "g")]), - vec![4]); - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "b")]), - vec![0, 1, 2, 3, 4]); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "a")]), + vec![1, 2, 4] + ); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "af")]), + vec![0, 3] + ); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "g")]), + vec![4] + ); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "b")]), + vec![0, 1, 2, 3, 4] + ); } { let doc = searcher.doc(&DocAddress(0, 0)).unwrap(); @@ -485,8 +518,10 @@ mod tests { assert!(searcher.search(&query, &mut collector).is_ok()); collector.vals() }; - 
assert_eq!(get_fast_vals(vec![Term::from_field_text(text_field, "a")]), - vec![5, 7, 13]); + assert_eq!( + get_fast_vals(vec![Term::from_field_text(text_field, "a")]), + vec![5, 7, 13] + ); } } } @@ -533,14 +568,22 @@ mod tests { assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].max_doc(), 3); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), - vec![1]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), - vec![1]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), - vec![3]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), - vec![1, 3]); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "a")), + vec![1] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "b")), + vec![1] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "c")), + vec![3] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "d")), + vec![1, 3] + ); } { // a second commit @@ -572,20 +615,34 @@ mod tests { assert_eq!(searcher.segment_readers()[0].max_doc(), 3); assert_eq!(searcher.segment_readers()[1].num_docs(), 2); assert_eq!(searcher.segment_readers()[1].max_doc(), 4); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), - vec![3]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), - vec![3]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), - vec![6_000]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), - vec![6_000, 7_000]); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "a")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "b")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "c")), + vec![3] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "d")), + vec![3] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "e")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "f")), + vec![6_000] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "g")), + vec![6_000, 7_000] + ); let score_field_reader: U64FastFieldReader = searcher .segment_reader(0) @@ -603,33 +660,46 @@ mod tests { } { // merging the segments - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - index_writer - .merge(&segment_ids) - .wait() - .expect("Merging failed"); + let segment_ids = index.searchable_segment_ids().expect( + "Searchable segments failed.", + ); + index_writer.merge(&segment_ids).wait().expect( + "Merging failed", + ); index.load_searchers().unwrap(); let ref searcher = *index.searcher(); assert_eq!(searcher.segment_readers().len(), 1); assert_eq!(searcher.num_docs(), 3); assert_eq!(searcher.segment_readers()[0].num_docs(), 3); assert_eq!(searcher.segment_readers()[0].max_doc(), 3); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), - empty_vec); - assert_eq!(search_term(&searcher, 
Term::from_field_text(text_field, "b")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), - vec![3]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), - vec![3]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), - vec![6_000]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), - vec![6_000, 7_000]); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "a")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "b")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "c")), + vec![3] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "d")), + vec![3] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "e")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "f")), + vec![6_000] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "g")), + vec![6_000, 7_000] + ); let score_field_reader: U64FastFieldReader = searcher .segment_reader(0) .get_fast_field_reader(score_field) @@ -648,20 +718,34 @@ mod tests { assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].max_doc(), 3); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), - vec![6_000]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), - vec![6_000, 7_000]); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "a")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "b")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "c")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "d")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "e")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "f")), + vec![6_000] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "g")), + vec![6_000, 7_000] + ); let score_field_reader: U64FastFieldReader = searcher .segment_reader(0) .get_fast_field_reader(score_field) @@ -671,13 +755,12 @@ mod tests { } { // Test merging a single segment in order to remove deletes. 
- let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - index_writer - .merge(&segment_ids) - .wait() - .expect("Merging failed"); + let segment_ids = index.searchable_segment_ids().expect( + "Searchable segments failed.", + ); + index_writer.merge(&segment_ids).wait().expect( + "Merging failed", + ); index.load_searchers().unwrap(); let ref searcher = *index.searcher(); @@ -685,20 +768,34 @@ mod tests { assert_eq!(searcher.num_docs(), 2); assert_eq!(searcher.segment_readers()[0].num_docs(), 2); assert_eq!(searcher.segment_readers()[0].max_doc(), 2); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "a")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "b")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "c")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "d")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "e")), - empty_vec); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "f")), - vec![6_000]); - assert_eq!(search_term(&searcher, Term::from_field_text(text_field, "g")), - vec![6_000, 7_000]); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "a")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "b")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "c")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "d")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "e")), + empty_vec + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "f")), + vec![6_000] + ); + assert_eq!( + search_term(&searcher, Term::from_field_text(text_field, "g")), + vec![6_000, 7_000] + ); let score_field_reader: U64FastFieldReader = searcher .segment_reader(0) .get_fast_field_reader(score_field) @@ -710,13 +807,12 @@ mod tests { { // Test removing all docs index_writer.delete_term(Term::from_field_text(text_field, "g")); - let segment_ids = index - .searchable_segment_ids() - .expect("Searchable segments failed."); - index_writer - .merge(&segment_ids) - .wait() - .expect("Merging failed"); + let segment_ids = index.searchable_segment_ids().expect( + "Searchable segments failed.", + ); + index_writer.merge(&segment_ids).wait().expect( + "Merging failed", + ); index.load_searchers().unwrap(); let ref searcher = *index.searcher(); diff --git a/src/indexer/segment_entry.rs b/src/indexer/segment_entry.rs index 082f9e1c1..9e8ad74a5 100644 --- a/src/indexer/segment_entry.rs +++ b/src/indexer/segment_entry.rs @@ -44,10 +44,11 @@ pub struct SegmentEntry { impl SegmentEntry { /// Create a new `SegmentEntry` - pub fn new(segment_meta: SegmentMeta, - delete_cursor: DeleteCursor, - delete_bitset: Option) - -> SegmentEntry { + pub fn new( + segment_meta: SegmentMeta, + delete_cursor: DeleteCursor, + delete_bitset: Option, + ) -> SegmentEntry { SegmentEntry { meta: segment_meta, state: SegmentState::Ready, diff --git a/src/indexer/segment_manager.rs b/src/indexer/segment_manager.rs index 7a37f3574..a040c2ed5 100644 --- a/src/indexer/segment_manager.rs +++ b/src/indexer/segment_manager.rs @@ -32,31 +32,36 @@ pub struct SegmentManager { impl Debug for SegmentManager { fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { let lock = self.read(); - write!(f, - "{{ uncommitted: {:?}, 
committed: {:?} }}", - lock.uncommitted, - lock.committed) + write!( + f, + "{{ uncommitted: {:?}, committed: {:?} }}", + lock.uncommitted, + lock.committed + ) } } -pub fn get_mergeable_segments(segment_manager: &SegmentManager) - -> (Vec, Vec) { +pub fn get_mergeable_segments( + segment_manager: &SegmentManager, +) -> (Vec, Vec) { let registers_lock = segment_manager.read(); - (registers_lock.committed.get_mergeable_segments(), - registers_lock.uncommitted.get_mergeable_segments()) + ( + registers_lock.committed.get_mergeable_segments(), + registers_lock.uncommitted.get_mergeable_segments(), + ) } impl SegmentManager { - pub fn from_segments(segment_metas: Vec, - delete_cursor: DeleteCursor) - -> SegmentManager { + pub fn from_segments( + segment_metas: Vec, + delete_cursor: DeleteCursor, + ) -> SegmentManager { SegmentManager { registers: RwLock::new(SegmentRegisters { - uncommitted: SegmentRegister::default(), - committed: SegmentRegister::new(segment_metas, - delete_cursor), - writing: HashSet::new(), - }), + uncommitted: SegmentRegister::default(), + committed: SegmentRegister::new(segment_metas, delete_cursor), + writing: HashSet::new(), + }), } } @@ -94,25 +99,24 @@ impl SegmentManager { pub fn segment_entry(&self, segment_id: &SegmentId) -> Option { let registers = self.read(); - registers - .committed - .segment_entry(segment_id) - .or_else(|| registers.uncommitted.segment_entry(segment_id)) + registers.committed.segment_entry(segment_id).or_else(|| { + registers.uncommitted.segment_entry(segment_id) + }) } // Lock poisoning should never happen : // The lock is acquired and released within this class, // and the operations cannot panic. fn read(&self) -> RwLockReadGuard { - self.registers - .read() - .expect("Failed to acquire read lock on SegmentManager.") + self.registers.read().expect( + "Failed to acquire read lock on SegmentManager.", + ) } fn write(&self) -> RwLockWriteGuard { - self.registers - .write() - .expect("Failed to acquire write lock on SegmentManager.") + self.registers.write().expect( + "Failed to acquire write lock on SegmentManager.", + ) } pub fn commit(&self, segment_entries: Vec) { @@ -140,9 +144,11 @@ impl SegmentManager { } - pub fn cancel_merge(&self, - before_merge_segment_ids: &[SegmentId], - after_merge_segment_id: SegmentId) { + pub fn cancel_merge( + &self, + before_merge_segment_ids: &[SegmentId], + after_merge_segment_id: SegmentId, + ) { let mut registers_lock = self.write(); @@ -150,13 +156,15 @@ impl SegmentManager { { let target_segment_register: &mut SegmentRegister; target_segment_register = { - if registers_lock - .uncommitted - .contains_all(before_merge_segment_ids) { + if registers_lock.uncommitted.contains_all( + before_merge_segment_ids, + ) + { &mut registers_lock.uncommitted - } else if registers_lock - .committed - .contains_all(before_merge_segment_ids) { + } else if registers_lock.committed.contains_all( + before_merge_segment_ids, + ) + { &mut registers_lock.committed } else { warn!("couldn't find segment in SegmentManager"); @@ -185,23 +193,26 @@ impl SegmentManager { registers_lock.uncommitted.add_segment_entry(segment_entry); } - pub fn end_merge(&self, - before_merge_segment_ids: &[SegmentId], - after_merge_segment_entry: SegmentEntry) { + pub fn end_merge( + &self, + before_merge_segment_ids: &[SegmentId], + after_merge_segment_entry: SegmentEntry, + ) { let mut registers_lock = self.write(); - registers_lock - .writing - .remove(&after_merge_segment_entry.segment_id()); + 
registers_lock.writing.remove(&after_merge_segment_entry + .segment_id()); let target_register: &mut SegmentRegister = { - if registers_lock - .uncommitted - .contains_all(before_merge_segment_ids) { + if registers_lock.uncommitted.contains_all( + before_merge_segment_ids, + ) + { &mut registers_lock.uncommitted - } else if registers_lock - .committed - .contains_all(before_merge_segment_ids) { + } else if registers_lock.committed.contains_all( + before_merge_segment_ids, + ) + { &mut registers_lock.committed } else { warn!("couldn't find segment in SegmentManager"); diff --git a/src/indexer/segment_register.rs b/src/indexer/segment_register.rs index af7e778d1..97be73c85 100644 --- a/src/indexer/segment_register.rs +++ b/src/indexer/segment_register.rs @@ -24,7 +24,12 @@ impl Debug for SegmentRegister { fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { try!(write!(f, "SegmentRegister(")); for (k, v) in &self.segment_states { - try!(write!(f, "{}:{}, ", k.short_uuid_string(), v.state().letter_code())); + try!(write!( + f, + "{}:{}, ", + k.short_uuid_string(), + v.state().letter_code() + )); } try!(write!(f, ")")); Ok(()) @@ -74,9 +79,9 @@ impl SegmentRegister { } pub fn contains_all(&mut self, segment_ids: &[SegmentId]) -> bool { - segment_ids - .iter() - .all(|segment_id| self.segment_states.contains_key(segment_id)) + segment_ids.iter().all(|segment_id| { + self.segment_states.contains_key(segment_id) + }) } pub fn add_segment_entry(&mut self, segment_entry: SegmentEntry) { @@ -91,14 +96,18 @@ impl SegmentRegister { pub fn cancel_merge(&mut self, segment_id: &SegmentId) { self.segment_states .get_mut(segment_id) - .expect("Received a merge notification for a segment that is not registered") + .expect( + "Received a merge notification for a segment that is not registered", + ) .cancel_merge(); } pub fn start_merge(&mut self, segment_id: &SegmentId) { self.segment_states .get_mut(segment_id) - .expect("Received a merge notification for a segment that is not registered") + .expect( + "Received a merge notification for a segment that is not registered", + ) .start_merge(); } @@ -144,34 +153,42 @@ mod tests { let segment_entry = SegmentEntry::new(segment_meta, delete_queue.cursor(), None); segment_register.add_segment_entry(segment_entry); } - assert_eq!(segment_register - .segment_entry(&segment_id_a) - .unwrap() - .state(), - SegmentState::Ready); + assert_eq!( + segment_register + .segment_entry(&segment_id_a) + .unwrap() + .state(), + SegmentState::Ready + ); assert_eq!(segment_ids(&segment_register), vec![segment_id_a]); { let segment_meta = SegmentMeta::new(segment_id_b); let segment_entry = SegmentEntry::new(segment_meta, delete_queue.cursor(), None); segment_register.add_segment_entry(segment_entry); } - assert_eq!(segment_register - .segment_entry(&segment_id_b) - .unwrap() - .state(), - SegmentState::Ready); + assert_eq!( + segment_register + .segment_entry(&segment_id_b) + .unwrap() + .state(), + SegmentState::Ready + ); segment_register.start_merge(&segment_id_a); segment_register.start_merge(&segment_id_b); - assert_eq!(segment_register - .segment_entry(&segment_id_a) - .unwrap() - .state(), - SegmentState::InMerge); - assert_eq!(segment_register - .segment_entry(&segment_id_b) - .unwrap() - .state(), - SegmentState::InMerge); + assert_eq!( + segment_register + .segment_entry(&segment_id_a) + .unwrap() + .state(), + SegmentState::InMerge + ); + assert_eq!( + segment_register + .segment_entry(&segment_id_b) + .unwrap() + .state(), + SegmentState::InMerge + ); 
segment_register.remove_segment(&segment_id_a); segment_register.remove_segment(&segment_id_b); { diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs index 719c98c14..c2aa4bcae 100644 --- a/src/indexer/segment_serializer.rs +++ b/src/indexer/segment_serializer.rs @@ -28,11 +28,11 @@ impl SegmentSerializer { let postings_serializer = try!(InvertedIndexSerializer::open(segment)); Ok(SegmentSerializer { - postings_serializer: postings_serializer, - store_writer: StoreWriter::new(store_write), - fast_field_serializer: fast_field_serializer, - fieldnorms_serializer: fieldnorms_serializer, - }) + postings_serializer: postings_serializer, + store_writer: StoreWriter::new(store_write), + fast_field_serializer: fast_field_serializer, + fieldnorms_serializer: fieldnorms_serializer, + }) } /// Accessor to the `PostingsSerializer`. diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index feeb33d03..db7add226 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -61,11 +61,12 @@ pub fn save_new_metas(schema: Schema, opstamp: u64, directory: &mut Directory) - /// and flushed. /// /// This method is not part of tantivy's public API -pub fn save_metas(segment_metas: Vec, - schema: Schema, - opstamp: u64, - directory: &mut Directory) - -> Result<()> { +pub fn save_metas( + segment_metas: Vec, + schema: Schema, + opstamp: u64, + directory: &mut Directory, +) -> Result<()> { let metas = IndexMeta { segments: segment_metas, schema: schema, @@ -89,11 +90,12 @@ pub struct SegmentUpdater(Arc); -fn perform_merge(segment_ids: &[SegmentId], - segment_updater: &SegmentUpdater, - mut merged_segment: Segment, - target_opstamp: u64) - -> Result { +fn perform_merge( + segment_ids: &[SegmentId], + segment_updater: &SegmentUpdater, + mut merged_segment: Segment, + target_opstamp: u64, +) -> Result { // first we need to apply deletes to our segment. info!("Start merge: {:?}", segment_ids); @@ -105,17 +107,21 @@ fn perform_merge(segment_ids: &[SegmentId], for segment_id in segment_ids { if let Some(mut segment_entry) = - segment_updater.0.segment_manager.segment_entry(segment_id) { + segment_updater.0.segment_manager.segment_entry(segment_id) + { let segment = index.segment(segment_entry.meta().clone()); if let Some(file_protection) = - advance_deletes(segment, &mut segment_entry, target_opstamp)? { + advance_deletes(segment, &mut segment_entry, target_opstamp)? + { file_protections.push(file_protection); } segment_entries.push(segment_entry); } else { error!("Error, had to abort merge as some of the segment is not managed anymore."); - let msg = format!("Segment {:?} requested for merge is not managed.", - segment_id); + let msg = format!( + "Segment {:?} requested for merge is not managed.", + segment_id + ); bail!(ErrorKind::InvalidArgument(msg)); } } @@ -134,12 +140,13 @@ fn perform_merge(segment_ids: &[SegmentId], // ... we just serialize this index merger in our new segment // to merge the two segments. 
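For reference, the user-facing counterpart of this internal merge path is the `IndexWriter::merge` call exercised in the tests of this patch. A minimal sketch of that flow, assuming an `Index` named `index` and its `IndexWriter` named `index_writer` are already set up as in those tests:

    // Collect every segment currently visible to searchers...
    let segment_ids = index
        .searchable_segment_ids()
        .expect("listing searchable segments failed");
    // ...merge them into a single new segment and wait for completion.
    index_writer
        .merge(&segment_ids)
        .wait()
        .expect("merge failed");
    // Reload searchers so the merged segment becomes visible.
    index.load_searchers().expect("reloading searchers failed");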
- let segment_serializer = SegmentSerializer::for_segment(&mut merged_segment) - .expect("Creating index serializer failed"); + let segment_serializer = SegmentSerializer::for_segment(&mut merged_segment).expect( + "Creating index serializer failed", + ); - let num_docs = merger - .write(segment_serializer) - .expect("Serializing merged index failed"); + let num_docs = merger.write(segment_serializer).expect( + "Serializing merged index failed", + ); let mut segment_meta = SegmentMeta::new(merged_segment.id()); segment_meta.set_max_doc(num_docs); @@ -161,23 +168,24 @@ struct InnerSegmentUpdater { } impl SegmentUpdater { - pub fn new(index: Index, - stamper: Stamper, - delete_cursor: DeleteCursor) - -> Result { + pub fn new( + index: Index, + stamper: Stamper, + delete_cursor: DeleteCursor, + ) -> Result { let segments = index.searchable_segment_metas()?; let segment_manager = SegmentManager::from_segments(segments, delete_cursor); Ok(SegmentUpdater(Arc::new(InnerSegmentUpdater { - pool: CpuPool::new(1), - index: index, - segment_manager: segment_manager, - merge_policy: RwLock::new(box DefaultMergePolicy::default()), - merging_thread_id: AtomicUsize::default(), - merging_threads: RwLock::new(HashMap::new()), - generation: AtomicUsize::default(), - killed: AtomicBool::new(false), - stamper: stamper, - }))) + pool: CpuPool::new(1), + index: index, + segment_manager: segment_manager, + merge_policy: RwLock::new(box DefaultMergePolicy::default()), + merging_thread_id: AtomicUsize::default(), + merging_threads: RwLock::new(HashMap::new()), + generation: AtomicUsize::default(), + killed: AtomicBool::new(false), + stamper: stamper, + }))) } pub fn new_segment(&self) -> Segment { @@ -199,10 +207,10 @@ impl SegmentUpdater { self.0.merging_thread_id.fetch_add(1, Ordering::SeqCst) } - fn run_async T> - (&self, - f: F) - -> CpuFuture { + fn run_async T>( + &self, + f: F, + ) -> CpuFuture { let me_clone = self.clone(); self.0.pool.spawn_fn(move || Ok(f(me_clone))) } @@ -211,11 +219,10 @@ impl SegmentUpdater { pub fn add_segment(&self, generation: usize, segment_entry: SegmentEntry) -> bool { if generation >= self.0.generation.load(Ordering::Acquire) { self.run_async(|segment_updater| { - segment_updater.0.segment_manager.add_segment(segment_entry); - segment_updater.consider_merge_options(); - true - }) - .forget(); + segment_updater.0.segment_manager.add_segment(segment_entry); + segment_updater.consider_merge_options(); + true + }).forget(); true } else { false @@ -249,46 +256,46 @@ impl SegmentUpdater { if self.is_alive() { let index = &self.0.index; let directory = index.directory(); - save_metas(self.0.segment_manager.committed_segment_metas(), - index.schema(), - opstamp, - directory.box_clone().borrow_mut()) - .expect("Could not save metas."); + save_metas( + self.0.segment_manager.committed_segment_metas(), + index.schema(), + opstamp, + directory.box_clone().borrow_mut(), + ).expect("Could not save metas."); } } pub fn garbage_collect_files(&self) -> Result<()> { self.run_async(move |segment_updater| { segment_updater.garbage_collect_files_exec(); - }) - .wait() + }).wait() } fn garbage_collect_files_exec(&self) { info!("Running garbage collection"); let mut index = self.0.index.clone(); - index.directory_mut().garbage_collect(|| { - self.0.segment_manager.list_files() - }); + index.directory_mut().garbage_collect( + || self.0.segment_manager.list_files(), + ); } pub fn commit(&self, opstamp: u64) -> Result<()> { self.run_async(move |segment_updater| if segment_updater.is_alive() { - let 
segment_entries = segment_updater - .purge_deletes(opstamp) - .expect("Failed purge deletes"); - segment_updater.0.segment_manager.commit(segment_entries); - segment_updater.save_metas(opstamp); - segment_updater.garbage_collect_files_exec(); - segment_updater.consider_merge_options(); - }) - .wait() + let segment_entries = segment_updater.purge_deletes(opstamp).expect( + "Failed purge deletes", + ); + segment_updater.0.segment_manager.commit(segment_entries); + segment_updater.save_metas(opstamp); + segment_updater.garbage_collect_files_exec(); + segment_updater.consider_merge_options(); + }).wait() } - pub fn start_merge(&self, - segment_ids: &[SegmentId]) - -> impl Future { + pub fn start_merge( + &self, + segment_ids: &[SegmentId], + ) -> impl Future { self.0.segment_manager.start_merge(segment_ids); let segment_updater_clone = self.clone(); @@ -308,10 +315,12 @@ impl SegmentUpdater { // first we need to apply deletes to our segment. let merged_segment = segment_updater_clone.new_segment(); let merged_segment_id = merged_segment.id(); - let merge_result = perform_merge(&segment_ids_vec, - &segment_updater_clone, - merged_segment, - target_opstamp); + let merge_result = perform_merge( + &segment_ids_vec, + &segment_updater_clone, + merged_segment, + target_opstamp, + ); match merge_result { Ok(after_merge_segment_entry) => { @@ -345,11 +354,10 @@ impl SegmentUpdater { .remove(&merging_thread_id); Ok(()) }); - self.0 - .merging_threads - .write() - .unwrap() - .insert(merging_thread_id, merging_join_handle); + self.0.merging_threads.write().unwrap().insert( + merging_thread_id, + merging_join_handle, + ); merging_future_recv } @@ -368,19 +376,23 @@ impl SegmentUpdater { } } - fn cancel_merge(&self, - before_merge_segment_ids: &[SegmentId], - after_merge_segment_entry: SegmentId) { - self.0 - .segment_manager - .cancel_merge(before_merge_segment_ids, after_merge_segment_entry); + fn cancel_merge( + &self, + before_merge_segment_ids: &[SegmentId], + after_merge_segment_entry: SegmentId, + ) { + self.0.segment_manager.cancel_merge( + before_merge_segment_ids, + after_merge_segment_entry, + ); } - fn end_merge(&self, - before_merge_segment_ids: Vec, - mut after_merge_segment_entry: SegmentEntry) - -> Result<()> { + fn end_merge( + &self, + before_merge_segment_ids: Vec, + mut after_merge_segment_entry: SegmentEntry, + ) -> Result<()> { self.run_async(move |segment_updater| { info!("End merge {:?}", after_merge_segment_entry.meta()); @@ -391,28 +403,37 @@ impl SegmentUpdater { if delete_operation.opstamp < committed_opstamp { let index = &segment_updater.0.index; let segment = index.segment(after_merge_segment_entry.meta().clone()); - match advance_deletes(segment, - &mut after_merge_segment_entry, - committed_opstamp) { + match advance_deletes( + segment, + &mut after_merge_segment_entry, + committed_opstamp, + ) { Ok(file_protection_opt_res) => { _file_protection_opt = file_protection_opt_res; } Err(e) => { - error!("Merge of {:?} was cancelled (advancing deletes failed): {:?}", - before_merge_segment_ids, e); + error!( + "Merge of {:?} was cancelled (advancing deletes failed): {:?}", + before_merge_segment_ids, + e + ); // ... 
cancel merge if cfg!(test) { panic!("Merge failed."); } - segment_updater.cancel_merge(&before_merge_segment_ids, - after_merge_segment_entry.segment_id()); + segment_updater.cancel_merge( + &before_merge_segment_ids, + after_merge_segment_entry.segment_id(), + ); return; } } } } - segment_updater.0.segment_manager.end_merge(&before_merge_segment_ids, - after_merge_segment_entry); + segment_updater.0.segment_manager.end_merge( + &before_merge_segment_ids, + after_merge_segment_entry, + ); segment_updater.consider_merge_options(); info!("save metas"); segment_updater.save_metas(segment_updater.0.index.opstamp()); @@ -450,10 +471,9 @@ impl SegmentUpdater { } debug!("wait merging thread {}", new_merging_threads.len()); for (_, merging_thread_handle) in new_merging_threads { - merging_thread_handle - .join() - .map(|_| ()) - .map_err(|_| ErrorKind::ErrorInThread("Merging thread failed.".into()))?; + merging_thread_handle.join().map(|_| ()).map_err(|_| { + ErrorKind::ErrorInThread("Merging thread failed.".into()) + })?; } // Our merging thread may have queued their completed self.run_async(move |_| {}).wait()?; @@ -522,9 +542,9 @@ mod tests { assert_eq!(index.searcher().num_docs(), 302); { - index_writer - .wait_merging_threads() - .expect("waiting for merging threads"); + index_writer.wait_merging_threads().expect( + "waiting for merging threads", + ); } index.load_searchers().unwrap(); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index bbafb37ef..93c5ee5ee 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -54,22 +54,23 @@ impl<'a> SegmentWriter<'a> { /// the flushing behavior as a buffer limit /// - segment: The segment being written /// - schema - pub fn for_segment(heap: &'a Heap, - table_bits: usize, - mut segment: Segment, - schema: &Schema) - -> Result> { + pub fn for_segment( + heap: &'a Heap, + table_bits: usize, + mut segment: Segment, + schema: &Schema, + ) -> Result> { let segment_serializer = SegmentSerializer::for_segment(&mut segment)?; let multifield_postings = MultiFieldPostingsWriter::new(schema, table_bits, heap); Ok(SegmentWriter { - heap: heap, - max_doc: 0, - multifield_postings: multifield_postings, - fieldnorms_writer: create_fieldnorms_writer(schema), - segment_serializer: segment_serializer, - fast_field_writers: FastFieldsWriter::from_schema(schema), - doc_opstamps: Vec::with_capacity(1_000), - }) + heap: heap, + max_doc: 0, + multifield_postings: multifield_postings, + fieldnorms_writer: create_fieldnorms_writer(schema), + segment_serializer: segment_serializer, + fast_field_writers: FastFieldsWriter::from_schema(schema), + doc_opstamps: Vec::with_capacity(1_000), + }) } /// Lay on disk the current content of the `SegmentWriter` @@ -77,10 +78,12 @@ impl<'a> SegmentWriter<'a> { /// Finalize consumes the `SegmentWriter`, so that it cannot /// be used afterwards. pub fn finalize(self) -> Result> { - write(&self.multifield_postings, - &self.fast_field_writers, - &self.fieldnorms_writer, - self.segment_serializer)?; + write( + &self.multifield_postings, + &self.fast_field_writers, + &self.fieldnorms_writer, + self.segment_serializer, + )?; Ok(self.doc_opstamps) } @@ -107,10 +110,11 @@ impl<'a> SegmentWriter<'a> { /// Indexes a new document /// /// As a user, you should rather use `IndexWriter`'s add_document. 
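For context, the doc comment above points users at `IndexWriter`'s `add_document` rather than this internal method. A minimal sketch of that public flow, using the schema API and `doc!` macro seen elsewhere in this patch; the field name, writer parameters, and the in-RAM index are illustrative assumptions:

    let mut schema_builder = SchemaBuilder::default();
    let text_field = schema_builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(schema_builder.build());
    let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
    // Each call buffers one document; nothing is searchable until commit().
    index_writer.add_document(doc!(text_field => "a b c"));
    index_writer.commit().unwrap();
    index.load_searchers().unwrap();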
- pub fn add_document(&mut self, - add_operation: &AddOperation, - schema: &Schema) - -> io::Result<()> { + pub fn add_document( + &mut self, + add_operation: &AddOperation, + schema: &Schema, + ) -> io::Result<()> { let doc_id = self.max_doc; let doc = &add_operation.document; self.doc_opstamps.push(add_operation.opstamp); @@ -122,8 +126,11 @@ impl<'a> SegmentWriter<'a> { match *field_options.field_type() { FieldType::Str(ref text_options) => { let num_tokens: u32 = if text_options.get_indexing_options().is_tokenized() { - self.multifield_postings - .index_text(doc_id, field, &field_values) + self.multifield_postings.index_text( + doc_id, + field, + &field_values, + ) } else { let num_field_values = field_values.len() as u32; for field_value in field_values { @@ -132,15 +139,17 @@ impl<'a> SegmentWriter<'a> { } num_field_values }; - self.fieldnorms_writer - .get_field_writer(field) - .map(|field_norms_writer| field_norms_writer.add_val(num_tokens as u64)); + self.fieldnorms_writer.get_field_writer(field).map( + |field_norms_writer| field_norms_writer.add_val(num_tokens as u64), + ); } FieldType::U64(ref int_option) => { if int_option.is_indexed() { for field_value in field_values { - let term = Term::from_field_u64(field_value.field(), - field_value.value().u64_value()); + let term = Term::from_field_u64( + field_value.field(), + field_value.value().u64_value(), + ); self.multifield_postings.suscribe(doc_id, &term); } } @@ -148,8 +157,10 @@ impl<'a> SegmentWriter<'a> { FieldType::I64(ref int_option) => { if int_option.is_indexed() { for field_value in field_values { - let term = Term::from_field_i64(field_value.field(), - field_value.value().i64_value()); + let term = Term::from_field_i64( + field_value.field(), + field_value.value().i64_value(), + ); self.multifield_postings.suscribe(doc_id, &term); } } @@ -160,7 +171,9 @@ impl<'a> SegmentWriter<'a> { self.fast_field_writers.add_document(doc); let stored_fieldvalues: Vec<&FieldValue> = doc.field_values() .iter() - .filter(|field_value| schema.get_field_entry(field_value.field()).is_stored()) + .filter(|field_value| { + schema.get_field_entry(field_value.field()).is_stored() + }) .collect(); let doc_writer = self.segment_serializer.get_store_writer(); try!(doc_writer.store(&stored_fieldvalues)); @@ -191,15 +204,22 @@ impl<'a> SegmentWriter<'a> { } // This method is used as a trick to workaround the borrow checker -fn write(multifield_postings: &MultiFieldPostingsWriter, - fast_field_writers: &FastFieldsWriter, - fieldnorms_writer: &FastFieldsWriter, - mut serializer: SegmentSerializer) - -> Result<()> { +fn write( + multifield_postings: &MultiFieldPostingsWriter, + fast_field_writers: &FastFieldsWriter, + fieldnorms_writer: &FastFieldsWriter, + mut serializer: SegmentSerializer, +) -> Result<()> { - try!(multifield_postings.serialize(serializer.get_postings_serializer())); - try!(fast_field_writers.serialize(serializer.get_fast_field_serializer())); - try!(fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer())); + try!(multifield_postings.serialize( + serializer.get_postings_serializer(), + )); + try!(fast_field_writers.serialize( + serializer.get_fast_field_serializer(), + )); + try!(fieldnorms_writer.serialize( + serializer.get_fieldnorms_serializer(), + )); try!(serializer.close()); Ok(()) @@ -208,10 +228,12 @@ fn write(multifield_postings: &MultiFieldPostingsWriter, impl<'a> SerializableSegment for SegmentWriter<'a> { fn write(&self, serializer: SegmentSerializer) -> Result { let max_doc = self.max_doc; - 
write(&self.multifield_postings, - &self.fast_field_writers, - &self.fieldnorms_writer, - serializer)?; + write( + &self.multifield_postings, + &self.fast_field_writers, + &self.fieldnorms_writer, + serializer, + )?; Ok(max_doc) } } diff --git a/src/lib.rs b/src/lib.rs index 0b26c6197..2279ca687 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,7 +68,7 @@ extern crate stable_deref_trait; #[cfg(test)] extern crate env_logger; -#[cfg(feature="simdcompression")] +#[cfg(feature = "simdcompression")] extern crate libc; #[cfg(windows)] @@ -391,16 +391,24 @@ mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); let reader = searcher.segment_reader(0); - let inverted_index = reader.inverted_index(text_field).unwrap(); - assert!(inverted_index.read_postings(&term_abcd, FreqAndPositions).is_none()); + let inverted_index = reader.inverted_index(text_field); + assert!( + inverted_index + .read_postings(&term_abcd, FreqAndPositions) + .is_none() + ); { - let mut postings = inverted_index.read_postings(&term_a, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_a, FreqAndPositions) + .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 5); assert!(!postings.advance()); } { - let mut postings = inverted_index.read_postings(&term_b, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_b, FreqAndPositions) + .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 3); assert!(postings.advance()); @@ -426,17 +434,25 @@ mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); let reader = searcher.segment_reader(0); - let inverted_index = reader.inverted_index(term_abcd.field()).unwrap(); + let inverted_index = reader.inverted_index(term_abcd.field()); - assert!(inverted_index.read_postings(&term_abcd, FreqAndPositions).is_none()); + assert!( + inverted_index + .read_postings(&term_abcd, FreqAndPositions) + .is_none() + ); { - let mut postings = inverted_index.read_postings(&term_a, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_a, FreqAndPositions) + .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 5); assert!(!postings.advance()); } { - let mut postings = inverted_index.read_postings(&term_b, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_b, FreqAndPositions) + .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 3); assert!(postings.advance()); @@ -462,14 +478,22 @@ mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); let reader = searcher.segment_reader(0); - let inverted_index = reader.inverted_index(term_abcd.field()).unwrap(); - assert!(inverted_index.read_postings(&term_abcd, FreqAndPositions).is_none()); + let inverted_index = reader.inverted_index(term_abcd.field()); + assert!( + inverted_index + .read_postings(&term_abcd, FreqAndPositions) + .is_none() + ); { - let mut postings = inverted_index.read_postings(&term_a, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_a, FreqAndPositions) + .unwrap(); assert!(!postings.advance()); } { - let mut postings = inverted_index.read_postings(&term_b, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_b, FreqAndPositions) + .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 3); assert!(postings.advance()); @@ -477,7 +501,9 @@ mod tests { assert!(!postings.advance()); } { - let mut postings = 
inverted_index.read_postings(&term_c, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_c, FreqAndPositions) + .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 4); assert!(!postings.advance()); @@ -501,7 +527,7 @@ mod tests { let term = Term::from_field_u64(field, 1u64); let mut postings = searcher .segment_reader(0) - .inverted_index(term.field()).unwrap() + .inverted_index(term.field()) .read_postings(&term, SegmentPostingsOption::NoFreq) .unwrap(); assert!(postings.advance()); @@ -525,7 +551,7 @@ mod tests { let term = Term::from_field_i64(value_field, negative_val); let mut postings = searcher .segment_reader(0) - .inverted_index(term.field()).unwrap() + .inverted_index(term.field()) .read_postings(&term, SegmentPostingsOption::NoFreq) .unwrap(); assert!(postings.advance()); @@ -588,11 +614,17 @@ mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); let reader = searcher.segment_reader(0); - let inverted_index = reader.inverted_index(text_field).unwrap(); + let inverted_index = reader.inverted_index(text_field); let term_abcd = Term::from_field_text(text_field, "abcd"); - assert!(inverted_index.read_postings(&term_abcd, FreqAndPositions).is_none()); + assert!( + inverted_index + .read_postings(&term_abcd, FreqAndPositions) + .is_none() + ); let term_af = Term::from_field_text(text_field, "af"); - let mut postings = inverted_index.read_postings(&term_af, FreqAndPositions).unwrap(); + let mut postings = inverted_index + .read_postings(&term_af, FreqAndPositions) + .unwrap(); assert!(postings.advance()); assert_eq!(postings.doc(), 0); assert_eq!(postings.term_freq(), 3); @@ -634,29 +666,43 @@ mod tests { collector.docs() }; { - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "a")]), - vec![1, 2]); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "a")]), + vec![1, 2] + ); } { - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "af")]), - vec![0]); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "af")]), + vec![0] + ); } { - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "b")]), - vec![0, 1, 2]); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "b")]), + vec![0, 1, 2] + ); } { - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "c")]), - vec![1, 2]); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "c")]), + vec![1, 2] + ); } { - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "d")]), - vec![2]); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "d")]), + vec![2] + ); } { - assert_eq!(get_doc_ids(vec![Term::from_field_text(text_field, "b"), - Term::from_field_text(text_field, "a")]), - vec![0, 1, 2]); + assert_eq!( + get_doc_ids(vec![ + Term::from_field_text(text_field, "b"), + Term::from_field_text(text_field, "a"), + ]), + vec![0, 1, 2] + ); } } } @@ -693,7 +739,8 @@ mod tests { let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); let other_text_field = schema_builder.add_text_field("text2", TEXT); - let document = doc!(text_field => "tantivy", + let document = + doc!(text_field => "tantivy", text_field => "some other value", other_text_field => "short"); assert_eq!(document.len(), 3); diff --git a/src/postings/docset.rs b/src/postings/docset.rs index 4b1ea3c7a..8aa665f53 100644 --- a/src/postings/docset.rs +++ b/src/postings/docset.rs @@ -72,8 +72,7 @@ pub trait DocSet { for (i, buffer_val) in 
buffer.iter_mut().enumerate() { if self.advance() { *buffer_val = self.doc(); - } - else { + } else { return i; } } diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 9b725cd86..d672077b2 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -65,7 +65,9 @@ mod tests { field_serializer.new_term("abc".as_bytes()).unwrap(); for doc_id in 0u32..120u32 { let delta_positions = vec![1, 2, 3, 2]; - field_serializer.write_doc(doc_id, 2, &delta_positions).unwrap(); + field_serializer + .write_doc(doc_id, 2, &delta_positions) + .unwrap(); } field_serializer.close_term().unwrap(); } @@ -84,8 +86,8 @@ mod tests { let heap = Heap::with_capacity(10_000_000); { - let mut segment_writer = SegmentWriter::for_segment(&heap, 18, segment.clone(), &schema) - .unwrap(); + let mut segment_writer = + SegmentWriter::for_segment(&heap, 18, segment.clone(), &schema).unwrap(); { let mut doc = Document::default(); // checking that position works if the field has two values @@ -131,15 +133,17 @@ mod tests { } { let term_a = Term::from_field_text(text_field, "abcdef"); - assert!(segment_reader - .inverted_index(term_a.field()).unwrap() + assert!( + segment_reader + .inverted_index(term_a.field()) .read_postings(&term_a, FreqAndPositions) - .is_none()); + .is_none() + ); } { let term_a = Term::from_field_text(text_field, "a"); let mut postings_a = segment_reader - .inverted_index(term_a.field()).unwrap() + .inverted_index(term_a.field()) .read_postings(&term_a, FreqAndPositions) .unwrap(); assert_eq!(postings_a.len(), 1000); @@ -162,7 +166,7 @@ mod tests { { let term_e = Term::from_field_text(text_field, "e"); let mut postings_e = segment_reader - .inverted_index(term_e.field()).unwrap() + .inverted_index(term_e.field()) .read_postings(&term_e, FreqAndPositions) .unwrap(); assert_eq!(postings_e.len(), 1000 - 2); @@ -202,8 +206,10 @@ mod tests { assert!(index_writer.commit().is_ok()); } index.load_searchers().unwrap(); - let term_query = TermQuery::new(Term::from_field_text(text_field, "a"), - SegmentPostingsOption::NoFreq); + let term_query = TermQuery::new( + Term::from_field_text(text_field, "a"), + SegmentPostingsOption::NoFreq, + ); let searcher = index.searcher(); let mut term_weight = term_query.specialized_weight(&*searcher); term_weight.segment_postings_options = SegmentPostingsOption::FreqAndPositions; @@ -250,7 +256,7 @@ mod tests { for i in 0..num_docs - 1 { for j in i + 1..num_docs { let mut segment_postings = segment_reader - .inverted_index(term_2.field()).unwrap() + .inverted_index(term_2.field()) .read_postings(&term_2, SegmentPostingsOption::NoFreq) .unwrap(); @@ -264,7 +270,7 @@ mod tests { { let mut segment_postings = segment_reader - .inverted_index(term_2.field()).unwrap() + .inverted_index(term_2.field()) .read_postings(&term_2, SegmentPostingsOption::NoFreq) .unwrap(); @@ -285,7 +291,7 @@ mod tests { // check that filtering works { let mut segment_postings = segment_reader - .inverted_index(term_0.field()).unwrap() + .inverted_index(term_0.field()) .read_postings(&term_0, SegmentPostingsOption::NoFreq) .unwrap(); @@ -295,7 +301,7 @@ mod tests { } let mut segment_postings = segment_reader - .inverted_index(term_0.field()).unwrap() + .inverted_index(term_0.field()) .read_postings(&term_0, SegmentPostingsOption::NoFreq) .unwrap(); @@ -320,7 +326,7 @@ mod tests { // make sure seeking still works for i in 0..num_docs { let mut segment_postings = segment_reader - .inverted_index(term_2.field()).unwrap() + .inverted_index(term_2.field()) .read_postings(&term_2, 
SegmentPostingsOption::NoFreq) .unwrap(); @@ -336,7 +342,7 @@ mod tests { // now try with a longer sequence { let mut segment_postings = segment_reader - .inverted_index(term_2.field()).unwrap() + .inverted_index(term_2.field()) .read_postings(&term_2, SegmentPostingsOption::NoFreq) .unwrap(); @@ -372,14 +378,14 @@ mod tests { // finally, check that it's empty { let mut segment_postings = segment_reader - .inverted_index(term_2.field()).unwrap() + .inverted_index(term_2.field()) .read_postings(&term_2, SegmentPostingsOption::NoFreq) .unwrap(); assert_eq!(segment_postings.skip_next(0), SkipResult::End); let mut segment_postings = segment_reader - .inverted_index(term_2.field()).unwrap() + .inverted_index(term_2.field()) .read_postings(&term_2, SegmentPostingsOption::NoFreq) .unwrap(); @@ -446,12 +452,12 @@ mod tests { let segment_reader = searcher.segment_reader(0); b.iter(|| { - let mut segment_postings = segment_reader - .inverted_index(TERM_A.field()).unwrap() - .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) - .unwrap(); - while segment_postings.advance() {} - }); + let mut segment_postings = segment_reader + .inverted_index(TERM_A.field()) + .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) + .unwrap(); + while segment_postings.advance() {} + }); } #[bench] @@ -460,25 +466,27 @@ mod tests { let segment_reader = searcher.segment_reader(0); b.iter(|| { let segment_postings_a = segment_reader - .inverted_index(TERM_A.field()).unwrap() + .inverted_index(TERM_A.field()) .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) .unwrap(); let segment_postings_b = segment_reader - .inverted_index(TERM_B.field()).unwrap() + .inverted_index(TERM_B.field()) .read_postings(&*TERM_B, SegmentPostingsOption::NoFreq) .unwrap(); let segment_postings_c = segment_reader - .inverted_index(TERM_C.field()).unwrap() + .inverted_index(TERM_C.field()) .read_postings(&*TERM_C, SegmentPostingsOption::NoFreq) .unwrap(); let segment_postings_d = segment_reader - .inverted_index(TERM_D.field()).unwrap() + .inverted_index(TERM_D.field()) .read_postings(&*TERM_D, SegmentPostingsOption::NoFreq) .unwrap(); - let mut intersection = IntersectionDocSet::from(vec![segment_postings_a, - segment_postings_b, - segment_postings_c, - segment_postings_d]); + let mut intersection = IntersectionDocSet::from(vec![ + segment_postings_a, + segment_postings_b, + segment_postings_c, + segment_postings_d, + ]); while intersection.advance() {} }); } @@ -489,7 +497,7 @@ mod tests { let docs = tests::sample(segment_reader.num_docs(), p); let mut segment_postings = segment_reader - .inverted_index(TERM_A.field()).unwrap() + .inverted_index(TERM_A.field()) .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) .unwrap(); @@ -506,7 +514,7 @@ mod tests { b.iter(|| { let mut segment_postings = segment_reader - .inverted_index(TERM_A.field()).unwrap() + .inverted_index(TERM_A.field()) .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) .unwrap(); for doc in &existing_docs { @@ -544,7 +552,7 @@ mod tests { b.iter(|| { let n: u32 = test::black_box(17); let mut segment_postings = segment_reader - .inverted_index(TERM_A.field()).unwrap() + .inverted_index(TERM_A.field()) .read_postings(&*TERM_A, SegmentPostingsOption::NoFreq) .unwrap(); let mut s = 0u32; diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 4e1f770c7..1b62942c5 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -16,9 +16,10 @@ use schema::FieldEntry; use schema::FieldType; use 
schema::TextIndexingOptions; -fn posting_from_field_entry<'a>(field_entry: &FieldEntry, - heap: &'a Heap) - -> Box { +fn posting_from_field_entry<'a>( + field_entry: &FieldEntry, + heap: &'a Heap, +) -> Box { match *field_entry.field_type() { FieldType::Str(ref text_options) => { match text_options.get_indexing_options() { @@ -51,9 +52,7 @@ impl<'a> MultiFieldPostingsWriter<'a> { let per_field_postings_writers: Vec<_> = schema .fields() .iter() - .map(|field_entry| { - posting_from_field_entry(field_entry, heap) - }) + .map(|field_entry| posting_from_field_entry(field_entry, heap)) .collect(); MultiFieldPostingsWriter { @@ -102,7 +101,11 @@ impl<'a> MultiFieldPostingsWriter<'a> { let (_, stop) = offsets[i + 1]; let postings_writer = &self.per_field_postings_writers[field.0 as usize]; let mut field_serializer = serializer.new_field(field)?; - postings_writer.serialize(&term_offsets[start..stop], &mut field_serializer, self.heap)?; + postings_writer.serialize( + &term_offsets[start..stop], + &mut field_serializer, + self.heap, + )?; field_serializer.close()?; } Ok(()) @@ -127,29 +130,33 @@ pub trait PostingsWriter { /// * term - the term /// * heap - heap used to store the postings informations as well as the terms /// in the hashmap. - fn suscribe(&mut self, - term_index: &mut HashMap, - doc: DocId, - pos: u32, - term: &Term, - heap: &Heap); + fn suscribe( + &mut self, + term_index: &mut HashMap, + doc: DocId, + pos: u32, + term: &Term, + heap: &Heap, + ); /// Serializes the postings on disk. /// The actual serialization format is handled by the `PostingsSerializer`. - fn serialize(&self, - term_addrs: &[(&[u8], u32)], - serializer: &mut FieldSerializer, - heap: &Heap) - -> io::Result<()>; + fn serialize( + &self, + term_addrs: &[(&[u8], u32)], + serializer: &mut FieldSerializer, + heap: &Heap, + ) -> io::Result<()>; /// Tokenize a text and suscribe all of its token. 
- fn index_text<'a>(&mut self, - term_index: &mut HashMap, - doc_id: DocId, - field: Field, - field_values: &[&'a FieldValue], - heap: &Heap) - -> u32 { + fn index_text<'a>( + &mut self, + term_index: &mut HashMap, + doc_id: DocId, + field: Field, + field_values: &[&'a FieldValue], + heap: &Heap, + ) -> u32 { let mut pos = 0u32; let mut num_tokens: u32 = 0u32; let mut term = unsafe { Term::with_capacity(100) }; @@ -195,12 +202,14 @@ impl<'a, Rec: Recorder + 'static> SpecializedPostingsWriter<'a, Rec> { } impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<'a, Rec> { - fn suscribe(&mut self, - term_index: &mut HashMap, - doc: DocId, - position: u32, - term: &Term, - heap: &Heap) { + fn suscribe( + &mut self, + term_index: &mut HashMap, + doc: DocId, + position: u32, + term: &Term, + heap: &Heap, + ) { debug_assert!(term.as_slice().len() >= 4); let recorder: &mut Rec = term_index.get_or_create(term); let current_doc = recorder.current_doc(); @@ -213,11 +222,12 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<' recorder.record_position(position, heap); } - fn serialize(&self, - term_addrs: &[(&[u8], u32)], - serializer: &mut FieldSerializer, - heap: &Heap) - -> io::Result<()> { + fn serialize( + &self, + term_addrs: &[(&[u8], u32)], + serializer: &mut FieldSerializer, + heap: &Heap, + ) -> io::Result<()> { for &(term_bytes, addr) in term_addrs { let recorder: &mut Rec = self.heap.get_mut_ref(addr); serializer.new_term(term_bytes)?; @@ -227,4 +237,3 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<' Ok(()) } } - diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs index dde85d66c..07c0c4e19 100644 --- a/src/postings/recorder.rs +++ b/src/postings/recorder.rs @@ -27,11 +27,12 @@ pub trait Recorder: HeapAllocable { /// Close the document. It will help record the term frequency. fn close_doc(&mut self, heap: &Heap); /// Pushes the postings information to the serializer. - fn serialize(&self, - self_addr: u32, - serializer: &mut FieldSerializer, - heap: &Heap) - -> io::Result<()>; + fn serialize( + &self, + self_addr: u32, + serializer: &mut FieldSerializer, + heap: &Heap, + ) -> io::Result<()>; } /// Only records the doc ids @@ -64,11 +65,12 @@ impl Recorder for NothingRecorder { fn close_doc(&mut self, _heap: &Heap) {} - fn serialize(&self, - self_addr: u32, - serializer: &mut FieldSerializer, - heap: &Heap) - -> io::Result<()> { + fn serialize( + &self, + self_addr: u32, + serializer: &mut FieldSerializer, + heap: &Heap, + ) -> io::Result<()> { for doc in self.stack.iter(self_addr, heap) { serializer.write_doc(doc, 0u32, &EMPTY_ARRAY)?; } @@ -116,21 +118,23 @@ impl Recorder for TermFrequencyRecorder { } - fn serialize(&self, - self_addr: u32, - serializer: &mut FieldSerializer, - heap: &Heap) - -> io::Result<()> { + fn serialize( + &self, + self_addr: u32, + serializer: &mut FieldSerializer, + heap: &Heap, + ) -> io::Result<()> { // the last document has not been closed... // its term freq is self.current_tf. 
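The comment above relies on the recorder's stack layout: doc ids and term frequencies are interleaved, with the frequency of the still-open document held in `current_tf`. A self-contained illustration of that pairing, in plain Rust standing in for the heap-backed stack:

    // Layout: doc0, tf0, doc1, tf1, ..., docN  -- tfN is kept in current_tf.
    let stack: Vec<u32> = vec![3, 1, 7, 2, 9];
    let current_tf = 4u32; // term frequency of the last, unclosed document (9)
    let mut it = stack.into_iter().chain(Some(current_tf));
    while let Some(doc) = it.next() {
        let term_freq = it.next().expect("doc recorded without a term freq");
        // Here the serializer would call write_doc(doc, term_freq, &[]).
        println!("doc {} -> term_freq {}", doc, term_freq);
    }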
- let mut doc_iter = self.stack - .iter(self_addr, heap) - .chain(Some(self.current_tf).into_iter()); + let mut doc_iter = self.stack.iter(self_addr, heap).chain( + Some(self.current_tf) + .into_iter(), + ); while let Some(doc) = doc_iter.next() { - let term_freq = doc_iter - .next() - .expect("The IndexWriter recorded a doc without a term freq."); + let term_freq = doc_iter.next().expect( + "The IndexWriter recorded a doc without a term freq.", + ); serializer.write_doc(doc, term_freq, &EMPTY_ARRAY)?; } Ok(()) @@ -171,11 +175,12 @@ impl Recorder for TFAndPositionRecorder { self.stack.push(POSITION_END, heap); } - fn serialize(&self, - self_addr: u32, - serializer: &mut FieldSerializer, - heap: &Heap) - -> io::Result<()> { + fn serialize( + &self, + self_addr: u32, + serializer: &mut FieldSerializer, + heap: &Heap, + ) -> io::Result<()> { let mut doc_positions = Vec::with_capacity(100); let mut positions_iter = self.stack.iter(self_addr, heap); while let Some(doc) = positions_iter.next() { @@ -189,7 +194,11 @@ impl Recorder for TFAndPositionRecorder { prev_position = position; } } - serializer.write_doc(doc, doc_positions.len() as u32, &doc_positions)?; + serializer.write_doc( + doc, + doc_positions.len() as u32, + &doc_positions, + )?; } Ok(()) } diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index d8d08e40b..cadc85401 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -25,11 +25,10 @@ struct PositionComputer { } impl PositionComputer { - pub fn new(positions_stream: CompressedIntStream) -> PositionComputer { PositionComputer { position_to_skip: None, - positions: vec!(), + positions: vec![], positions_stream: positions_stream, } } @@ -38,9 +37,9 @@ impl PositionComputer { self.position_to_skip = Some( self.position_to_skip .map(|prev_skip| prev_skip + num_skip) - .unwrap_or(0) - ); - } + .unwrap_or(0), + ); + } pub fn positions(&mut self, term_freq: usize) -> &[u32] { if let Some(num_skip) = self.position_to_skip { @@ -83,13 +82,13 @@ impl SegmentPostings { /// * `data` - data array. The complete data is not necessarily used. 
/// * `freq_handler` - the freq handler is in charge of decoding /// frequencies and/or positions - pub fn from_block_postings(segment_block_postings: BlockSegmentPostings, - delete_bitset: DeleteBitSet, - positions_stream_opt: Option) - -> SegmentPostings { - let position_computer = positions_stream_opt.map(|stream| { - UnsafeCell::new(PositionComputer::new(stream)) - }); + pub fn from_block_postings( + segment_block_postings: BlockSegmentPostings, + delete_bitset: DeleteBitSet, + positions_stream_opt: Option, + ) -> SegmentPostings { + let position_computer = + positions_stream_opt.map(|stream| UnsafeCell::new(PositionComputer::new(stream))); SegmentPostings { block_cursor: segment_block_postings, cur: COMPRESSION_BLOCK_SIZE, // cursor within the block @@ -110,7 +109,7 @@ impl SegmentPostings { } - fn position_add_skipusize>(&self, num_skips_fn: F) { + fn position_add_skip usize>(&self, num_skips_fn: F) { if let Some(ref position_computer) = self.position_computer.as_ref() { let num_skips = num_skips_fn(); unsafe { @@ -135,7 +134,7 @@ impl DocSet for SegmentPostings { return false; } } - self.position_add_skip(|| { self.term_freq() as usize }); + self.position_add_skip(|| self.term_freq() as usize); if !self.delete_bitset.is_deleted(self.doc()) { return true; } @@ -257,8 +256,10 @@ impl DocSet for SegmentPostings { #[inline] fn doc(&self) -> DocId { let docs = self.block_cursor.docs(); - debug_assert!(self.cur < docs.len(), - "Have you forgotten to call `.advance()` at least once before calling .doc()."); + debug_assert!( + self.cur < docs.len(), + "Have you forgotten to call `.advance()` at least once before calling .doc()." + ); docs[self.cur] } } @@ -278,16 +279,11 @@ impl Postings for SegmentPostings { let term_freq = self.term_freq(); self.position_computer .as_ref() - .map(|position_computer| { - unsafe { - (&mut *position_computer.get()).positions(term_freq as usize) - } + .map(|position_computer| unsafe { + (&mut *position_computer.get()).positions(term_freq as usize) }) .unwrap_or(&EMPTY_POSITIONS[..]) } - - - } @@ -311,10 +307,11 @@ pub struct BlockSegmentPostings { } impl BlockSegmentPostings { - pub(crate) fn from_data(doc_freq: usize, - data: SourceRead, - has_freq: bool) - -> BlockSegmentPostings { + pub(crate) fn from_data( + doc_freq: usize, + data: SourceRead, + has_freq: bool, + ) -> BlockSegmentPostings { let num_binpacked_blocks: usize = (doc_freq as usize) / COMPRESSION_BLOCK_SIZE; let num_vint_docs = (doc_freq as usize) - COMPRESSION_BLOCK_SIZE * num_binpacked_blocks; BlockSegmentPostings { @@ -402,15 +399,16 @@ impl BlockSegmentPostings { /// Returns false iff there was no remaining blocks. pub fn advance(&mut self) -> bool { if self.num_binpacked_blocks > 0 { - // TODO could self.doc_offset be just a local variable? - - let num_consumed_bytes = self - .doc_decoder - .uncompress_block_sorted(self.remaining_data.as_ref(), self.doc_offset); + let num_consumed_bytes = self.doc_decoder.uncompress_block_sorted( + self.remaining_data.as_ref(), + self.doc_offset, + ); self.remaining_data.advance(num_consumed_bytes); if self.has_freq { - let num_consumed_bytes = self.freq_decoder.uncompress_block_unsorted(self.remaining_data.as_ref()); + let num_consumed_bytes = self.freq_decoder.uncompress_block_unsorted( + self.remaining_data.as_ref(), + ); self.remaining_data.advance(num_consumed_bytes); } // it will be used as the next offset. 
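As a usage sketch of the block-level API whose decoding loop is reformatted here, based on the tests further down in this patch; `segment_reader` and `int_field` are assumed to come from the surrounding context:

    let inverted_index = segment_reader.inverted_index(int_field);
    let term = Term::from_field_u64(int_field, 0u64);
    let term_info = inverted_index.get_term_info(&term).unwrap();
    let mut block_segments = inverted_index.read_block_postings_from_terminfo(
        &term_info,
        SegmentPostingsOption::NoFreq,
    );
    // Each advance() decompresses one block (or the trailing vint block) of doc ids.
    while block_segments.advance() {
        for &doc in block_segments.docs() {
            println!("doc {}", doc);
        }
    }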
@@ -418,15 +416,17 @@ impl BlockSegmentPostings { self.num_binpacked_blocks -= 1; true } else if self.num_vint_docs > 0 { - let num_compressed_bytes = - self.doc_decoder - .uncompress_vint_sorted(self.remaining_data.as_ref(), - self.doc_offset, - self.num_vint_docs); + let num_compressed_bytes = self.doc_decoder.uncompress_vint_sorted( + self.remaining_data.as_ref(), + self.doc_offset, + self.num_vint_docs, + ); self.remaining_data.advance(num_compressed_bytes); if self.has_freq { - self.freq_decoder - .uncompress_vint_unsorted(self.remaining_data.as_ref(), self.num_vint_docs); + self.freq_decoder.uncompress_vint_unsorted( + self.remaining_data.as_ref(), + self.num_vint_docs, + ); } self.num_vint_docs = 0; true @@ -508,12 +508,13 @@ mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); let segment_reader = searcher.segment_reader(0); - let inverted_index = segment_reader.inverted_index(int_field).unwrap(); + let inverted_index = segment_reader.inverted_index(int_field); let term = Term::from_field_u64(int_field, 0u64); let term_info = inverted_index.get_term_info(&term).unwrap(); - let mut block_segments = - inverted_index - .read_block_postings_from_terminfo(&term_info, SegmentPostingsOption::NoFreq); + let mut block_segments = inverted_index.read_block_postings_from_terminfo( + &term_info, + SegmentPostingsOption::NoFreq, + ); let mut offset: u32 = 0u32; // checking that the block before calling advance is empty assert!(block_segments.docs().is_empty()); @@ -549,17 +550,18 @@ mod tests { let mut block_segments; { let term = Term::from_field_u64(int_field, 0u64); - let inverted_index = segment_reader.inverted_index(int_field).unwrap(); + let inverted_index = segment_reader.inverted_index(int_field); let term_info = inverted_index.get_term_info(&term).unwrap(); - block_segments = - inverted_index - .read_block_postings_from_terminfo(&term_info, SegmentPostingsOption::NoFreq); + block_segments = inverted_index.read_block_postings_from_terminfo( + &term_info, + SegmentPostingsOption::NoFreq, + ); } assert!(block_segments.advance()); assert!(block_segments.docs() == &[0, 2, 4]); { let term = Term::from_field_u64(int_field, 1u64); - let inverted_index = segment_reader.inverted_index(int_field).unwrap(); + let inverted_index = segment_reader.inverted_index(int_field); let term_info = inverted_index.get_term_info(&term).unwrap(); inverted_index.reset_block_postings_from_terminfo(&term_info, &mut block_segments); } diff --git a/src/postings/segment_postings_option.rs b/src/postings/segment_postings_option.rs index 2aba4ec8e..b50e2eee4 100644 --- a/src/postings/segment_postings_option.rs +++ b/src/postings/segment_postings_option.rs @@ -17,7 +17,6 @@ pub enum SegmentPostingsOption { } impl SegmentPostingsOption { - /// Returns true iff this option includes encoding /// term frequencies. 
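The option passed to `read_postings` decides how much of this data is decoded: `NoFreq` yields doc ids only, while `FreqAndPositions` also makes `term_freq()` and `positions()` meaningful, as the reformatted tests in this patch show. A hedged sketch, assuming `segment_reader` and `text_field` from context:

    let term = Term::from_field_text(text_field, "af");
    let mut postings = segment_reader
        .inverted_index(term.field())
        .read_postings(&term, SegmentPostingsOption::FreqAndPositions)
        .unwrap();
    while postings.advance() {
        // term_freq() and positions() are only populated because
        // FreqAndPositions was requested above.
        println!(
            "doc {} tf {} positions {:?}",
            postings.doc(),
            postings.term_freq(),
            postings.positions()
        );
    }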
pub fn has_freq(&self) -> bool { diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 14a22ccea..4c37e015d 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -57,11 +57,12 @@ pub struct InvertedIndexSerializer { impl InvertedIndexSerializer { /// Open a new `PostingsSerializer` for the given segment - fn new(terms_write: CompositeWrite, - postings_write: CompositeWrite, - positions_write: CompositeWrite, - schema: Schema) - -> Result { + fn new( + terms_write: CompositeWrite, + postings_write: CompositeWrite, + positions_write: CompositeWrite, + schema: Schema, + ) -> Result { Ok(InvertedIndexSerializer { terms_write: terms_write, postings_write: postings_write, @@ -78,7 +79,8 @@ impl InvertedIndexSerializer { CompositeWrite::wrap(segment.open_write(TERMS)?), CompositeWrite::wrap(segment.open_write(POSTINGS)?), CompositeWrite::wrap(segment.open_write(POSITIONS)?), - segment.schema()) + segment.schema(), + ) } /// Must be called before starting pushing terms of @@ -94,7 +96,7 @@ impl InvertedIndexSerializer { field_entry.field_type().clone(), term_dictionary_write, postings_write, - positions_write + positions_write, ) } @@ -120,7 +122,6 @@ pub struct FieldSerializer<'a> { impl<'a> FieldSerializer<'a> { - fn new( field_type: FieldType, term_dictionary_write: &'a mut CountingWriter, @@ -128,25 +129,24 @@ impl<'a> FieldSerializer<'a> { positions_write: &'a mut CountingWriter, ) -> io::Result> { - let (term_freq_enabled, position_enabled): (bool, bool) = - match field_type { - FieldType::Str(ref text_options) => { - let text_indexing_options = text_options.get_indexing_options(); - (text_indexing_options.is_termfreq_enabled(), text_indexing_options.is_position_enabled()) - }, - _ => { - (false, false) - } - }; - let term_dictionary_builder = TermDictionaryBuilderImpl::new(term_dictionary_write, field_type)?; - let postings_serializer = PostingsSerializer::new(postings_write, term_freq_enabled); - let positions_serializer_opt = - if position_enabled { - Some(PositionSerializer::new(positions_write)) + let (term_freq_enabled, position_enabled): (bool, bool) = match field_type { + FieldType::Str(ref text_options) => { + let text_indexing_options = text_options.get_indexing_options(); + ( + text_indexing_options.is_termfreq_enabled(), + text_indexing_options.is_position_enabled(), + ) } - else { - None - }; + _ => (false, false), + }; + let term_dictionary_builder = + TermDictionaryBuilderImpl::new(term_dictionary_write, field_type)?; + let postings_serializer = PostingsSerializer::new(postings_write, term_freq_enabled); + let positions_serializer_opt = if position_enabled { + Some(PositionSerializer::new(positions_write)) + } else { + None + }; Ok(FieldSerializer { term_dictionary_builder: term_dictionary_builder, @@ -159,9 +159,9 @@ impl<'a> FieldSerializer<'a> { fn current_term_info(&self) -> TermInfo { let (filepos, offset) = self.positions_serializer_opt - .as_ref() - .map(|positions_serializer| positions_serializer.addr()) - .unwrap_or((0u32, 0u8)); + .as_ref() + .map(|positions_serializer| positions_serializer.addr()) + .unwrap_or((0u32, 0u8)); TermInfo { doc_freq: 0, postings_offset: self.postings_serializer.addr(), @@ -194,11 +194,12 @@ impl<'a> FieldSerializer<'a> { /// /// Term frequencies and positions may be ignored by the serializer depending /// on the configuration of the field in the `Schema`. 
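Concretely, that per-field decision is made once in `FieldSerializer::new` earlier in this hunk; condensed, the logic is:

    let (term_freq_enabled, position_enabled) = match field_type {
        FieldType::Str(ref text_options) => {
            let opts = text_options.get_indexing_options();
            (opts.is_termfreq_enabled(), opts.is_position_enabled())
        }
        // u64/i64 fields never carry term frequencies or positions.
        _ => (false, false),
    };
    // position_enabled then controls whether a PositionSerializer is created at all.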
- pub fn write_doc(&mut self, - doc_id: DocId, - term_freq: u32, - position_deltas: &[u32]) - -> io::Result<()> { + pub fn write_doc( + &mut self, + doc_id: DocId, + term_freq: u32, + position_deltas: &[u32], + ) -> io::Result<()> { self.current_term_info.doc_freq += 1; self.postings_serializer.write_doc(doc_id, term_freq)?; if let Some(ref mut positions_serializer) = self.positions_serializer_opt.as_mut() { @@ -213,7 +214,9 @@ impl<'a> FieldSerializer<'a> { /// using `VInt` encoding. pub fn close_term(&mut self) -> io::Result<()> { if self.term_open { - self.term_dictionary_builder.insert_value(&self.current_term_info)?; + self.term_dictionary_builder.insert_value( + &self.current_term_info, + )?; self.postings_serializer.close_term()?; self.term_open = false; } @@ -251,8 +254,8 @@ impl PostingsSerializer { postings_write: CountingWriter::wrap(write), block_encoder: BlockEncoder::new(), - doc_ids: vec!(), - term_freqs: vec!(), + doc_ids: vec![], + term_freqs: vec![], last_doc_id_encoded: 0u32, termfreq_enabled: termfreq_enabled, @@ -267,16 +270,17 @@ impl PostingsSerializer { if self.doc_ids.len() == COMPRESSION_BLOCK_SIZE { { // encode the doc ids - let block_encoded: &[u8] = - self.block_encoder - .compress_block_sorted(&self.doc_ids, self.last_doc_id_encoded); + let block_encoded: &[u8] = self.block_encoder.compress_block_sorted( + &self.doc_ids, + self.last_doc_id_encoded, + ); self.last_doc_id_encoded = self.doc_ids[self.doc_ids.len() - 1]; self.postings_write.write_all(block_encoded)?; } if self.termfreq_enabled { // encode the term_freqs - let block_encoded: &[u8] = self.block_encoder - .compress_block_unsorted(&self.term_freqs); + let block_encoded: &[u8] = + self.block_encoder.compress_block_unsorted(&self.term_freqs); self.postings_write.write_all(block_encoded)?; self.term_freqs.clear(); } @@ -294,16 +298,18 @@ impl PostingsSerializer { // In that case, the remaining part is encoded // using variable int encoding. { - let block_encoded = - self.block_encoder - .compress_vint_sorted(&self.doc_ids, self.last_doc_id_encoded); + let block_encoded = self.block_encoder.compress_vint_sorted( + &self.doc_ids, + self.last_doc_id_encoded, + ); self.postings_write.write_all(block_encoded)?; self.doc_ids.clear(); } // ... Idem for term frequencies if self.termfreq_enabled { - let block_encoded = self.block_encoder - .compress_vint_unsorted(&self.term_freqs[..]); + let block_encoded = self.block_encoder.compress_vint_unsorted( + &self.term_freqs[..], + ); self.postings_write.write_all(block_encoded)?; self.term_freqs.clear(); } @@ -373,4 +379,3 @@ impl PositionSerializer { self.write.flush() } } - diff --git a/src/postings/term_info.rs b/src/postings/term_info.rs index 51ae7083a..375f73202 100644 --- a/src/postings/term_info.rs +++ b/src/postings/term_info.rs @@ -12,7 +12,7 @@ use std::io; /// * `postings_offset` : an offset in the `.idx` file /// addressing the start of the posting list associated /// to this term. 
-#[derive(Debug,Default,Ord,PartialOrd,Eq,PartialEq,Clone)] +#[derive(Debug, Default, Ord, PartialOrd, Eq, PartialEq, Clone)] pub struct TermInfo { /// Number of documents in the segment containing the term pub doc_freq: u32, diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index b471da320..ba9f93b19 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -37,10 +37,12 @@ impl Query for BooleanQuery { } fn weight(&self, searcher: &Searcher) -> Result> { - let sub_weights = try!(self.subqueries - .iter() - .map(|&(ref _occur, ref subquery)| subquery.weight(searcher)) - .collect()); + let sub_weights = try!( + self.subqueries + .iter() + .map(|&(ref _occur, ref subquery)| subquery.weight(searcher)) + .collect() + ); let occurs: Vec = self.subqueries .iter() .map(|&(ref occur, ref _subquery)| *occur) @@ -57,10 +59,9 @@ impl BooleanQuery { let occur_term_queries: Vec<(Occur, Box)> = terms .into_iter() .map(|term| { - let term_query: Box = box TermQuery::new(term, - SegmentPostingsOption::Freq); - (Occur::Should, term_query) - }) + let term_query: Box = box TermQuery::new(term, SegmentPostingsOption::Freq); + (Occur::Should, term_query) + }) .collect(); BooleanQuery::from(occur_term_queries) } diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs index 595f54219..723e4a92d 100644 --- a/src/query/boolean_query/boolean_scorer.rs +++ b/src/query/boolean_query/boolean_scorer.rs @@ -55,11 +55,11 @@ impl BooleanScorer { .map(|posting| posting.doc()) .enumerate() .map(|(ord, doc)| { - HeapItem { - doc: doc, - ord: ord as u32, - } - }) + HeapItem { + doc: doc, + ord: ord as u32, + } + }) .collect(); BooleanScorer { scorers: non_empty_scorers, diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 04f22595c..0ff49cbde 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -22,11 +22,12 @@ impl BooleanWeight { impl Weight for BooleanWeight { fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result> { - let sub_scorers: Vec> = - try!(self.weights - .iter() - .map(|weight| weight.scorer(reader)) - .collect()); + let sub_scorers: Vec> = try!( + self.weights + .iter() + .map(|weight| weight.scorer(reader)) + .collect() + ); let boolean_scorer = BooleanScorer::new(sub_scorers, self.occur_filter); Ok(box boolean_scorer) } diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 01ef9e824..73f659a03 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -64,8 +64,10 @@ mod tests { } let make_term_query = |text: &str| { - let term_query = TermQuery::new(Term::from_field_text(text_field, text), - SegmentPostingsOption::NoFreq); + let term_query = TermQuery::new( + Term::from_field_text(text_field, text), + SegmentPostingsOption::NoFreq, + ); let query: Box = box term_query; query }; @@ -87,19 +89,25 @@ mod tests { assert_eq!(matching_docs(&boolean_query), vec![0, 1, 3]); } { - let boolean_query = BooleanQuery::from(vec![(Occur::Should, make_term_query("a")), - (Occur::Should, make_term_query("b"))]); + let boolean_query = BooleanQuery::from(vec![ + (Occur::Should, make_term_query("a")), + (Occur::Should, make_term_query("b")), + ]); assert_eq!(matching_docs(&boolean_query), vec![0, 1, 2, 3]); } { - let boolean_query = BooleanQuery::from(vec![(Occur::Must, make_term_query("a")), - (Occur::Should, 
make_term_query("b"))]); + let boolean_query = BooleanQuery::from(vec![ + (Occur::Must, make_term_query("a")), + (Occur::Should, make_term_query("b")), + ]); assert_eq!(matching_docs(&boolean_query), vec![0, 1, 3]); } { - let boolean_query = BooleanQuery::from(vec![(Occur::Must, make_term_query("a")), - (Occur::Should, make_term_query("b")), - (Occur::MustNot, make_term_query("d"))]); + let boolean_query = BooleanQuery::from(vec![ + (Occur::Must, make_term_query("a")), + (Occur::Should, make_term_query("b")), + (Occur::MustNot, make_term_query("d")), + ]); assert_eq!(matching_docs(&boolean_query), vec![0, 1]); } { diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs index 4ad89a3b2..8adc4728b 100644 --- a/src/query/phrase_query/mod.rs +++ b/src/query/phrase_query/mod.rs @@ -61,9 +61,9 @@ mod tests { .map(|text| Term::from_field_text(text_field, text)) .collect(); let phrase_query = PhraseQuery::from(terms); - searcher - .search(&phrase_query, &mut test_collector) - .expect("search should succeed"); + searcher.search(&phrase_query, &mut test_collector).expect( + "search should succeed", + ); test_collector.docs() }; diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs index 119f32dbe..1a85342b9 100644 --- a/src/query/phrase_query/phrase_weight.rs +++ b/src/query/phrase_query/phrase_weight.rs @@ -22,7 +22,7 @@ impl Weight for PhraseWeight { fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result> { let mut term_postings_list = Vec::new(); for term in &self.phrase_terms { - let inverted_index = reader.inverted_index(term.field())?; + let inverted_index = reader.inverted_index(term.field()); let term_postings_option = inverted_index.read_postings(term, SegmentPostingsOption::FreqAndPositions); if let Some(term_postings) = term_postings_option { @@ -31,6 +31,8 @@ impl Weight for PhraseWeight { return Ok(box EmptyScorer); } } - Ok(box PhraseScorer { intersection_docset: IntersectionDocSet::from(term_postings_list) }) + Ok(box PhraseScorer { + intersection_docset: IntersectionDocSet::from(term_postings_list), + }) } } diff --git a/src/query/query.rs b/src/query/query.rs index 683281dc6..59e1f2cbf 100644 --- a/src/query/query.rs +++ b/src/query/query.rs @@ -66,7 +66,10 @@ pub trait Query: fmt::Debug { let mut segment_search_timer = search_timer.open("segment_search"); { let _ = segment_search_timer.open("set_segment"); - try!(collector.set_segment(segment_ord as SegmentLocalId, segment_reader)); + try!(collector.set_segment( + segment_ord as SegmentLocalId, + segment_reader, + )); } let mut scorer = try!(weight.scorer(segment_reader)); { diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 08f167b25..8fa2a3c11 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -3,7 +3,8 @@ use combine::char::*; use super::user_input_ast::*; fn literal(input: I) -> ParseResult - where I: Stream +where + I: Stream, { let term_val = || { let word = many1(satisfy(|c: char| c.is_alphanumeric())); @@ -11,27 +12,29 @@ fn literal(input: I) -> ParseResult phrase.or(word) }; - let negative_numbers = (char('-'), many1(satisfy(|c: char| c.is_numeric()))) - .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)); + let negative_numbers = (char('-'), many1(satisfy(|c: char| c.is_numeric()))).map( + |(s1, s2): (char, String)| format!("{}{}", s1, s2), + ); - let field = (letter(), many(satisfy(|c: char| c.is_alphanumeric() || c == '_'))) - .map(|(s1, s2): (char, 
String)| format!("{}{}", s1, s2)); + let field = ( + letter(), + many(satisfy(|c: char| c.is_alphanumeric() || c == '_')), + ).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)); let term_val_with_field = negative_numbers.or(term_val()); let term_query = (field, char(':'), term_val_with_field).map(|(field_name, _, phrase)| { - UserInputLiteral { - field_name: - Some(field_name), - phrase: phrase, - } - }); + UserInputLiteral { + field_name: Some(field_name), + phrase: phrase, + } + }); let term_default_field = term_val().map(|phrase| { - UserInputLiteral { - field_name: None, - phrase: phrase, - } - }); + UserInputLiteral { + field_name: None, + phrase: phrase, + } + }); try(term_query) .or(term_default_field) .map(UserInputAST::from) @@ -40,25 +43,29 @@ fn literal(input: I) -> ParseResult fn leaf(input: I) -> ParseResult - where I: Stream +where + I: Stream, { (char('-'), parser(literal)) .map(|(_, expr)| UserInputAST::Not(box expr)) - .or((char('+'), parser(literal)).map(|(_, expr)| UserInputAST::Must(box expr))) + .or((char('+'), parser(literal)).map(|(_, expr)| { + UserInputAST::Must(box expr) + })) .or(parser(literal)) .parse_stream(input) } pub fn parse_to_ast(input: I) -> ParseResult - where I: Stream +where + I: Stream, { sep_by(parser(leaf), spaces()) .map(|subqueries: Vec| if subqueries.len() == 1 { - subqueries.into_iter().next().unwrap() - } else { - UserInputAST::Clause(subqueries.into_iter().map(Box::new).collect()) - }) + subqueries.into_iter().next().unwrap() + } else { + UserInputAST::Clause(subqueries.into_iter().map(Box::new).collect()) + }) .parse_stream(input) } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 0b6b43efe..5beb42745 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -117,20 +117,22 @@ impl QueryParser { /// Parse the user query into an AST. 
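// Illustrative summary, not part of the patch, of the grammar defined in
// `query_grammar.rs` above (the exact expected ASTs are spelled out in the tests
// further down):
//   word               -> literal with no explicit field (expanded later to the default fields)
//   "quoted phrase"    -> phrase literal
//   field:value        -> literal restricted to `field` (negative numbers allowed as values)
//   +expr              -> UserInputAST::Must(expr)
//   -expr              -> UserInputAST::Not(expr)
//   expr expr ...      -> UserInputAST::Clause([...])
// For example, per the tests below, parse_query_to_logical_ast("+title:toto -titi", false)
// yields an AST of the shape "(+Term(..) -(Term(..) Term(..)))", while
// parse_query_to_logical_ast("-title:toto", false) fails with
// QueryParserError::AllButQueryForbidden.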
fn parse_query_to_logical_ast(&self, query: &str) -> Result { - let (user_input_ast, _remaining) = parse_to_ast(query) - .map_err(|_| QueryParserError::SyntaxError)?; + let (user_input_ast, _remaining) = parse_to_ast(query).map_err( + |_| QueryParserError::SyntaxError, + )?; self.compute_logical_ast(user_input_ast) } fn resolve_field_name(&self, field_name: &str) -> Result { - self.schema - .get_field(field_name) - .ok_or_else(|| QueryParserError::FieldDoesNotExist(String::from(field_name))) + self.schema.get_field(field_name).ok_or_else(|| { + QueryParserError::FieldDoesNotExist(String::from(field_name)) + }) } - fn compute_logical_ast(&self, - user_input_ast: UserInputAST) - -> Result { + fn compute_logical_ast( + &self, + user_input_ast: UserInputAST, + ) -> Result { let (occur, ast) = self.compute_logical_ast_with_occur(user_input_ast)?; if occur == Occur::MustNot { return Err(QueryParserError::AllButQueryForbidden); @@ -138,10 +140,11 @@ impl QueryParser { Ok(ast) } - fn compute_logical_ast_for_leaf(&self, - field: Field, - phrase: &str) - -> Result, QueryParserError> { + fn compute_logical_ast_for_leaf( + &self, + field: Field, + phrase: &str, + ) -> Result, QueryParserError> { let field_entry = self.schema.get_field_entry(field); let field_type = field_entry.field_type(); @@ -174,7 +177,9 @@ impl QueryParser { if terms.is_empty() { Ok(None) } else if terms.len() == 1 { - Ok(Some(LogicalLiteral::Term(terms.into_iter().next().unwrap()))) + Ok(Some( + LogicalLiteral::Term(terms.into_iter().next().unwrap()), + )) } else { Ok(Some(LogicalLiteral::Phrase(terms))) } @@ -191,18 +196,24 @@ impl QueryParser { } } - fn compute_logical_ast_with_occur(&self, - user_input_ast: UserInputAST) - -> Result<(Occur, LogicalAST), QueryParserError> { + fn compute_logical_ast_with_occur( + &self, + user_input_ast: UserInputAST, + ) -> Result<(Occur, LogicalAST), QueryParserError> { match user_input_ast { UserInputAST::Clause(sub_queries) => { let default_occur = self.default_occur(); - let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(sub_queries.into_iter() - .map(|sub_query| self.compute_logical_ast_with_occur(*sub_query)) - .map(|res| { - res.map(|(occur, sub_ast)| (compose_occur(default_occur, occur), sub_ast)) - }) - .collect()); + let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!( + sub_queries + .into_iter() + .map(|sub_query| self.compute_logical_ast_with_occur(*sub_query)) + .map(|res| { + res.map(|(occur, sub_ast)| { + (compose_occur(default_occur, occur), sub_ast) + }) + }) + .collect() + ); Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries))) } UserInputAST::Not(subquery) => { @@ -320,9 +331,10 @@ mod test { } - fn parse_query_to_logical_ast(query: &str, - default_conjunction: bool) - -> Result { + fn parse_query_to_logical_ast( + query: &str, + default_conjunction: bool, + ) -> Result { let mut query_parser = make_query_parser(); if default_conjunction { query_parser.set_conjunction_by_default(); @@ -330,9 +342,11 @@ mod test { query_parser.parse_query_to_logical_ast(query) } - fn test_parse_query_to_logical_ast_helper(query: &str, - expected: &str, - default_conjunction: bool) { + fn test_parse_query_to_logical_ast_helper( + query: &str, + expected: &str, + default_conjunction: bool, + ) { let query = parse_query_to_logical_ast(query, default_conjunction).unwrap(); let query_str = format!("{:?}", query); assert_eq!(query_str, expected); @@ -358,21 +372,29 @@ mod test { } }; - assert_eq!(is_not_indexed_err("notindexed_text:titi"), - 
Some(String::from("notindexed_text"))); - assert_eq!(is_not_indexed_err("notindexed_u64:23424"), - Some(String::from("notindexed_u64"))); - assert_eq!(is_not_indexed_err("notindexed_i64:-234324"), - Some(String::from("notindexed_i64"))); + assert_eq!( + is_not_indexed_err("notindexed_text:titi"), + Some(String::from("notindexed_text")) + ); + assert_eq!( + is_not_indexed_err("notindexed_u64:23424"), + Some(String::from("notindexed_u64")) + ); + assert_eq!( + is_not_indexed_err("notindexed_i64:-234324"), + Some(String::from("notindexed_i64")) + ); } #[test] pub fn test_parse_query_untokenized() { - test_parse_query_to_logical_ast_helper("nottokenized:\"wordone wordtwo\"", - "Term([0, 0, 0, 7, 119, 111, 114, 100, 111, 110, \ + test_parse_query_to_logical_ast_helper( + "nottokenized:\"wordone wordtwo\"", + "Term([0, 0, 0, 7, 119, 111, 114, 100, 111, 110, \ 101, 32, 119, 111, 114, 100, 116, 119, 111])", - false); + false, + ); } #[test] @@ -381,82 +403,115 @@ mod test { assert!(query_parser.parse_query("signed:2324").is_ok()); assert!(query_parser.parse_query("signed:\"22\"").is_ok()); assert!(query_parser.parse_query("signed:\"-2234\"").is_ok()); - assert!(query_parser - .parse_query("signed:\"-9999999999999\"") - .is_ok()); + assert!( + query_parser + .parse_query("signed:\"-9999999999999\"") + .is_ok() + ); assert!(query_parser.parse_query("signed:\"a\"").is_err()); assert!(query_parser.parse_query("signed:\"2a\"").is_err()); - assert!(query_parser - .parse_query("signed:\"18446744073709551615\"") - .is_err()); + assert!( + query_parser + .parse_query("signed:\"18446744073709551615\"") + .is_err() + ); assert!(query_parser.parse_query("unsigned:\"2\"").is_ok()); assert!(query_parser.parse_query("unsigned:\"-2\"").is_err()); - assert!(query_parser - .parse_query("unsigned:\"18446744073709551615\"") - .is_ok()); - test_parse_query_to_logical_ast_helper("unsigned:2324", - "Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])", - false); + assert!( + query_parser + .parse_query("unsigned:\"18446744073709551615\"") + .is_ok() + ); + test_parse_query_to_logical_ast_helper( + "unsigned:2324", + "Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])", + false, + ); - test_parse_query_to_logical_ast_helper("signed:-2324", - &format!("{:?}", - Term::from_field_i64(Field(2u32), -2324)), - false); + test_parse_query_to_logical_ast_helper( + "signed:-2324", + &format!("{:?}", Term::from_field_i64(Field(2u32), -2324)), + false, + ); } #[test] pub fn test_parse_query_to_ast_disjunction() { - test_parse_query_to_logical_ast_helper("title:toto", - "Term([0, 0, 0, 0, 116, 111, 116, 111])", - false); - test_parse_query_to_logical_ast_helper("+title:toto", - "Term([0, 0, 0, 0, 116, 111, 116, 111])", - false); - test_parse_query_to_logical_ast_helper("+title:toto -titi", - "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \ + test_parse_query_to_logical_ast_helper( + "title:toto", + "Term([0, 0, 0, 0, 116, 111, 116, 111])", + false, + ); + test_parse_query_to_logical_ast_helper( + "+title:toto", + "Term([0, 0, 0, 0, 116, 111, 116, 111])", + false, + ); + test_parse_query_to_logical_ast_helper( + "+title:toto -titi", + "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \ -(Term([0, 0, 0, 0, 116, 105, 116, 105]) \ Term([0, 0, 0, 1, 116, 105, 116, 105])))", - false); - assert_eq!(parse_query_to_logical_ast("-title:toto", false) - .err() - .unwrap(), - QueryParserError::AllButQueryForbidden); - test_parse_query_to_logical_ast_helper("title:a b", - "(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \ + false, + ); + assert_eq!( + 
parse_query_to_logical_ast("-title:toto", false) + .err() + .unwrap(), + QueryParserError::AllButQueryForbidden + ); + test_parse_query_to_logical_ast_helper( + "title:a b", + "(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \ Term([0, 0, 0, 1, 98])))", - false); - test_parse_query_to_logical_ast_helper("title:\"a b\"", - "\"[Term([0, 0, 0, 0, 97]), \ + false, + ); + test_parse_query_to_logical_ast_helper( + "title:\"a b\"", + "\"[Term([0, 0, 0, 0, 97]), \ Term([0, 0, 0, 0, 98])]\"", - false); + false, + ); } #[test] pub fn test_parse_query_to_ast_conjunction() { - test_parse_query_to_logical_ast_helper("title:toto", - "Term([0, 0, 0, 0, 116, 111, 116, 111])", - true); - test_parse_query_to_logical_ast_helper("+title:toto", - "Term([0, 0, 0, 0, 116, 111, 116, 111])", - true); - test_parse_query_to_logical_ast_helper("+title:toto -titi", - "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \ + test_parse_query_to_logical_ast_helper( + "title:toto", + "Term([0, 0, 0, 0, 116, 111, 116, 111])", + true, + ); + test_parse_query_to_logical_ast_helper( + "+title:toto", + "Term([0, 0, 0, 0, 116, 111, 116, 111])", + true, + ); + test_parse_query_to_logical_ast_helper( + "+title:toto -titi", + "(+Term([0, 0, 0, 0, 116, 111, 116, 111]) \ -(Term([0, 0, 0, 0, 116, 105, 116, 105]) \ Term([0, 0, 0, 1, 116, 105, 116, 105])))", - true); - assert_eq!(parse_query_to_logical_ast("-title:toto", true) - .err() - .unwrap(), - QueryParserError::AllButQueryForbidden); - test_parse_query_to_logical_ast_helper("title:a b", - "(+Term([0, 0, 0, 0, 97]) \ + true, + ); + assert_eq!( + parse_query_to_logical_ast("-title:toto", true) + .err() + .unwrap(), + QueryParserError::AllButQueryForbidden + ); + test_parse_query_to_logical_ast_helper( + "title:a b", + "(+Term([0, 0, 0, 0, 97]) \ +(Term([0, 0, 0, 0, 98]) \ Term([0, 0, 0, 1, 98])))", - true); - test_parse_query_to_logical_ast_helper("title:\"a b\"", - "\"[Term([0, 0, 0, 0, 97]), \ + true, + ); + test_parse_query_to_logical_ast_helper( + "title:\"a b\"", + "\"[Term([0, 0, 0, 0, 97]), \ Term([0, 0, 0, 0, 98])]\"", - true); + true, + ); } } diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index 9670e73e2..bbc751c5e 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -44,8 +44,10 @@ mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); - let term_query = TermQuery::new(Term::from_field_text(text_field, "a"), - SegmentPostingsOption::NoFreq); + let term_query = TermQuery::new( + Term::from_field_text(text_field, "a"), + SegmentPostingsOption::NoFreq, + ); let term_weight = term_query.weight(&searcher).unwrap(); let segment_reader = searcher.segment_reader(0); let mut term_scorer = term_weight.scorer(segment_reader).unwrap(); diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index 73ea46b4b..95787a030 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -7,7 +7,8 @@ use postings::Postings; use fastfield::FastFieldReader; pub struct TermScorer - where TPostings: Postings +where + TPostings: Postings, { pub idf: Score, pub fieldnorm_reader_opt: Option, @@ -15,7 +16,8 @@ pub struct TermScorer } impl TermScorer - where TPostings: Postings +where + TPostings: Postings, { pub fn postings(&self) -> &TPostings { &self.postings @@ -23,7 +25,8 @@ impl TermScorer } impl DocSet for TermScorer - where TPostings: Postings +where + TPostings: Postings, { fn advance(&mut self) -> bool { self.postings.advance() @@ -40,7 +43,8 @@ impl DocSet for 
TermScorer } impl Scorer for TermScorer - where TPostings: Postings +where + TPostings: Postings, { fn score(&self) -> Score { let doc = self.postings.doc(); diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index a755a2921..d837a63fd 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -28,21 +28,22 @@ impl TermWeight { } /// If the field is not found, returns an empty `DocSet`. - pub fn specialized_scorer(&self, - reader: &SegmentReader) - -> Result> { + pub fn specialized_scorer( + &self, + reader: &SegmentReader, + ) -> Result> { let field = self.term.field(); - let inverted_index = reader.inverted_index(field)?; + let inverted_index = reader.inverted_index(field); let fieldnorm_reader_opt = reader.get_fieldnorms_reader(field); - let postings_opt: Option = inverted_index.read_postings(&self.term, self.segment_postings_options); + let postings_opt: Option = + inverted_index.read_postings(&self.term, self.segment_postings_options); if let Some(segment_postings) = postings_opt { Ok(TermScorer { idf: self.idf(), fieldnorm_reader_opt: fieldnorm_reader_opt, postings: segment_postings, }) - } - else { + } else { Ok(TermScorer { idf: 1f32, fieldnorm_reader_opt: None, diff --git a/src/schema/field.rs b/src/schema/field.rs index 9df8e149b..b7ecc3737 100644 --- a/src/schema/field.rs +++ b/src/schema/field.rs @@ -10,7 +10,7 @@ use common::BinarySerializable; /// /// Because the field id is a `u8`, tantivy can only have at most `255` fields. /// Value 255 is reserved. -#[derive(Copy, Clone, Debug, PartialEq,PartialOrd,Eq,Ord,Hash, Serialize, Deserialize)] +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)] pub struct Field(pub u32); impl BinarySerializable for Field { diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 883dc49ff..7487ff7c1 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -89,7 +89,8 @@ impl FieldEntry { impl Serialize for FieldEntry { fn serialize(&self, serializer: S) -> Result - where S: Serializer + where + S: Serializer, { let mut s = serializer.serialize_struct("field_entry", 3)?; s.serialize_field("name", &self.name)?; @@ -115,7 +116,8 @@ impl Serialize for FieldEntry { impl<'de> Deserialize<'de> for FieldEntry { fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> + where + D: Deserializer<'de>, { #[derive(Deserialize)] #[serde(field_identifier, rename_all = "lowercase")] @@ -137,7 +139,8 @@ impl<'de> Deserialize<'de> for FieldEntry { } fn visit_map(self, mut map: V) -> Result - where V: MapAccess<'de> + where + V: MapAccess<'de>, { let mut name = None; let mut ty = None; @@ -187,13 +190,14 @@ impl<'de> Deserialize<'de> for FieldEntry { let name = name.ok_or_else(|| de::Error::missing_field("name"))?; ty.ok_or_else(|| de::Error::missing_field("ty"))?; - let field_type = field_type - .ok_or_else(|| de::Error::missing_field("options"))?; + let field_type = field_type.ok_or_else( + || de::Error::missing_field("options"), + )?; Ok(FieldEntry { - name: name, - field_type: field_type, - }) + name: name, + field_type: field_type, + }) } } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 7a494c9e4..f31c6e9da 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -80,8 +80,9 @@ impl FieldType { FieldType::Str(_) => Ok(Value::Str(field_text.clone())), FieldType::U64(_) | FieldType::I64(_) => { - Err(ValueParsingError::TypeError(format!("Expected an integer, got 
{:?}", - json))) + Err(ValueParsingError::TypeError( + format!("Expected an integer, got {:?}", json), + )) } } } @@ -110,9 +111,11 @@ impl FieldType { } } _ => { - let msg = format!("Json value not supported error {:?}. Expected {:?}", - json, - self); + let msg = format!( + "Json value not supported error {:?}. Expected {:?}", + json, + self + ); Err(ValueParsingError::TypeError(msg)) } } diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 7c5f480dc..93f50ff48 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -105,9 +105,9 @@ impl SchemaBuilder { /// This will consume your `SchemaBuilder` pub fn build(self) -> Schema { Schema(Arc::new(InnerSchema { - fields: self.fields, - fields_map: self.fields_map, - })) + fields: self.fields, + fields_map: self.fields_map, + })) } } @@ -206,15 +206,14 @@ impl Schema { /// Build a document object from a json-object. pub fn parse_document(&self, doc_json: &str) -> Result { - let json_obj: JsonObject = serde_json::from_str(doc_json) - .map_err(|_| { - let doc_json_sample: String = if doc_json.len() < 20 { - String::from(doc_json) - } else { - format!("{:?}...", &doc_json[0..20]) - }; - DocParsingError::NotJSON(doc_json_sample) - })?; + let json_obj: JsonObject = serde_json::from_str(doc_json).map_err(|_| { + let doc_json_sample: String = if doc_json.len() < 20 { + String::from(doc_json) + } else { + format!("{:?}...", &doc_json[0..20]) + }; + DocParsingError::NotJSON(doc_json_sample) + })?; let mut doc = Document::default(); for (field_name, json_value) in json_obj.iter() { @@ -225,18 +224,15 @@ impl Schema { match *json_value { JsonValue::Array(ref json_items) => { for json_item in json_items { - let value = try!(field_type - .value_from_json(json_item) - .map_err(|e| { - DocParsingError::ValueError(field_name.clone(), e) - })); + let value = + try!(field_type.value_from_json(json_item).map_err(|e| { + DocParsingError::ValueError(field_name.clone(), e) + })); doc.add(FieldValue::new(field, value)); } } _ => { - let value = try!(field_type - .value_from_json(json_value) - .map_err(|e| { + let value = try!(field_type.value_from_json(json_value).map_err(|e| { DocParsingError::ValueError(field_name.clone(), e) })); doc.add(FieldValue::new(field, value)); @@ -259,7 +255,8 @@ impl fmt::Debug for Schema { impl Serialize for Schema { fn serialize(&self, serializer: S) -> Result - where S: Serializer + where + S: Serializer, { let mut seq = serializer.serialize_seq(Some(self.0.fields.len()))?; for e in &self.0.fields { @@ -271,7 +268,8 @@ impl Serialize for Schema { impl<'de> Deserialize<'de> for Schema { fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> + where + D: Deserializer<'de>, { struct SchemaVisitor; @@ -283,7 +281,8 @@ impl<'de> Deserialize<'de> for Schema { } fn visit_seq(self, mut seq: A) -> Result - where A: SeqAccess<'de> + where + A: SeqAccess<'de>, { let mut schema = SchemaBuilder { fields: Vec::with_capacity(seq.size_hint().unwrap_or(0)), @@ -430,12 +429,14 @@ mod tests { } { let doc = schema - .parse_document(r#"{ + .parse_document( + r#"{ "title": "my title", "author": "fulmicoton", "count": 4, "popularity": 10 - }"#) + }"#, + ) .unwrap(); assert_eq!(doc.get_first(title_field).unwrap().text(), "my title"); assert_eq!(doc.get_first(author_field).unwrap().text(), "fulmicoton"); @@ -443,13 +444,15 @@ mod tests { assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10); } { - let json_err = schema.parse_document(r#"{ + let json_err = schema.parse_document( + r#"{ "title": "my title", 
"author": "fulmicoton", "count": 4, "popularity": 10, "jambon": "bayonne" - }"#); + }"#, + ); match json_err { Err(DocParsingError::NoSuchFieldInSchema(field_name)) => { assert_eq!(field_name, "jambon"); @@ -460,13 +463,15 @@ mod tests { } } { - let json_err = schema.parse_document(r#"{ + let json_err = schema.parse_document( + r#"{ "title": "my title", "author": "fulmicoton", "count": "5", "popularity": "10", "jambon": "bayonne" - }"#); + }"#, + ); match json_err { Err(DocParsingError::ValueError(_, ValueParsingError::TypeError(_))) => { assert!(true); @@ -477,12 +482,14 @@ mod tests { } } { - let json_err = schema.parse_document(r#"{ + let json_err = schema.parse_document( + r#"{ "title": "my title", "author": "fulmicoton", "count": -5, "popularity": 10 - }"#); + }"#, + ); match json_err { Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => { assert!(true); @@ -493,12 +500,14 @@ mod tests { } } { - let json_err = schema.parse_document(r#"{ + let json_err = schema.parse_document( + r#"{ "title": "my title", "author": "fulmicoton", "count": 9223372036854775808, "popularity": 10 - }"#); + }"#, + ); match json_err { Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => { panic!("expected 9223372036854775808 to fit into u64, but it didn't"); @@ -509,12 +518,14 @@ mod tests { } } { - let json_err = schema.parse_document(r#"{ + let json_err = schema.parse_document( + r#"{ "title": "my title", "author": "fulmicoton", "count": 50, "popularity": 9223372036854775808 - }"#); + }"#, + ); match json_err { Err(DocParsingError::ValueError(_, ValueParsingError::OverflowError(_))) => { assert!(true); @@ -525,11 +536,13 @@ mod tests { } } { - let json_err = schema.parse_document(r#"{ + let json_err = schema.parse_document( + r#"{ "title": "my title", "author": "fulmicoton", "count": 50, - }"#); + }"#, + ); match json_err { Err(NotJSON(_)) => { assert!(true); diff --git a/src/schema/term.rs b/src/schema/term.rs index f66144b07..197f4975a 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -13,7 +13,9 @@ const INT_TERM_LEN: usize = 4 + 8; /// /// It actually wraps a `Vec`. #[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)] -pub struct Term>(B) where B: AsRef<[u8]>; +pub struct Term>(B) +where + B: AsRef<[u8]>; impl Term { /// Builds a term given a field, and a u64-value @@ -109,7 +111,8 @@ impl Term { } impl Term - where B: AsRef<[u8]> +where + B: AsRef<[u8]>, { /// Wraps a source of data pub fn wrap(data: B) -> Term { @@ -166,7 +169,8 @@ impl Term } impl AsRef<[u8]> for Term - where B: AsRef<[u8]> +where + B: AsRef<[u8]>, { fn as_ref(&self) -> &[u8] { self.0.as_ref() diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs index 472bd3e1e..ddcd9948e 100644 --- a/src/schema/text_options.rs +++ b/src/schema/text_options.rs @@ -2,7 +2,7 @@ use std::ops::BitOr; /// Define how a text field should be handled by tantivy. -#[derive(Clone,Debug,PartialEq,Eq, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct TextOptions { indexing: TextIndexingOptions, stored: bool, @@ -45,10 +45,10 @@ impl Default for TextOptions { /// Describe how a field should be indexed -#[derive(Clone,Copy,Debug,PartialEq,PartialOrd,Eq,Hash, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Hash, Serialize, Deserialize)] pub enum TextIndexingOptions { /// Unindexed fields will not generate any postings. They will not be searchable either. 
- #[serde(rename="unindexed")] + #[serde(rename = "unindexed")] Unindexed, /// Untokenized means that the field text will not be split into tokens before being indexed. /// A field with the value "Hello world", will have the document suscribe to one single @@ -56,23 +56,23 @@ pub enum TextIndexingOptions { /// /// It will **not** be searchable if the user enter "hello" for instance. /// This can be useful for tags, or ids for instance. - #[serde(rename="untokenized")] + #[serde(rename = "untokenized")] Untokenized, /// TokenizedNoFreq will tokenize the field value, and append the document doc id /// to the posting lists associated to all of the tokens. /// The frequence of appearance of the term in the document however will be lost. /// The term frequency used in the TfIdf formula will always be 1. - #[serde(rename="tokenize")] + #[serde(rename = "tokenize")] TokenizedNoFreq, /// TokenizedWithFreq will tokenize the field value, and encode /// both the docid and the term frequency in the posting lists associated to all - #[serde(rename="freq")] + #[serde(rename = "freq")] TokenizedWithFreq, /// Like TokenizedWithFreq, but also encodes the positions of the /// terms in a separate file. This option is required for phrase queries. /// Don't use this if you are certain you won't need it, the term positions file /// can be very big. - #[serde(rename="position")] + #[serde(rename = "position")] TokenizedWithFreqAndPosition, } diff --git a/src/schema/value.rs b/src/schema/value.rs index ad24688ee..828822a8e 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -16,7 +16,8 @@ pub enum Value { impl Serialize for Value { fn serialize(&self, serializer: S) -> Result - where S: Serializer + where + S: Serializer, { match *self { Value::Str(ref v) => serializer.serialize_str(v), @@ -28,7 +29,8 @@ impl Serialize for Value { impl<'de> Deserialize<'de> for Value { fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> + where + D: Deserializer<'de>, { struct ValueVisitor; @@ -162,9 +164,13 @@ mod binary_serialize { Ok(Value::I64(value)) } _ => { - Err(io::Error::new(io::ErrorKind::InvalidData, - format!("No field type is associated with code {:?}", - type_code))) + Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "No field type is associated with code {:?}", + type_code + ), + )) } } } diff --git a/src/store/mod.rs b/src/store/mod.rs index 59e0558d1..46138d556 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -54,17 +54,19 @@ mod tests { fn write_lorem_ipsum_store(writer: WritePtr, num_docs: usize) -> Schema { let mut schema_builder = SchemaBuilder::default(); let field_body = schema_builder.add_text_field("body", TextOptions::default().set_stored()); - let field_title = schema_builder - .add_text_field("title", TextOptions::default().set_stored()); + let field_title = + schema_builder.add_text_field("title", TextOptions::default().set_stored()); let schema = schema_builder.build(); - let lorem = String::from("Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed \ + let lorem = String::from( + "Doc Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed \ do eiusmod tempor incididunt ut labore et dolore magna aliqua. \ Ut enim ad minim veniam, quis nostrud exercitation ullamco \ laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure \ dolor in reprehenderit in voluptate velit esse cillum dolore eu \ fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non \ proident, sunt in culpa qui officia deserunt mollit anim id est \ - laborum."); + laborum.", + ); { let mut store_writer = StoreWriter::new(writer); for i in 0..num_docs { @@ -96,8 +98,10 @@ mod tests { let store_source = directory.open_read(path).unwrap(); let store = StoreReader::from_source(store_source); for i in 0..1_000 { - assert_eq!(*store.get(i).unwrap().get_first(field_title).unwrap().text(), - format!("Doc {}", i)); + assert_eq!( + *store.get(i).unwrap().get_first(field_title).unwrap().text(), + format!("Doc {}", i) + ); } } @@ -106,9 +110,9 @@ mod tests { let mut directory = MmapDirectory::create_from_tempdir().unwrap(); let path = Path::new("store"); b.iter(|| { - write_lorem_ipsum_store(directory.open_write(path).unwrap(), 1_000); - directory.delete(path).unwrap(); - }); + write_lorem_ipsum_store(directory.open_write(path).unwrap(), 1_000); + directory.delete(path).unwrap(); + }); } diff --git a/src/store/reader.rs b/src/store/reader.rs index 05781a583..72f9b2da7 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -49,7 +49,7 @@ impl StoreReader { let mut cursor = &total_buffer[block_offset..]; let block_length = u32::deserialize(&mut cursor).unwrap(); let block_array: &[u8] = &total_buffer[(block_offset + 4 as usize).. - (block_offset + 4 + block_length as usize)]; + (block_offset + 4 + block_length as usize)]; let mut lz4_decoder = try!(lz4::Decoder::new(block_array)); *self.current_block_offset.borrow_mut() = usize::max_value(); try!(lz4_decoder.read_to_end(&mut current_block_mut).map(|_| ())); @@ -94,5 +94,9 @@ fn split_source(data: ReadOnlySource) -> (ReadOnlySource, ReadOnlySource, DocId) let offset = u64::deserialize(&mut serialized_offset_buf).unwrap(); let offset = offset as usize; let max_doc = u32::deserialize(&mut serialized_offset_buf).unwrap(); - (data.slice(0, offset), data.slice(offset, footer_offset), max_doc) + ( + data.slice(0, offset), + data.slice(offset, footer_offset), + max_doc, + ) } diff --git a/src/store/writer.rs b/src/store/writer.rs index 28befa7af..2b7aacb19 100644 --- a/src/store/writer.rs +++ b/src/store/writer.rs @@ -49,12 +49,15 @@ impl StoreWriter { /// pub fn store<'a>(&mut self, field_values: &[&'a FieldValue]) -> io::Result<()> { self.intermediary_buffer.clear(); - try!((field_values.len() as u32).serialize(&mut self.intermediary_buffer)); + try!((field_values.len() as u32).serialize( + &mut self.intermediary_buffer, + )); for field_value in field_values { try!((*field_value).serialize(&mut self.intermediary_buffer)); } - (self.intermediary_buffer.len() as u32) - .serialize(&mut self.current_block)?; + (self.intermediary_buffer.len() as u32).serialize( + &mut self.current_block, + )?; self.current_block.write_all(&self.intermediary_buffer[..])?; self.doc += 1; if self.current_block.len() > BLOCK_SIZE { @@ -66,16 +69,22 @@ impl StoreWriter { fn write_and_compress_block(&mut self) -> io::Result<()> { self.intermediary_buffer.clear(); { - let mut encoder = try!(lz4::EncoderBuilder::new().build(&mut self.intermediary_buffer)); + let mut encoder = try!(lz4::EncoderBuilder::new().build( + &mut self.intermediary_buffer, + )); try!(encoder.write_all(&self.current_block)); let (_, encoder_result) = encoder.finish(); try!(encoder_result); } - (self.intermediary_buffer.len() as u32) - .serialize(&mut self.writer)?; + (self.intermediary_buffer.len() as u32).serialize( + &mut self.writer, + )?; self.writer.write_all(&self.intermediary_buffer)?; - self.offset_index_writer - .insert(self.doc, 
&(self.writer.written_bytes() as u64))?; + self.offset_index_writer.insert( + self.doc, + &(self.writer.written_bytes() as + u64), + )?; self.current_block.clear(); Ok(()) } @@ -90,8 +99,7 @@ impl StoreWriter { try!(self.write_and_compress_block()); } let header_offset: u64 = self.writer.written_bytes() as u64; - try!(self.offset_index_writer - .write(&mut self.writer)); + try!(self.offset_index_writer.write(&mut self.writer)); try!(header_offset.serialize(&mut self.writer)); try!(self.doc.serialize(&mut self.writer)); self.writer.flush() diff --git a/src/termdict/fstdict/streamer.rs b/src/termdict/fstdict/streamer.rs index 823e5cdc4..1d90fe9c1 100644 --- a/src/termdict/fstdict/streamer.rs +++ b/src/termdict/fstdict/streamer.rs @@ -5,17 +5,13 @@ use super::TermDictionaryImpl; use termdict::{TermStreamerBuilder, TermStreamer}; /// See [`TermStreamerBuilder`](./trait.TermStreamerBuilder.html) -pub struct TermStreamerBuilderImpl<'a> -{ +pub struct TermStreamerBuilderImpl<'a> { fst_map: &'a TermDictionaryImpl, stream_builder: StreamBuilder<'a>, } -impl<'a> TermStreamerBuilderImpl<'a> -{ - pub(crate) fn new(fst_map: &'a TermDictionaryImpl, - stream_builder: StreamBuilder<'a>) - -> Self { +impl<'a> TermStreamerBuilderImpl<'a> { + pub(crate) fn new(fst_map: &'a TermDictionaryImpl, stream_builder: StreamBuilder<'a>) -> Self { TermStreamerBuilderImpl { fst_map: fst_map, stream_builder: stream_builder, @@ -23,8 +19,7 @@ impl<'a> TermStreamerBuilderImpl<'a> } } -impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> -{ +impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> { type Streamer = TermStreamerImpl<'a>; fn ge>(mut self, bound: T) -> Self { @@ -60,8 +55,7 @@ impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> /// See [`TermStreamer`](./trait.TermStreamer.html) -pub struct TermStreamerImpl<'a> -{ +pub struct TermStreamerImpl<'a> { fst_map: &'a TermDictionaryImpl, stream: Stream<'a>, offset: u64, @@ -69,17 +63,15 @@ pub struct TermStreamerImpl<'a> current_value: TermInfo, } -impl<'a> TermStreamer for TermStreamerImpl<'a> -{ +impl<'a> TermStreamer for TermStreamerImpl<'a> { fn advance(&mut self) -> bool { if let Some((term, offset)) = self.stream.next() { self.current_key.clear(); self.current_key.extend_from_slice(term); self.offset = offset; - self.current_value = - self.fst_map - .read_value(self.offset) - .expect("Fst data is corrupted. Failed to deserialize a value."); + self.current_value = self.fst_map.read_value(self.offset).expect( + "Fst data is corrupted. 
Failed to deserialize a value.", + ); true } else { false diff --git a/src/termdict/fstdict/termdict.rs b/src/termdict/fstdict/termdict.rs index 253e70b2a..ce608113b 100644 --- a/src/termdict/fstdict/termdict.rs +++ b/src/termdict/fstdict/termdict.rs @@ -13,14 +13,14 @@ fn convert_fst_error(e: fst::Error) -> io::Error { } /// See [`TermDictionaryBuilder`](./trait.TermDictionaryBuilder.html) -pub struct TermDictionaryBuilderImpl -{ +pub struct TermDictionaryBuilderImpl { fst_builder: fst::MapBuilder, data: Vec, } impl TermDictionaryBuilderImpl - where W: Write +where + W: Write, { /// # Warning /// Horribly dangerous internal API @@ -46,14 +46,15 @@ impl TermDictionaryBuilderImpl } impl TermDictionaryBuilder for TermDictionaryBuilderImpl - where W: Write +where + W: Write, { fn new(w: W, _field_type: FieldType) -> io::Result { let fst_builder = fst::MapBuilder::new(w).map_err(convert_fst_error)?; Ok(TermDictionaryBuilderImpl { - fst_builder: fst_builder, - data: Vec::new(), - }) + fst_builder: fst_builder, + data: Vec::new(), + }) } fn insert>(&mut self, key_ref: K, value: &TermInfo) -> io::Result<()> { @@ -75,28 +76,25 @@ impl TermDictionaryBuilder for TermDictionaryBuilderImpl } } -fn open_fst_index(source: ReadOnlySource) -> io::Result { +fn open_fst_index(source: ReadOnlySource) -> fst::Map { let fst = match source { ReadOnlySource::Anonymous(data) => { - Fst::from_shared_bytes(data.data, data.start, data.len) - .map_err(convert_fst_error)? + Fst::from_shared_bytes(data.data, data.start, data.len).expect("FST data is corrupted") } ReadOnlySource::Mmap(mmap_readonly) => { - Fst::from_mmap(mmap_readonly).map_err(convert_fst_error)? + Fst::from_mmap(mmap_readonly).expect("FST data is corrupted") } }; - Ok(fst::Map::from(fst)) + fst::Map::from(fst) } /// See [`TermDictionary`](./trait.TermDictionary.html) -pub struct TermDictionaryImpl -{ +pub struct TermDictionaryImpl { fst_index: fst::Map, values_mmap: ReadOnlySource, } -impl TermDictionaryImpl -{ +impl TermDictionaryImpl { /// Deserialize and returns the value at address `offset` pub(crate) fn read_value(&self, offset: u64) -> io::Result { let buffer = self.values_mmap.as_slice(); @@ -106,34 +104,34 @@ impl TermDictionaryImpl } -impl<'a> TermDictionary<'a> for TermDictionaryImpl -{ +impl<'a> TermDictionary<'a> for TermDictionaryImpl { type Streamer = TermStreamerImpl<'a>; type StreamBuilder = TermStreamerBuilderImpl<'a>; - fn from_source(source: ReadOnlySource) -> io::Result { + fn from_source(source: ReadOnlySource) -> Self { let total_len = source.len(); let length_offset = total_len - 4; let mut split_len_buffer: &[u8] = &source.as_slice()[length_offset..]; - let footer_size = u32::deserialize(&mut split_len_buffer)? as usize; + let footer_size = u32::deserialize(&mut split_len_buffer).expect( + "Deserializing 4 bytes should always work", + ) as usize; let split_len = length_offset - footer_size; let fst_source = source.slice(0, split_len); let values_source = source.slice(split_len, length_offset); - let fst_index = open_fst_index(fst_source)?; - Ok(TermDictionaryImpl { - fst_index: fst_index, - values_mmap: values_source, - }) + let fst_index = open_fst_index(fst_source); + TermDictionaryImpl { + fst_index: fst_index, + values_mmap: values_source, + } } fn get>(&self, key: K) -> Option { - self.fst_index - .get(key) - .map(|offset| { - self.read_value(offset) - .expect("The fst is corrupted. Failed to deserialize a value.") - }) + self.fst_index.get(key).map(|offset| { + self.read_value(offset).expect( + "The fst is corrupted. 
Failed to deserialize a value.", + ) + }) } fn range(&self) -> TermStreamerBuilderImpl { diff --git a/src/termdict/merger.rs b/src/termdict/merger.rs index 1e0dde82f..517f9589a 100644 --- a/src/termdict/merger.rs +++ b/src/termdict/merger.rs @@ -4,30 +4,26 @@ use std::cmp::Ordering; use termdict::TermStreamer; use schema::Term; -pub struct HeapItem<'a> -{ +pub struct HeapItem<'a> { pub streamer: TermStreamerImpl<'a>, pub segment_ord: usize, } -impl<'a> PartialEq for HeapItem<'a> -{ +impl<'a> PartialEq for HeapItem<'a> { fn eq(&self, other: &Self) -> bool { self.segment_ord == other.segment_ord } } -impl<'a> Eq for HeapItem<'a> {} +impl<'a> Eq for HeapItem<'a> {} -impl<'a> PartialOrd for HeapItem<'a> -{ +impl<'a> PartialOrd for HeapItem<'a> { fn partial_cmp(&self, other: &HeapItem<'a>) -> Option { Some(self.cmp(other)) } } -impl<'a> Ord for HeapItem<'a> -{ +impl<'a> Ord for HeapItem<'a> { fn cmp(&self, other: &HeapItem<'a>) -> Ordering { (&other.streamer.key(), &other.segment_ord).cmp(&(&self.streamer.key(), &self.segment_ord)) } @@ -40,15 +36,12 @@ impl<'a> Ord for HeapItem<'a> /// - the term /// - a slice with the ordinal of the segments containing /// the terms. -pub struct TermMerger<'a> -{ +pub struct TermMerger<'a> { heap: BinaryHeap>, current_streamers: Vec>, } -impl<'a> TermMerger<'a> -{ - +impl<'a> TermMerger<'a> { /// Stream of merged term dictionary /// /// @@ -59,11 +52,11 @@ impl<'a> TermMerger<'a> .into_iter() .enumerate() .map(|(ord, streamer)| { - HeapItem { - streamer: streamer, - segment_ord: ord, - } - }) + HeapItem { + streamer: streamer, + segment_ord: ord, + } + }) .collect(), } } @@ -133,5 +126,3 @@ impl<'a> TermMerger<'a> } } } - - diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs index 13a31b6d7..9150b8f85 100644 --- a/src/termdict/mod.rs +++ b/src/termdict/mod.rs @@ -54,16 +54,16 @@ use postings::TermInfo; pub use self::merger::TermMerger; -#[cfg(not(feature="streamdict"))] +#[cfg(not(feature = "streamdict"))] mod fstdict; -#[cfg(not(feature="streamdict"))] +#[cfg(not(feature = "streamdict"))] pub use self::fstdict::{TermDictionaryImpl, TermDictionaryBuilderImpl, TermStreamerImpl, TermStreamerBuilderImpl}; -#[cfg(feature="streamdict")] +#[cfg(feature = "streamdict")] mod streamdict; -#[cfg(feature="streamdict")] +#[cfg(feature = "streamdict")] pub use self::streamdict::{TermDictionaryImpl, TermDictionaryBuilderImpl, TermStreamerImpl, TermStreamerBuilderImpl}; @@ -72,7 +72,9 @@ use std::io; /// Dictionary associating sorted `&[u8]` to values -pub trait TermDictionary<'a> where Self: Sized +pub trait TermDictionary<'a> +where + Self: Sized, { /// Streamer type associated to the term dictionary type Streamer: TermStreamer + 'a; @@ -81,7 +83,7 @@ pub trait TermDictionary<'a> where Self: Sized type StreamBuilder: TermStreamerBuilder + 'a; /// Opens a `TermDictionary` given a data source. - fn from_source(source: ReadOnlySource) -> io::Result; + fn from_source(source: ReadOnlySource) -> Self; /// Lookups the value corresponding to the key. fn get>(&self, target_key: K) -> Option; @@ -110,7 +112,8 @@ pub trait TermDictionary<'a> where Self: Sized /// /// Inserting must be done in the order of the `keys`. pub trait TermDictionaryBuilder: Sized - where W: io::Write +where + W: io::Write, { /// Creates a new `TermDictionaryBuilder` fn new(write: W, field_type: FieldType) -> io::Result; @@ -170,8 +173,7 @@ pub trait TermStreamer: Sized { /// `TermStreamerBuilder` is an helper object used to define /// a range of terms that should be streamed. 
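// Illustrative usage of the `TermDictionary` / `TermStreamerBuilder` traits,
// pieced together from the tests in this module (imports and the `make_term_info`
// test helper are assumed):
//
//   let field_type = FieldType::Str(TEXT);
//   let mut builder = TermDictionaryBuilderImpl::new(vec![], field_type).unwrap();
//   builder.insert("abc".as_bytes(), &make_term_info(34u32)).unwrap();   // keys must be inserted in sorted order
//   builder.insert("abcd".as_bytes(), &make_term_info(346u32)).unwrap();
//   let buffer: Vec<u8> = builder.finish().unwrap();
//
//   let dict = TermDictionaryImpl::from_source(ReadOnlySource::from(buffer));
//   assert_eq!(dict.get("abc").unwrap().doc_freq, 34u32);
//   let mut stream = dict.range().ge("abc").lt("abcd").into_stream();
//   while stream.advance() {
//       let (_key, _term_info) = (stream.key(), stream.value());
//   }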
-pub trait TermStreamerBuilder -{ +pub trait TermStreamerBuilder { /// Associated `TermStreamer` type that this builder is building. type Streamer: TermStreamer; @@ -226,7 +228,8 @@ mod tests { { let write = directory.open_write(&path).unwrap(); let field_type = FieldType::Str(TEXT); - let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(write, field_type).unwrap(); + let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(write, field_type) + .unwrap(); term_dictionary_builder .insert("abc".as_bytes(), &make_term_info(34u32)) .unwrap(); @@ -236,7 +239,7 @@ mod tests { term_dictionary_builder.finish().unwrap(); } let source = directory.open_read(&path).unwrap(); - let term_dict: TermDictionaryImpl = TermDictionaryImpl::from_source(source).unwrap(); + let term_dict: TermDictionaryImpl = TermDictionaryImpl::from_source(source); assert_eq!(term_dict.get("abc").unwrap().doc_freq, 34u32); assert_eq!(term_dict.get("abcd").unwrap().doc_freq, 346u32); let mut stream = term_dict.stream(); @@ -296,7 +299,7 @@ mod tests { index.load_searchers().unwrap(); let searcher = index.searcher(); - let field_searcher = searcher.field(text_field).unwrap(); + let field_searcher = searcher.field(text_field); let mut term_it = field_searcher.terms(); let mut term_string = String::new(); while term_it.advance() { @@ -314,15 +317,17 @@ mod tests { .collect(); let field_type = FieldType::Str(TEXT); let buffer: Vec = { - let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type).unwrap(); + let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type) + .unwrap(); for &(ref id, ref i) in &ids { - term_dictionary_builder.insert(id.as_bytes(), &make_term_info(*i)).unwrap(); + term_dictionary_builder + .insert(id.as_bytes(), &make_term_info(*i)) + .unwrap(); } term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source) - .unwrap(); + let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source); { let mut streamer = term_dictionary.stream(); let mut i = 0; @@ -343,16 +348,22 @@ mod tests { fn test_stream_high_range_prefix_suffix() { let field_type = FieldType::Str(TEXT); let buffer: Vec = { - let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type).unwrap(); + let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type) + .unwrap(); // term requires more than 16bits - term_dictionary_builder.insert("abcdefghijklmnopqrstuvwxy", &make_term_info(1)).unwrap(); - term_dictionary_builder.insert("abcdefghijklmnopqrstuvwxyz", &make_term_info(2)).unwrap(); - term_dictionary_builder.insert("abr", &make_term_info(2)).unwrap(); + term_dictionary_builder + .insert("abcdefghijklmnopqrstuvwxy", &make_term_info(1)) + .unwrap(); + term_dictionary_builder + .insert("abcdefghijklmnopqrstuvwxyz", &make_term_info(2)) + .unwrap(); + term_dictionary_builder + .insert("abr", &make_term_info(2)) + .unwrap(); term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source) - .unwrap(); + let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source); let mut kv_stream = term_dictionary.stream(); assert!(kv_stream.advance()); assert_eq!(kv_stream.key(), "abcdefghijklmnopqrstuvwxy".as_bytes()); @@ -372,17 +383,19 @@ mod tests { .collect(); let field_type = 
FieldType::Str(TEXT); let buffer: Vec = { - let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type).unwrap(); + let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type) + .unwrap(); for &(ref id, ref i) in &ids { - term_dictionary_builder.insert(id.as_bytes(), &make_term_info(*i)).unwrap(); + term_dictionary_builder + .insert(id.as_bytes(), &make_term_info(*i)) + .unwrap(); } term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source) - .unwrap(); + let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source); { for i in (0..20).chain(6000..8_000) { let &(ref target_key, _) = &ids[i]; @@ -440,16 +453,18 @@ mod tests { fn test_stream_range_boundaries() { let field_type = FieldType::Str(TEXT); let buffer: Vec = { - let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type).unwrap(); + let mut term_dictionary_builder = TermDictionaryBuilderImpl::new(vec![], field_type) + .unwrap(); for i in 0u8..10u8 { let number_arr = [i; 1]; - term_dictionary_builder.insert(&number_arr, &make_term_info(i as u32)).unwrap(); + term_dictionary_builder + .insert(&number_arr, &make_term_info(i as u32)) + .unwrap(); } term_dictionary_builder.finish().unwrap() }; let source = ReadOnlySource::from(buffer); - let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source) - .unwrap(); + let term_dictionary: TermDictionaryImpl = TermDictionaryImpl::from_source(source); let value_list = |mut streamer: TermStreamerImpl| { let mut res: Vec = vec![]; @@ -460,12 +475,17 @@ mod tests { }; { let range = term_dictionary.range().ge([2u8]).into_stream(); - assert_eq!(value_list(range), - vec![2u32, 3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]); + assert_eq!( + value_list(range), + vec![2u32, 3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32] + ); } { let range = term_dictionary.range().gt([2u8]).into_stream(); - assert_eq!(value_list(range), vec![3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32]); + assert_eq!( + value_list(range), + vec![3u32, 4u32, 5u32, 6u32, 7u32, 8u32, 9u32] + ); } { let range = term_dictionary.range().lt([6u8]).into_stream(); @@ -473,7 +493,10 @@ mod tests { } { let range = term_dictionary.range().le([6u8]).into_stream(); - assert_eq!(value_list(range), vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32, 6u32]); + assert_eq!( + value_list(range), + vec![0u32, 1u32, 2u32, 3u32, 4u32, 5u32, 6u32] + ); } { let range = term_dictionary.range().ge([0u8]).lt([5u8]).into_stream(); diff --git a/src/termdict/streamdict/delta_encoder.rs b/src/termdict/streamdict/delta_encoder.rs index 9a2a4173e..5ba466203 100644 --- a/src/termdict/streamdict/delta_encoder.rs +++ b/src/termdict/streamdict/delta_encoder.rs @@ -11,7 +11,7 @@ use common::BinarySerializable; fn common_prefix_len(s1: &[u8], s2: &[u8]) -> usize { s1.iter() .zip(s2.iter()) - .take_while(|&(a, b)| a==b) + .take_while(|&(a, b)| a == b) .count() } @@ -45,32 +45,28 @@ pub struct TermDeltaDecoder { impl TermDeltaDecoder { pub fn with_previous_term(term: Vec) -> TermDeltaDecoder { - TermDeltaDecoder { - term: Vec::from(term) - } + TermDeltaDecoder { term: Vec::from(term) } } #[inline(always)] pub fn decode<'a>(&mut self, code: u8, mut cursor: &'a [u8]) -> &'a [u8] { - let (prefix_len, suffix_len): (usize, usize) = - if (code & 1u8) == 1u8 { - let b = cursor[0]; - cursor = &cursor[1..]; - let prefix_len = (b & 15u8) as usize; - let suffix_len = (b >> 4u8) as 
usize; - (prefix_len, suffix_len) - } - else { - let prefix_len = u32::deserialize(&mut cursor).unwrap(); - let suffix_len = u32::deserialize(&mut cursor).unwrap(); - (prefix_len as usize, suffix_len as usize) - }; + let (prefix_len, suffix_len): (usize, usize) = if (code & 1u8) == 1u8 { + let b = cursor[0]; + cursor = &cursor[1..]; + let prefix_len = (b & 15u8) as usize; + let suffix_len = (b >> 4u8) as usize; + (prefix_len, suffix_len) + } else { + let prefix_len = u32::deserialize(&mut cursor).unwrap(); + let suffix_len = u32::deserialize(&mut cursor).unwrap(); + (prefix_len as usize, suffix_len as usize) + }; unsafe { self.term.set_len(prefix_len) }; self.term.extend_from_slice(&(*cursor)[..suffix_len]); &cursor[suffix_len..] } - pub fn term(&self) -> &[u8] { + pub fn term(&self) -> &[u8] { &self.term[..] } } @@ -89,7 +85,6 @@ pub struct TermInfoDeltaEncoder { } impl TermInfoDeltaEncoder { - pub fn new(has_positions: bool) -> Self { TermInfoDeltaEncoder { term_info: TermInfo::default(), @@ -109,7 +104,8 @@ impl TermInfoDeltaEncoder { positions_inner_offset: 0, }; if self.has_positions { - delta_term_info.delta_positions_offset = term_info.positions_offset - self.term_info.positions_offset; + delta_term_info.delta_positions_offset = term_info.positions_offset - + self.term_info.positions_offset; delta_term_info.positions_inner_offset = term_info.positions_inner_offset; } mem::replace(&mut self.term_info, term_info); @@ -131,7 +127,6 @@ pub fn make_mask(num_bytes: usize) -> u32 { } impl TermInfoDeltaDecoder { - pub fn from_term_info(term_info: TermInfo, has_positions: bool) -> TermInfoDeltaDecoder { TermInfoDeltaDecoder { term_info: term_info, @@ -147,7 +142,7 @@ impl TermInfoDeltaDecoder { positions_offset: checkpoint.positions_offset, positions_inner_offset: 0u8, }, - has_positions: has_positions + has_positions: has_positions, } } @@ -164,12 +159,12 @@ impl TermInfoDeltaDecoder { self.term_info.postings_offset += delta_postings_offset; if self.has_positions { let num_bytes_positions_offset = ((code >> 5) & 3) as usize + 1; - let delta_positions_offset: u32 = unsafe { *(cursor.as_ptr() as *const u32) } & make_mask(num_bytes_positions_offset); + let delta_positions_offset: u32 = unsafe { *(cursor.as_ptr() as *const u32) } & + make_mask(num_bytes_positions_offset); self.term_info.positions_offset += delta_positions_offset; self.term_info.positions_inner_offset = cursor[num_bytes_positions_offset]; &cursor[num_bytes_positions_offset + 1..] 
- } - else { + } else { cursor } } diff --git a/src/termdict/streamdict/mod.rs b/src/termdict/streamdict/mod.rs index f9c01529e..176f63377 100644 --- a/src/termdict/streamdict/mod.rs +++ b/src/termdict/streamdict/mod.rs @@ -22,7 +22,6 @@ pub struct CheckPoint { } impl BinarySerializable for CheckPoint { - fn serialize(&self, writer: &mut W) -> io::Result<()> { self.stream_offset.serialize(writer)?; self.postings_offset.serialize(writer)?; @@ -40,4 +39,4 @@ impl BinarySerializable for CheckPoint { positions_offset: positions_offset, }) } -} \ No newline at end of file +} diff --git a/src/termdict/streamdict/streamer.rs b/src/termdict/streamdict/streamer.rs index dcb4b8bdb..22f687da1 100644 --- a/src/termdict/streamdict/streamer.rs +++ b/src/termdict/streamdict/streamer.rs @@ -7,11 +7,11 @@ use postings::TermInfo; use super::delta_encoder::{TermInfoDeltaDecoder, TermDeltaDecoder}; -fn stream_before<'a>(term_dictionary: &'a TermDictionaryImpl, - target_key: &[u8], - has_positions: bool) - -> TermStreamerImpl<'a> -{ +fn stream_before<'a>( + term_dictionary: &'a TermDictionaryImpl, + target_key: &[u8], + has_positions: bool, +) -> TermStreamerImpl<'a> { let (prev_key, checkpoint) = term_dictionary.strictly_previous_key(target_key.as_ref()); let stream_data: &'a [u8] = &term_dictionary.stream_data()[checkpoint.stream_offset as usize..]; @@ -24,8 +24,7 @@ fn stream_before<'a>(term_dictionary: &'a TermDictionaryImpl, /// See [`TermStreamerBuilder`](./trait.TermStreamerBuilder.html) -pub struct TermStreamerBuilderImpl<'a> -{ +pub struct TermStreamerBuilderImpl<'a> { term_dictionary: &'a TermDictionaryImpl, origin: usize, offset_from: usize, @@ -35,14 +34,17 @@ pub struct TermStreamerBuilderImpl<'a> has_positions: bool, } -impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> -{ +impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> { type Streamer = TermStreamerImpl<'a>; /// Limit the range to terms greater or equal to the bound fn ge>(mut self, bound: T) -> Self { let target_key = bound.as_ref(); - let streamer = stream_before(self.term_dictionary, target_key.as_ref(), self.has_positions); + let streamer = stream_before( + self.term_dictionary, + target_key.as_ref(), + self.has_positions, + ); let smaller_than = |k: &[u8]| k.lt(target_key); let (offset_before, current_key, term_info) = get_offset(smaller_than, streamer); self.current_key = current_key; @@ -54,7 +56,11 @@ impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> /// Limit the range to terms strictly greater than the bound fn gt>(mut self, bound: T) -> Self { let target_key = bound.as_ref(); - let streamer = stream_before(self.term_dictionary, target_key.as_ref(), self.has_positions); + let streamer = stream_before( + self.term_dictionary, + target_key.as_ref(), + self.has_positions, + ); let smaller_than = |k: &[u8]| k.le(target_key); let (offset_before, current_key, term_info) = get_offset(smaller_than, streamer); self.current_key = current_key; @@ -66,7 +72,11 @@ impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> /// Limit the range to terms lesser or equal to the bound fn lt>(mut self, bound: T) -> Self { let target_key = bound.as_ref(); - let streamer = stream_before(self.term_dictionary, target_key.as_ref(), self.has_positions); + let streamer = stream_before( + self.term_dictionary, + target_key.as_ref(), + self.has_positions, + ); let smaller_than = |k: &[u8]| k.lt(target_key); let (offset_before, _, _) = get_offset(smaller_than, streamer); self.offset_to = offset_before - self.origin; @@ -76,7 
+86,11 @@ impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> /// Limit the range to terms lesser or equal to the bound fn le>(mut self, bound: T) -> Self { let target_key = bound.as_ref(); - let streamer = stream_before(self.term_dictionary, target_key.as_ref(), self.has_positions); + let streamer = stream_before( + self.term_dictionary, + target_key.as_ref(), + self.has_positions, + ); let smaller_than = |k: &[u8]| k.le(target_key); let (offset_before, _, _) = get_offset(smaller_than, streamer); self.offset_to = offset_before - self.origin; @@ -88,10 +102,13 @@ impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> let data: &[u8] = self.term_dictionary.stream_data(); let start = self.offset_from; let stop = max(self.offset_to, start); + let term_delta_decoder = TermDeltaDecoder::with_previous_term(self.current_key); + let term_info_decoder = + TermInfoDeltaDecoder::from_term_info(self.term_info, self.has_positions); TermStreamerImpl { cursor: &data[start..stop], - term_delta_decoder: TermDeltaDecoder::with_previous_term(self.current_key), - term_info_decoder: TermInfoDeltaDecoder::from_term_info(self.term_info, self.has_positions), // TODO checkpoint + term_delta_decoder: term_delta_decoder, + term_info_decoder: term_info_decoder, } } } @@ -103,10 +120,10 @@ impl<'a> TermStreamerBuilder for TermStreamerBuilderImpl<'a> /// - the block start /// - the index within this block /// - the term_buffer state to initialize the block) -fn get_offset<'a, P: Fn(&[u8]) -> bool>(predicate: P, - mut streamer: TermStreamerImpl<'a>) - -> (usize, Vec, TermInfo) -{ +fn get_offset<'a, P: Fn(&[u8]) -> bool>( + predicate: P, + mut streamer: TermStreamerImpl<'a>, +) -> (usize, Vec, TermInfo) { let mut prev: &[u8] = streamer.cursor; let mut term_info = streamer.value().clone(); @@ -124,11 +141,8 @@ fn get_offset<'a, P: Fn(&[u8]) -> bool>(predicate: P, (prev.as_ptr() as usize, prev_data, term_info) } -impl<'a> TermStreamerBuilderImpl<'a> -{ - pub(crate) fn new( - term_dictionary: &'a TermDictionaryImpl, - has_positions: bool) -> Self { +impl<'a> TermStreamerBuilderImpl<'a> { + pub(crate) fn new(term_dictionary: &'a TermDictionaryImpl, has_positions: bool) -> Self { let data = term_dictionary.stream_data(); let origin = data.as_ptr() as usize; TermStreamerBuilderImpl { @@ -146,8 +160,7 @@ impl<'a> TermStreamerBuilderImpl<'a> /// See [`TermStreamer`](./trait.TermStreamer.html) -pub struct TermStreamerImpl<'a> -{ +pub struct TermStreamerImpl<'a> { cursor: &'a [u8], term_delta_decoder: TermDeltaDecoder, term_info_decoder: TermInfoDeltaDecoder, @@ -156,8 +169,7 @@ pub struct TermStreamerImpl<'a> -impl<'a> TermStreamer for TermStreamerImpl<'a> -{ +impl<'a> TermStreamer for TermStreamerImpl<'a> { fn advance(&mut self) -> bool { if self.cursor.is_empty() { return false; @@ -178,4 +190,3 @@ impl<'a> TermStreamer for TermStreamerImpl<'a> &self.term_info_decoder.term_info() } } - diff --git a/src/termdict/streamdict/termdict.rs b/src/termdict/streamdict/termdict.rs index bf4d899fd..f0f7c618f 100644 --- a/src/termdict/streamdict/termdict.rs +++ b/src/termdict/streamdict/termdict.rs @@ -30,20 +30,16 @@ fn has_positions(field_type: &FieldType) -> bool { let indexing_options = text_options.get_indexing_options(); if indexing_options.is_position_enabled() { true - } - else { + } else { false } } - _ => { - false - } + _ => false, } } /// See [`TermDictionaryBuilder`](./trait.TermDictionaryBuilder.html) -pub struct TermDictionaryBuilderImpl -{ +pub struct TermDictionaryBuilderImpl { write: CountingWriter, 
     term_delta_encoder: TermDeltaEncoder,
     term_info_encoder: TermInfoDeltaEncoder,
@@ -61,7 +57,8 @@ fn fill_last<'a>(fst: &'a Fst, mut node: Node<'a>, buffer: &mut Vec<u8>) {
 }
 
 impl<W> TermDictionaryBuilderImpl<W>
-    where W: Write
+where
+    W: Write,
 {
     fn add_index_entry(&mut self) {
         let stream_offset = self.write.written_bytes() as u32;
@@ -74,10 +71,17 @@ impl<W> TermDictionaryBuilderImpl<W>
             positions_offset: positions_offset,
         };
         self.block_index
-            .insert(&self.term_delta_encoder.term(), self.checkpoints.len() as u64)
-            .expect("Serializing fst on a Vec should never fail. Where your terms not in order maybe?");
-        checkpoint.serialize(&mut self.checkpoints)
-            .expect("Serializing checkpoint on a Vec should never fail.");
+            .insert(
+                &self.term_delta_encoder.term(),
+                self.checkpoints.len() as u64,
+            )
+            .expect(
+                "Serializing fst on a Vec should never fail. \
+                 Where your terms not in order maybe?",
+            );
+        checkpoint.serialize(&mut self.checkpoints).expect(
+            "Serializing checkpoint on a Vec should never fail.",
+        );
     }
 
     /// # Warning
@@ -98,7 +102,13 @@ impl<W> TermDictionaryBuilderImpl<W>
     pub(crate) fn insert_value(&mut self, term_info: &TermInfo) -> io::Result<()> {
         let delta_term_info = self.term_info_encoder.encode(term_info.clone());
         let (prefix_len, suffix) = self.term_delta_encoder.prefix_suffix();
-        write_term_kv(prefix_len, suffix, &delta_term_info, self.term_info_encoder.has_positions, &mut self.write)?;
+        write_term_kv(
+            prefix_len,
+            suffix,
+            &delta_term_info,
+            self.term_info_encoder.has_positions,
+            &mut self.write,
+        )?;
         self.len += 1;
         Ok(())
     }
@@ -108,19 +118,20 @@ fn num_bytes_required(mut n: u32) -> u8 {
     for i in 1u8..5u8 {
         if n < 256u32 {
             return i;
-        }
-        else {
+        } else {
             n /= 256;
         }
     }
     0u8
 }
 
-fn write_term_kv<W: Write>(prefix_len: usize,
-                           suffix: &[u8],
-                           delta_term_info: &DeltaTermInfo,
-                           has_positions: bool,
-                           write: &mut W) -> io::Result<()> {
+fn write_term_kv<W: Write>(
+    prefix_len: usize,
+    suffix: &[u8],
+    delta_term_info: &DeltaTermInfo,
+    has_positions: bool,
+    write: &mut W,
+) -> io::Result<()> {
     let suffix_len = suffix.len();
     let mut code = 0u8;
     let num_bytes_docfreq = num_bytes_required(delta_term_info.doc_freq);
@@ -131,9 +142,13 @@ fn write_term_kv<W: Write>(prefix_len: usize,
     code |= (num_bytes_positions_offset - 1) << 5u8;
     if (prefix_len < 16) && (suffix_len < 16) {
         code |= 1u8;
-        write.write_all(&[code, (prefix_len as u8) | ((suffix_len as u8) << 4u8)])?;
-    }
-    else {
+        write.write_all(
+            &[
+                code,
+                (prefix_len as u8) | ((suffix_len as u8) << 4u8),
+            ],
+        )?;
+    } else {
         write.write_all(&[code])?;
         (prefix_len as u32).serialize(write)?;
         (suffix_len as u32).serialize(write)?;
@@ -145,11 +160,15 @@ fn write_term_kv<W: Write>(prefix_len: usize,
     }
     {
         let bytes: [u8; 4] = unsafe { transmute(delta_term_info.delta_postings_offset) };
-        write.write_all(&bytes[0..num_bytes_postings_offset as usize])?;
+        write.write_all(
+            &bytes[0..num_bytes_postings_offset as usize],
+        )?;
     }
     if has_positions {
         let bytes: [u8; 4] = unsafe { transmute(delta_term_info.delta_positions_offset) };
-        write.write_all(&bytes[0..num_bytes_positions_offset as usize])?;
+        write.write_all(
+            &bytes[0..num_bytes_positions_offset as usize],
+        )?;
         write.write_all(&[delta_term_info.positions_inner_offset])?;
     }
     Ok(())
@@ -157,7 +176,8 @@ fn write_term_kv<W: Write>(prefix_len: usize,
 }
 
 impl<W> TermDictionaryBuilder<W> for TermDictionaryBuilderImpl<W>
-    where W: Write
+where
+    W: Write,
 {
     /// Creates a new `TermDictionaryBuilder`
     fn new(mut write: W, field_type: FieldType) -> io::Result<Self> {
@@ -169,7 +189,7 @@ impl<W> TermDictionaryBuilder<W> for TermDictionaryBuilderImpl<W>
             term_delta_encoder: TermDeltaEncoder::default(),
             term_info_encoder: TermInfoDeltaEncoder::new(has_positions),
             block_index: fst::MapBuilder::new(vec![]).expect("This cannot fail"),
-            checkpoints: vec!(),
+            checkpoints: vec![],
             len: 0,
         })
     }
@@ -206,28 +226,22 @@ impl<W> TermDictionaryBuilder<W> for TermDictionaryBuilderImpl<W>
 fn open_fst_index(source: ReadOnlySource) -> io::Result<fst::Map> {
     use self::ReadOnlySource::*;
     let fst_result = match source {
-        Anonymous(data) => {
-            Fst::from_shared_bytes(data.data, data.start, data.len)
-        }
-        Mmap(mmap_readonly) => {
-            Fst::from_mmap(mmap_readonly)
-        }
+        Anonymous(data) => Fst::from_shared_bytes(data.data, data.start, data.len),
+        Mmap(mmap_readonly) => Fst::from_mmap(mmap_readonly),
     };
     let fst = fst_result.map_err(convert_fst_error)?;
     Ok(fst::Map::from(fst))
 }
 
 /// See [`TermDictionary`](./trait.TermDictionary.html)
-pub struct TermDictionaryImpl
-{
+pub struct TermDictionaryImpl {
     stream_data: ReadOnlySource,
     fst_index: fst::Map,
     checkpoints_data: ReadOnlySource,
     has_positions: bool,
 }
 
-impl TermDictionaryImpl
-{
+impl TermDictionaryImpl {
     pub(crate) fn stream_data(&self) -> &[u8] {
         self.stream_data.as_slice()
     }
@@ -235,8 +249,8 @@ impl TermDictionaryImpl
     pub(crate) fn strictly_previous_key(&self, key: &[u8]) -> (Vec<u8>, CheckPoint) {
         let (term, checkpoint_offset) = self.strictly_previous_key_checkpoint_offset(key);
         let mut checkpoint_data = &self.checkpoints_data.as_slice()[checkpoint_offset..];
-        let checkpoint = CheckPoint::deserialize(&mut checkpoint_data)
-            .expect("Checkpoint data is corrupted");
+        let checkpoint =
+            CheckPoint::deserialize(&mut checkpoint_data).expect("Checkpoint data is corrupted");
         (term, checkpoint)
     }
 
@@ -288,47 +302,47 @@ impl TermDictionaryImpl
 
 
-impl<'a> TermDictionary<'a> for TermDictionaryImpl
-{
+impl<'a> TermDictionary<'a> for TermDictionaryImpl {
     type Streamer = TermStreamerImpl<'a>;
 
     type StreamBuilder = TermStreamerBuilderImpl<'a>;
 
     /// Opens a `TermDictionary` given a data source.
-    fn from_source(mut source: ReadOnlySource) -> io::Result<Self> {
-        let has_positions = source.slice(0, 1).as_ref()[0] == 255u8;
+    fn from_source(mut source: ReadOnlySource) -> Self {
+        let has_positions = source.slice(0, 1)[0] == 255u8;
         source = source.slice_from(1);
         let total_len = source.len();
         let (body, footer) = source.split(total_len - 16);
         let mut footer_buffer: &[u8] = footer.as_slice();
-        let fst_addr: usize = u64::deserialize(&mut footer_buffer)? as usize;
-        let checkpoints_addr: usize = u64::deserialize(&mut footer_buffer)? as usize;
+        let fst_addr = u64::deserialize(&mut footer_buffer).expect(
+            "deserializing 8 byte should never fail",
+        ) as usize;
+        let checkpoints_addr = u64::deserialize(&mut footer_buffer).expect(
+            "deserializing 8 byte should never fail",
+        ) as usize;
         let stream_data = body.slice(0, fst_addr - PADDING_SIZE);
         let fst_data = body.slice(fst_addr, checkpoints_addr);
         let checkpoints_data = body.slice_from(checkpoints_addr);
-        let fst_index = open_fst_index(fst_data)?;
+        let fst_index = open_fst_index(fst_data).expect("Index FST data corrupted");
 
-        Ok(TermDictionaryImpl {
+        TermDictionaryImpl {
             has_positions: has_positions,
             stream_data: stream_data,
             checkpoints_data: checkpoints_data,
             fst_index: fst_index,
-        })
+        }
     }
 
     /// Lookups the value corresponding to the key.
     fn get<K: AsRef<[u8]>>(&self, target_key: K) -> Option<TermInfo> {
-        let mut streamer = self.range()
-            .ge(&target_key)
-            .into_stream();
+        let mut streamer = self.range().ge(&target_key).into_stream();
         if streamer.advance() && streamer.key() == target_key.as_ref() {
             Some(streamer.value().clone())
-        }
-        else {
+        } else {
             None
         }
     }
@@ -353,4 +367,4 @@ mod tests {
         assert_eq!(num_bytes_required(256), 2);
         assert_eq!(num_bytes_required(u32::max_value()), 4);
     }
-}
\ No newline at end of file
+}