diff --git a/examples/simple_search.rs b/examples/simple_search.rs index 316d503cb..c776366da 100644 --- a/examples/simple_search.rs +++ b/examples/simple_search.rs
@@ -5,11 +5,11 @@ extern crate tempdir; extern crate serde_json; use std::path::Path; -use tempdir::TempDir; -use tantivy::Index; -use tantivy::schema::*; use tantivy::collector::TopCollector; use tantivy::query::QueryParser; +use tantivy::schema::*; +use tantivy::Index; +use tempdir::TempDir; fn main() { // Let's create a temporary directory for the
diff --git a/src/collector/chained_collector.rs b/src/collector/chained_collector.rs index 04d898425..ca8d11fce 100644 --- a/src/collector/chained_collector.rs +++ b/src/collector/chained_collector.rs
@@ -1,9 +1,9 @@ -use Result; use collector::Collector; +use DocId; +use Result; +use Score; use SegmentLocalId; use SegmentReader; -use DocId; -use Score; /// Collector that does nothing. /// This is used in the chain Collector and will hopefully
diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index 8cb4196d3..bbd2a4c66 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs
@@ -1,9 +1,9 @@ use super::Collector; use DocId; -use Score; use Result; -use SegmentReader; +use Score; use SegmentLocalId; +use SegmentReader; /// `CountCollector` collector only counts how many /// documents match the query.
diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 9ac30992c..e904c2e25 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs
@@ -1,25 +1,25 @@ -use std::mem; use collector::Collector; +use docset::SkipResult; use fastfield::FacetReader; +use schema::Facet; use schema::Field; use std::cell::UnsafeCell; -use schema::Facet; +use std::collections::btree_map; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::collections::BinaryHeap; use std::collections::Bound; -use std::collections::BTreeSet; -use termdict::TermMerger; -use docset::SkipResult; -use std::collections::btree_map; -use std::{usize, u64}; use std::iter::Peekable; +use std::mem; +use std::{u64, usize}; +use termdict::TermMerger; + use std::cmp::Ordering; use DocId; use Result; use Score; -use SegmentReader; use SegmentLocalId; -use std::cmp::Ordering; +use SegmentReader; struct Hit<'a> { count: u64,
@@ -430,27 +430,22 @@ pub struct FacetCounts { facet_counts: BTreeMap<Facet, u64>, } - pub struct FacetChildIterator<'a> { underlying: btree_map::Range<'a, Facet, u64>, } impl<'a> Iterator for FacetChildIterator<'a> { - type Item = (&'a Facet, u64); fn next(&mut self) -> Option<Self::Item> { - self.underlying - .next() - .map(|(facet, count)| (facet, *count)) + self.underlying.next().map(|(facet, count)| (facet, *count)) } } - impl FacetCounts { - - pub fn get<T>(&self, facet_from: T) -> FacetChildIterator //impl Iterator - where Facet: From<T> + pub fn get<T>(&self, facet_from: T) -> FacetChildIterator + where + Facet: From<T>, { let facet = Facet::from(facet_from); let left_bound = Bound::Excluded(facet.clone());
@@ -463,9 +458,7 @@ impl FacetCounts { Bound::Excluded(facet_after) }; let underlying: btree_map::Range<_, _> = self.facet_counts.range((left_bound, right_bound)); - FacetChildIterator { - underlying - } + FacetChildIterator { underlying } } pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
@@ -497,13 +490,13 @@ impl FacetCounts { #[cfg(test)] mod tests { - use core::Index; - use schema::{Document, Facet, SchemaBuilder}; - use query::AllQuery; use super::{FacetCollector, FacetCounts}; - use std::iter;
+ use core::Index; + use query::AllQuery; use rand::{thread_rng, Rng}; use schema::Field; + use schema::{Document, Facet, SchemaBuilder}; + use std::iter; #[test] fn test_facet_collector_drilldown() {
@@ -558,8 +551,10 @@ } #[test] - #[should_panic(expected = "Tried to add a facet which is a descendant of \ - an already added facet.")] + #[should_panic( + expected = "Tried to add a facet which is a descendant of \ + an already added facet." + )] fn test_misused_facet_collector() { let mut facet_collector = FacetCollector::for_field(Field(0)); facet_collector.add_facet(Facet::from("/country"));
@@ -619,18 +614,16 @@ } - -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use test::Bencher; - use schema::SchemaBuilder; - use Index; use collector::FacetCollector; - use schema::Facet; use query::AllQuery; use rand::{thread_rng, Rng}; - + use schema::Facet; + use schema::SchemaBuilder; + use test::Bencher; + use Index; #[bench] fn bench_facet_collector(b: &mut Bencher) {
@@ -662,4 +655,4 @@ searcher.search(&AllQuery, &mut facet_collector).unwrap(); }); } -} \ No newline at end of file +}
diff --git a/src/collector/mod.rs b/src/collector/mod.rs index c1df3772c..918ee757e 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs
@@ -2,11 +2,11 @@ Defines how the documents matching a search query should be processed. */ -use SegmentReader; -use SegmentLocalId; use DocId; -use Score; use Result; +use Score; +use SegmentLocalId; +use SegmentReader; mod count_collector; pub use self::count_collector::CountCollector;
@@ -89,12 +89,12 @@ impl<'a, C: Collector> Collector for &'a mut C { pub mod tests { use super::*; - use DocId; - use Score; use core::SegmentReader; - use SegmentLocalId; use fastfield::FastFieldReader; use schema::Field; + use DocId; + use Score; + use SegmentLocalId; /// Stores all of the doc ids. /// This collector is only used for tests.
@@ -187,11 +187,10 @@ } - -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use test::Bencher; use collector::{Collector, CountCollector}; + use test::Bencher; #[bench] fn build_collector(b: &mut Bencher) {
@@ -204,4 +203,4 @@ count_collector.count() }); } -} \ No newline at end of file +}
diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 8407edcfc..26685383d 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs
@@ -1,9 +1,9 @@ use super::Collector; use DocId; -use Score; use Result; -use SegmentReader; +use Score; use SegmentLocalId; +use SegmentReader; /// Multicollector makes it possible to collect on more than one collector. /// It should only be used for use cases where the Collector types are unknown
diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 0fd6c61b5..37015d08d 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs
@@ -1,12 +1,12 @@ use super::Collector; -use SegmentReader; -use SegmentLocalId; -use DocAddress; -use Result; -use std::collections::BinaryHeap; use std::cmp::Ordering; +use std::collections::BinaryHeap; +use DocAddress; use DocId; +use Result; use Score; +use SegmentLocalId; +use SegmentReader; // Rust heap is a max-heap and we need a min heap.
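// An aside on the remark above: `std::collections::BinaryHeap` pops its
// *largest* element, while a top-K collector wants the smallest retained
// score at the root so it can be evicted in O(log k). A minimal,
// self-contained sketch of the usual workaround with `std::cmp::Reverse`
// (this `top_k` helper is hypothetical and for illustration only; the
// collector in this diff presumably inverts `Ord` on its own heap entries
// instead):
fn top_k(values: impl IntoIterator<Item = u64>, k: usize) -> Vec<u64> {
    use std::cmp::Reverse;
    use std::collections::BinaryHeap;
    // `Reverse` flips the ordering, turning the max-heap into a min-heap.
    let mut heap: BinaryHeap<Reverse<u64>> = BinaryHeap::with_capacity(k);
    for v in values {
        if heap.len() < k {
            heap.push(Reverse(v));
        } else if heap.peek().map_or(false, |&Reverse(min)| v > min) {
            heap.pop(); // evict the smallest retained value
            heap.push(Reverse(v));
        }
    }
    // Ascending order of `Reverse` entries is descending order of the values.
    heap.into_sorted_vec().into_iter().map(|Reverse(v)| v).collect()
}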
#[derive(Clone, Copy)]
@@ -135,9 +135,9 @@ impl Collector for TopCollector { mod tests { use super::*; + use collector::Collector; use DocId; use Score; - use collector::Collector; #[test] fn test_top_collector_not_at_capacity() {
diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs index 0d36e4d41..f495b8a7a 100644 --- a/src/common/bitpacker.rs +++ b/src/common/bitpacker.rs
@@ -1,6 +1,6 @@ -use std::io::Write; -use std::io; use common::serialize::BinarySerializable; +use std::io; +use std::io::Write; use std::mem; use std::ops::Deref; use std::ptr;
@@ -106,7 +106,8 @@ where addr + 8 <= data.len(), "The fast field should have been padded with 7 bytes." ); - let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) }; + let val_unshifted_unmasked: u64 = + unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) }; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; val_shifted & mask } else {
@@ -141,7 +142,8 @@ where for output_val in output.iter_mut() { let addr = addr_in_bits >> 3; let bit_shift = addr_in_bits & 7; - let val_unshifted_unmasked: u64 = unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) }; + let val_unshifted_unmasked: u64 = + unsafe { ptr::read_unaligned(data[addr..].as_ptr() as *const u64) }; let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64; *output_val = val_shifted & mask; addr_in_bits += num_bits;
diff --git a/src/common/bitset.rs b/src/common/bitset.rs index 76e3ad52b..0d3f1a54a 100644 --- a/src/common/bitset.rs +++ b/src/common/bitset.rs
@@ -202,14 +202,14 @@ impl BitSet { #[cfg(test)] mod tests { - use tests; - use std::collections::HashSet; use super::BitSet; use super::TinySet; - use tests::generate_nonunique_unsorted; - use std::collections::BTreeSet; - use query::BitSetDocSet; use docset::DocSet; + use query::BitSetDocSet; + use std::collections::BTreeSet; + use std::collections::HashSet; + use tests; + use tests::generate_nonunique_unsorted; #[test] fn test_tiny_set() {
@@ -354,12 +354,12 @@ } } -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use test; - use super::TinySet; use super::BitSet; + use super::TinySet; + use test; #[bench] fn bench_tinyset_pop(b: &mut test::Bencher) {
@@ -392,4 +392,4 @@ fn bench_bitset_initialize(b: &mut test::Bencher) { b.iter(|| BitSet::with_max_value(1_000_000)); } -} \ No newline at end of file +}
diff --git a/src/common/composite_file.rs b/src/common/composite_file.rs index b30ed6403..257e2b579 100644 --- a/src/common/composite_file.rs +++ b/src/common/composite_file.rs
@@ -1,12 +1,12 @@ -use std::io::Write; -use common::CountingWriter; -use std::collections::HashMap; -use schema::Field; -use common::VInt; -use directory::WritePtr; -use std::io::{self, Read}; -use directory::ReadOnlySource; use common::BinarySerializable; +use common::CountingWriter; +use common::VInt; +use directory::ReadOnlySource; +use directory::WritePtr; +use schema::Field; +use std::collections::HashMap; +use std::io::Write; +use std::io::{self, Read}; #[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)] pub struct FileAddr {
@@ -30,10 +30,7 @@ impl BinarySerializable for FileAddr { fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> { let field = Field::deserialize(reader)?; let idx = VInt::deserialize(reader)?.0 as usize; - Ok(FileAddr { - field, - idx, - }) + Ok(FileAddr { field, idx }) } }
@@ -166,7 +163,7 @@ impl CompositeFile { /// to a given `Field` and stored in a
`CompositeFile`. pub fn open_read_with_idx(&self, field: Field, idx: usize) -> Option<ReadOnlySource> { self.offsets_index - .get(&FileAddr { field, idx, }) + .get(&FileAddr { field, idx }) .map(|&(from, to)| self.data.slice(from, to)) } }
@@ -174,12 +171,12 @@ #[cfg(test)] mod test { - use std::io::Write; use super::{CompositeFile, CompositeWrite}; + use common::BinarySerializable; + use common::VInt; use directory::{Directory, RAMDirectory}; use schema::Field; - use common::VInt; - use common::BinarySerializable; + use std::io::Write; use std::path::Path; #[test]
diff --git a/src/common/counting_writer.rs b/src/common/counting_writer.rs index 8bfb69d5a..5eaec208b 100644 --- a/src/common/counting_writer.rs +++ b/src/common/counting_writer.rs
@@ -1,5 +1,5 @@ -use std::io::Write; use std::io; +use std::io::Write; pub struct CountingWriter<W> { underlying: W,
diff --git a/src/common/mod.rs b/src/common/mod.rs index fb5042200..2942438b4 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs
@@ -1,16 +1,16 @@ - mod serialize; -mod vint; -mod counting_writer; -mod composite_file; pub mod bitpacker; mod bitset; +mod composite_file; +mod counting_writer; +mod serialize; +mod vint; -pub(crate) use self::composite_file::{CompositeFile, CompositeWrite}; -pub use self::serialize::{BinarySerializable, FixedSize}; -pub use self::vint::VInt; -pub use self::counting_writer::CountingWriter; pub use self::bitset::BitSet; pub(crate) use self::bitset::TinySet; +pub(crate) use self::composite_file::{CompositeFile, CompositeWrite}; +pub use self::counting_writer::CountingWriter; +pub use self::serialize::{BinarySerializable, FixedSize}; +pub use self::vint::VInt; pub use byteorder::LittleEndian as Endianness; use std::io;
@@ -104,8 +104,8 @@ pub fn u64_to_i64(val: u64) -> i64 { #[cfg(test)] pub(crate) mod test { - use super::{compute_num_bits, i64_to_u64, u64_to_i64}; pub use super::serialize::test::fixed_size_test; + use super::{compute_num_bits, i64_to_u64, u64_to_i64}; fn test_i64_converter_helper(val: i64) { assert_eq!(u64_to_i64(i64_to_u64(val)), val);
diff --git a/src/common/serialize.rs b/src/common/serialize.rs index 543b72b19..0df4f75ae 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs
@@ -1,10 +1,10 @@ use byteorder::{ReadBytesExt, WriteBytesExt}; use common::Endianness; -use std::fmt; -use std::io::Write; -use std::io::Read; -use std::io; use common::VInt; +use std::fmt; +use std::io; +use std::io::Read; +use std::io::Write; /// Trait for a simple binary serialization. pub trait BinarySerializable: fmt::Debug + Sized {
@@ -135,8 +135,8 @@ impl BinarySerializable for String { #[cfg(test)] pub mod test { - use common::VInt; use super::*; + use common::VInt; pub fn fixed_size_test() { let mut buffer = Vec::new();
diff --git a/src/common/vint.rs b/src/common/vint.rs index b0c32d1d3..69942d31c 100644 --- a/src/common/vint.rs +++ b/src/common/vint.rs
@@ -1,7 +1,7 @@ use super::BinarySerializable; use std::io; -use std::io::Write; use std::io::Read; +use std::io::Write; /// Wrapper over a `u64` that serializes as a variable int. #[derive(Debug, Eq, PartialEq)]
diff --git a/src/compression/mod.rs b/src/compression/mod.rs index 31b9a9a7c..734caf9f8 100644 --- a/src/compression/mod.rs +++ b/src/compression/mod.rs
@@ -8,10 +8,8 @@ const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * 4 + 1; pub use self::stream::CompressedIntStream; - use bitpacking::{BitPacker, BitPacker4x}; - /// Returns the size in bytes of a compressed block, given `num_bits`.
pub fn compressed_block_size(num_bits: u8) -> usize { 1 + (num_bits as usize) * COMPRESSION_BLOCK_SIZE / 8 @@ -35,19 +33,21 @@ impl BlockEncoder { pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> &[u8] { let num_bits = self.bitpacker.num_bits_sorted(offset, block); self.output[0] = num_bits; - let written_size = 1 + self.bitpacker.compress_sorted(offset, block, &mut self.output[1..], num_bits); + let written_size = + 1 + self.bitpacker + .compress_sorted(offset, block, &mut self.output[1..], num_bits); &self.output[..written_size] } pub fn compress_block_unsorted(&mut self, block: &[u32]) -> &[u8] { let num_bits = self.bitpacker.num_bits(block); self.output[0] = num_bits; - let written_size = 1 + self.bitpacker.compress(block, &mut self.output[1..], num_bits); + let written_size = 1 + self.bitpacker + .compress(block, &mut self.output[1..], num_bits); &self.output[..written_size] } } - pub struct BlockDecoder { bitpacker: BitPacker4x, pub output: [u32; COMPRESSION_BLOCK_SIZE + 1], @@ -68,17 +68,23 @@ impl BlockDecoder { output_len: 0, } } - + pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32) -> usize { let num_bits = compressed_data[0]; self.output_len = COMPRESSION_BLOCK_SIZE; - 1 + self.bitpacker.decompress_sorted(offset, &compressed_data[1..], &mut self.output, num_bits) + 1 + self.bitpacker.decompress_sorted( + offset, + &compressed_data[1..], + &mut self.output, + num_bits, + ) } pub fn uncompress_block_unsorted<'a>(&mut self, compressed_data: &'a [u8]) -> usize { let num_bits = compressed_data[0]; self.output_len = COMPRESSION_BLOCK_SIZE; - 1 + self.bitpacker.decompress(&compressed_data[1..], &mut self.output, num_bits) + 1 + self.bitpacker + .decompress(&compressed_data[1..], &mut self.output, num_bits) } #[inline] @@ -264,14 +270,13 @@ pub mod tests { } } -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { use super::*; use test::Bencher; use tests; - fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec { let seed: &[u32; 4] = &[1, 2, 3, seed_val]; let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed); diff --git a/src/compression/stream.rs b/src/compression/stream.rs index 8cca8a3b6..762792a9b 100644 --- a/src/compression/stream.rs +++ b/src/compression/stream.rs @@ -1,6 +1,6 @@ +use compression::compressed_block_size; use compression::BlockDecoder; use compression::COMPRESSION_BLOCK_SIZE; -use compression::compressed_block_size; use directory::{ReadOnlySource, SourceRead}; /// Reads a stream of compressed ints. @@ -13,7 +13,7 @@ pub struct CompressedIntStream { buffer: SourceRead, block_decoder: BlockDecoder, - cached_addr: usize, // address of the currently decoded block + cached_addr: usize, // address of the currently decoded block cached_next_addr: usize, // address following the currently decoded block addr: usize, // address of the block associated to the current position @@ -42,7 +42,8 @@ impl CompressedIntStream { // no need to read. 
self.cached_next_addr } else { - let next_addr = addr + self.block_decoder.uncompress_block_unsorted(self.buffer.slice_from(addr)); + let next_addr = addr + self.block_decoder + .uncompress_block_unsorted(self.buffer.slice_from(addr)); self.cached_addr = addr; self.cached_next_addr = next_addr; next_addr @@ -101,8 +102,8 @@ pub mod tests { use super::CompressedIntStream; use compression::compressed_block_size; - use compression::COMPRESSION_BLOCK_SIZE; use compression::BlockEncoder; + use compression::COMPRESSION_BLOCK_SIZE; use directory::ReadOnlySource; fn create_stream_buffer() -> ReadOnlySource { diff --git a/src/core/index.rs b/src/core/index.rs index 49d64dfb9..4d822ced2 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -1,33 +1,32 @@ -use Result; +use core::SegmentId; use error::{ErrorKind, ResultExt}; -use serde_json; use schema::Schema; -use std::sync::Arc; +use serde_json; use std::borrow::BorrowMut; use std::fmt; -use core::SegmentId; +use std::sync::Arc; +use Result; - -#[cfg(feature="mmap")] +use super::pool::LeasedItem; +use super::pool::Pool; +use super::segment::create_segment; +use super::segment::Segment; +use core::searcher::Searcher; +use core::IndexMeta; +use core::SegmentMeta; +use core::SegmentReader; +use core::META_FILEPATH; +use directory::ManagedDirectory; +#[cfg(feature = "mmap")] use directory::MmapDirectory; use directory::{Directory, RAMDirectory}; use indexer::index_writer::open_index_writer; -use core::searcher::Searcher; -use num_cpus; -use super::segment::Segment; -use core::SegmentReader; -use super::pool::Pool; -use core::SegmentMeta; -use super::pool::LeasedItem; -use std::path::Path; -use core::IndexMeta; -use indexer::DirectoryLock; -use IndexWriter; -use directory::ManagedDirectory; -use core::META_FILEPATH; -use super::segment::create_segment; use indexer::segment_updater::save_new_metas; +use indexer::DirectoryLock; +use num_cpus; +use std::path::Path; use tokenizer::TokenizerManager; +use IndexWriter; const NUM_SEARCHERS: usize = 12; @@ -64,7 +63,7 @@ impl Index { /// The index will use the `MMapDirectory`. /// /// If a previous index was in this directory, then its meta file will be destroyed. - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] pub fn create>(directory_path: P, schema: Schema) -> Result { let mmap_directory = MmapDirectory::open(directory_path)?; let directory = ManagedDirectory::new(mmap_directory)?; @@ -84,7 +83,7 @@ impl Index { /// /// The temp directory is only used for testing the `MmapDirectory`. /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`. - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] pub fn create_from_tempdir(schema: Schema) -> Result { let mmap_directory = MmapDirectory::create_from_tempdir()?; let directory = ManagedDirectory::new(mmap_directory)?; @@ -112,7 +111,7 @@ impl Index { } /// Opens a new directory from an index path. 
- #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] pub fn open>(directory_path: P) -> Result { let mmap_directory = MmapDirectory::open(directory_path)?; let directory = ManagedDirectory::new(mmap_directory)?; @@ -224,7 +223,7 @@ impl Index { .collect::>()?; let schema = self.schema(); let searchers = (0..NUM_SEARCHERS) - .map(|_| Searcher::new(schema.clone(),segment_readers.clone())) + .map(|_| Searcher::new(schema.clone(), segment_readers.clone())) .collect(); self.searcher_pool.publish_new_generation(searchers); Ok(()) diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs index 56db6fc62..b5ed52427 100644 --- a/src/core/index_meta.rs +++ b/src/core/index_meta.rs @@ -1,7 +1,7 @@ -use schema::Schema; use core::SegmentMeta; -use std::fmt; +use schema::Schema; use serde_json; +use std::fmt; /// Meta information about the `Index`. /// @@ -45,9 +45,9 @@ impl fmt::Debug for IndexMeta { #[cfg(test)] mod tests { - use serde_json; use super::IndexMeta; use schema::{SchemaBuilder, TEXT}; + use serde_json; #[test] fn test_serialize_metas() { diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index ac7b70314..7620258bc 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -1,13 +1,13 @@ +use common::BinarySerializable; +use compression::CompressedIntStream; use directory::{ReadOnlySource, SourceRead}; -use termdict::TermDictionary; -use postings::{BlockSegmentPostings, SegmentPostings}; +use postings::FreqReadingOption; use postings::TermInfo; +use postings::{BlockSegmentPostings, SegmentPostings}; +use schema::FieldType; use schema::IndexRecordOption; use schema::Term; -use compression::CompressedIntStream; -use postings::FreqReadingOption; -use common::BinarySerializable; -use schema::FieldType; +use termdict::TermDictionary; /// The inverted index reader is in charge of accessing /// the inverted index associated to a specific field. @@ -27,7 +27,7 @@ pub struct InvertedIndexReader { postings_source: ReadOnlySource, positions_source: ReadOnlySource, record_option: IndexRecordOption, - total_num_tokens: u64 + total_num_tokens: u64, } impl InvertedIndexReader { @@ -45,7 +45,7 @@ impl InvertedIndexReader { postings_source: postings_source.slice_from(8), positions_source, record_option, - total_num_tokens + total_num_tokens, } } @@ -56,11 +56,11 @@ impl InvertedIndexReader { .get_index_record_option() .unwrap_or(IndexRecordOption::Basic); InvertedIndexReader { - termdict: TermDictionary::empty(field_type), + termdict: TermDictionary::empty(field_type), postings_source: ReadOnlySource::empty(), positions_source: ReadOnlySource::empty(), record_option, - total_num_tokens: 0u64 + total_num_tokens: 0u64, } } @@ -149,8 +149,6 @@ impl InvertedIndexReader { self.total_num_tokens } - - /// Returns the segment postings associated with the term, and with the given option, /// or `None` if the term has never been encountered and indexed. /// @@ -166,12 +164,15 @@ impl InvertedIndexReader { Some(self.read_postings_from_terminfo(&term_info, option)) } - pub(crate) fn read_postings_no_deletes(&self, term: &Term, option: IndexRecordOption) -> Option { + pub(crate) fn read_postings_no_deletes( + &self, + term: &Term, + option: IndexRecordOption, + ) -> Option { let term_info = get!(self.get_term_info(term)); Some(self.read_postings_from_terminfo(&term_info, option)) } - /// Returns the number of documents containing the term. 
pub fn doc_freq(&self, term: &Term) -> u32 { self.get_term_info(term) @@ -179,6 +180,3 @@ impl InvertedIndexReader { .unwrap_or(0u32) } } - - - diff --git a/src/core/mod.rs b/src/core/mod.rs index 3a6c9568a..6d43685f8 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,24 +1,24 @@ -pub mod searcher; pub mod index; -mod segment_reader; -mod segment_id; -mod segment_component; -mod segment; mod index_meta; -mod pool; -mod segment_meta; mod inverted_index_reader; +mod pool; +pub mod searcher; +mod segment; +mod segment_component; +mod segment_id; +mod segment_meta; +mod segment_reader; +pub use self::index::Index; +pub use self::index_meta::IndexMeta; pub use self::inverted_index_reader::InvertedIndexReader; pub use self::searcher::Searcher; -pub use self::segment_component::SegmentComponent; -pub use self::segment_id::SegmentId; -pub use self::segment_reader::SegmentReader; pub use self::segment::Segment; pub use self::segment::SerializableSegment; -pub use self::index::Index; +pub use self::segment_component::SegmentComponent; +pub use self::segment_id::SegmentId; pub use self::segment_meta::SegmentMeta; -pub use self::index_meta::IndexMeta; +pub use self::segment_reader::SegmentReader; use std::path::PathBuf; diff --git a/src/core/pool.rs b/src/core/pool.rs index 729c94a08..64a894d4c 100644 --- a/src/core/pool.rs +++ b/src/core/pool.rs @@ -1,8 +1,8 @@ -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; +use crossbeam::sync::MsQueue; use std::mem; use std::ops::{Deref, DerefMut}; -use crossbeam::sync::MsQueue; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; use std::sync::Arc; pub struct GenerationItem { @@ -114,8 +114,8 @@ impl Drop for LeasedItem { #[cfg(test)] mod tests { - use std::iter; use super::Pool; + use std::iter; #[test] fn test_pool() { diff --git a/src/core/searcher.rs b/src/core/searcher.rs index b9389b0c2..8f36b58ea 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -1,15 +1,15 @@ -use Result; -use core::SegmentReader; -use schema::Document; use collector::Collector; -use query::Query; -use DocAddress; -use schema::{Field, Term}; -use termdict::TermMerger; -use std::sync::Arc; -use std::fmt; -use schema::Schema; use core::InvertedIndexReader; +use core::SegmentReader; +use query::Query; +use schema::Document; +use schema::Schema; +use schema::{Field, Term}; +use std::fmt; +use std::sync::Arc; +use termdict::TermMerger; +use DocAddress; +use Result; /// Holds a list of `SegmentReader`s ready for search. /// @@ -22,14 +22,11 @@ pub struct Searcher { } impl Searcher { - /// Creates a new `Searcher` - pub(crate) fn new( - schema: Schema, - segment_readers: Vec) -> Searcher { + pub(crate) fn new(schema: Schema, segment_readers: Vec) -> Searcher { Searcher { schema, - segment_readers + segment_readers, } } /// Fetches a document from tantivy's store given a `DocAddress`. 
@@ -109,7 +106,6 @@ impl FieldSearcher { } } - impl fmt::Debug for Searcher { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let segment_ids = self.segment_readers diff --git a/src/core/segment.rs b/src/core/segment.rs index bac21044d..b0a7e48c7 100644 --- a/src/core/segment.rs +++ b/src/core/segment.rs @@ -1,16 +1,16 @@ -use Result; -use std::path::PathBuf; -use schema::Schema; -use std::fmt; -use core::SegmentId; -use directory::{FileProtection, ReadOnlySource, WritePtr}; -use indexer::segment_serializer::SegmentSerializer; use super::SegmentComponent; use core::Index; -use std::result; -use directory::Directory; +use core::SegmentId; use core::SegmentMeta; use directory::error::{OpenReadError, OpenWriteError}; +use directory::Directory; +use directory::{FileProtection, ReadOnlySource, WritePtr}; +use indexer::segment_serializer::SegmentSerializer; +use schema::Schema; +use std::fmt; +use std::path::PathBuf; +use std::result; +use Result; /// A segment is a piece of the index. #[derive(Clone)] @@ -111,8 +111,8 @@ mod tests { use core::SegmentComponent; use directory::Directory; - use std::collections::HashSet; use schema::SchemaBuilder; + use std::collections::HashSet; use Index; #[test] diff --git a/src/core/segment_id.rs b/src/core/segment_id.rs index 1a52736e0..75e76089d 100644 --- a/src/core/segment_id.rs +++ b/src/core/segment_id.rs @@ -1,6 +1,6 @@ -use uuid::Uuid; -use std::fmt; use std::cmp::{Ord, Ordering}; +use std::fmt; +use uuid::Uuid; #[cfg(test)] use std::sync::atomic; diff --git a/src/core/segment_meta.rs b/src/core/segment_meta.rs index b4901e0f8..c8d50046a 100644 --- a/src/core/segment_meta.rs +++ b/src/core/segment_meta.rs @@ -1,7 +1,7 @@ -use core::SegmentId; use super::SegmentComponent; -use std::path::PathBuf; +use core::SegmentId; use std::collections::HashSet; +use std::path::PathBuf; #[derive(Clone, Debug, Serialize, Deserialize)] struct DeleteMeta { diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 0dd18c853..ea6276a9a 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -1,30 +1,30 @@ -use Result; -use core::Segment; -use core::SegmentId; -use core::SegmentComponent; -use std::sync::RwLock; -use common::HasLen; -use core::SegmentMeta; -use fastfield::{self, FastFieldNotAvailableError}; -use fastfield::DeleteBitSet; -use store::StoreReader; -use schema::Document; -use DocId; -use std::sync::Arc; -use std::collections::HashMap; use common::CompositeFile; -use std::fmt; +use common::HasLen; use core::InvertedIndexReader; -use schema::Field; -use schema::FieldType; +use core::Segment; +use core::SegmentComponent; +use core::SegmentId; +use core::SegmentMeta; use error::ErrorKind; +use fastfield::DeleteBitSet; use fastfield::FacetReader; use fastfield::FastFieldReader; -use schema::Schema; -use termdict::TermDictionary; +use fastfield::{self, FastFieldNotAvailableError}; use fastfield::{FastValue, MultiValueIntFastFieldReader}; -use schema::Cardinality; use fieldnorm::FieldNormReader; +use schema::Cardinality; +use schema::Document; +use schema::Field; +use schema::FieldType; +use schema::Schema; +use std::collections::HashMap; +use std::fmt; +use std::sync::Arc; +use std::sync::RwLock; +use store::StoreReader; +use termdict::TermDictionary; +use DocId; +use Result; /// Entry point to access all of the datastructures of the `Segment` /// @@ -109,12 +109,12 @@ impl SegmentReader { ) -> fastfield::Result> { let field_entry = self.schema.get_field_entry(field); if 
Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue) - { - self.fast_fields_composite - .open_read(field) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(FastFieldReader::open) - } else { + { + self.fast_fields_composite + .open_read(field) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) + .map(FastFieldReader::open) + } else { Err(FastFieldNotAvailableError::new(field_entry)) } } @@ -127,17 +127,17 @@ impl SegmentReader { ) -> fastfield::Result> { let field_entry = self.schema.get_field_entry(field); if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues) - { - let idx_reader = self.fast_fields_composite - .open_read_with_idx(field, 0) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(FastFieldReader::open)?; - let vals_reader = self.fast_fields_composite - .open_read_with_idx(field, 1) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(FastFieldReader::open)?; - Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader)) - } else { + { + let idx_reader = self.fast_fields_composite + .open_read_with_idx(field, 0) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) + .map(FastFieldReader::open)?; + let vals_reader = self.fast_fields_composite + .open_read_with_idx(field, 1) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) + .map(FastFieldReader::open)?; + Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader)) + } else { Err(FastFieldNotAvailableError::new(field_entry)) } } @@ -175,12 +175,14 @@ impl SegmentReader { /// They are simply stored as a fast field, serialized in /// the `.fieldnorm` file of the segment. pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader { - if let Some(fieldnorm_source) = self.fieldnorms_composite - .open_read(field) { + if let Some(fieldnorm_source) = self.fieldnorms_composite.open_read(field) { FieldNormReader::open(fieldnorm_source) } else { let field_name = self.schema.get_field_name(field); - let err_msg= format!("Field norm not found for field {:?}. Was it market as indexed during indexing.", field_name); + let err_msg = format!( + "Field norm not found for field {:?}. 
Was it marked as indexed during indexing?", field_name ); panic!(err_msg); } }
@@ -215,13 +217,12 @@ let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?; let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?; - let delete_bitset_opt = - if segment.meta().has_deletes() { - let delete_data = segment.open_read(SegmentComponent::DELETE)?; - Some(DeleteBitSet::open(delete_data)) - } else { - None - }; + let delete_bitset_opt = if segment.meta().has_deletes() { + let delete_data = segment.open_read(SegmentComponent::DELETE)?; + Some(DeleteBitSet::open(delete_data)) + } else { + None + }; let schema = segment.schema(); Ok(SegmentReader {
diff --git a/src/datastruct/skip/mod.rs b/src/datastruct/skip/mod.rs index 7f99888d2..bed5f61eb 100644 --- a/src/datastruct/skip/mod.rs +++ b/src/datastruct/skip/mod.rs
@@ -1,10 +1,10 @@ #![allow(dead_code)] -mod skiplist_builder; mod skiplist; +mod skiplist_builder; -pub use self::skiplist_builder::SkipListBuilder; pub use self::skiplist::SkipList; +pub use self::skiplist_builder::SkipListBuilder; #[cfg(test)] mod tests {
diff --git a/src/datastruct/skip/skiplist.rs b/src/datastruct/skip/skiplist.rs index ef5491ac0..d7e8ed6a5 100644 --- a/src/datastruct/skip/skiplist.rs +++ b/src/datastruct/skip/skiplist.rs
@@ -1,6 +1,6 @@ use common::{BinarySerializable, VInt}; -use std::marker::PhantomData; use std::cmp::max; +use std::marker::PhantomData; static EMPTY: [u8; 0] = [];
diff --git a/src/datastruct/skip/skiplist_builder.rs b/src/datastruct/skip/skiplist_builder.rs index 0d8b7d416..6a698a2c7 100644 --- a/src/datastruct/skip/skiplist_builder.rs +++ b/src/datastruct/skip/skiplist_builder.rs
@@ -1,7 +1,7 @@ -use std::io::Write; -use common::{BinarySerializable, VInt, is_power_of_2}; -use std::marker::PhantomData; +use common::{is_power_of_2, BinarySerializable, VInt}; use std::io; +use std::io::Write; +use std::marker::PhantomData; struct LayerBuilder<T: BinarySerializable> { period_mask: usize,
diff --git a/src/datastruct/stacker/expull.rs b/src/datastruct/stacker/expull.rs index 0c0a56e9c..d6e71b771 100644 --- a/src/datastruct/stacker/expull.rs +++ b/src/datastruct/stacker/expull.rs
@@ -1,5 +1,5 @@ -use std::mem; use super::heap::{Heap, HeapAllocable}; +use std::mem; #[inline] pub fn is_power_of_2(val: u32) -> bool {
@@ -99,8 +99,8 @@ impl<'a> Iterator for ExpUnrolledLinkedListIterator<'a> { #[cfg(test)] mod tests { - use super::*; use super::super::heap::Heap; + use super::*; #[test] fn test_stack() {
@@ -120,14 +120,13 @@ } } - } -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use test::Bencher; - use super::Heap; use super::ExpUnrolledLinkedList; + use super::Heap; + use test::Bencher; const NUM_STACK: usize = 10_000; const STACK_SIZE: u32 = 1000;
@@ -166,4 +165,4 @@ heap.clear(); }); } -} \ No newline at end of file +}
diff --git a/src/datastruct/stacker/hashmap.rs b/src/datastruct/stacker/hashmap.rs index 16bb4d684..ae24920b1 100644 --- a/src/datastruct/stacker/hashmap.rs +++ b/src/datastruct/stacker/hashmap.rs
@@ -1,7 +1,7 @@ +use super::heap::{BytesRef, Heap, HeapAllocable}; +use postings::UnorderedTermId; use std::iter; use std::mem; -use postings::UnorderedTermId; -use super::heap::{BytesRef, Heap, HeapAllocable}; mod murmurhash2 {
@@ -117,11 +117,7 @@ struct QuadraticProbing { impl QuadraticProbing { fn compute(hash: usize, mask: usize) -> QuadraticProbing { - QuadraticProbing { - hash, - i: 0, - mask, - } + QuadraticProbing { hash, i: 0, mask } }
#[inline] @@ -135,21 +131,18 @@ use std::slice; pub struct Iter<'a: 'b, 'b> { hashmap: &'b TermHashMap<'a>, - inner: slice::Iter<'a, usize> + inner: slice::Iter<'a, usize>, } impl<'a, 'b> Iterator for Iter<'a, 'b> { type Item = (&'b [u8], u32, UnorderedTermId); fn next(&mut self) -> Option { - self.inner - .next() - .cloned() - .map(move |bucket: usize| { - let kv = self.hashmap.table[bucket]; - let (key, offset): (&'b [u8], u32) = self.hashmap.get_key_value(kv.key_value_addr); - (key, offset, bucket as UnorderedTermId) - }) + self.inner.next().cloned().map(move |bucket: usize| { + let kv = self.hashmap.table[bucket]; + let (key, offset): (&'b [u8], u32) = self.hashmap.get_key_value(kv.key_value_addr); + (key, offset, bucket as UnorderedTermId) + }) } } @@ -183,14 +176,15 @@ impl<'a> TermHashMap<'a> { pub fn set_bucket(&mut self, hash: u32, key_value_addr: BytesRef, bucket: usize) { self.occupied.push(bucket); self.table[bucket] = KeyValue { - key_value_addr, hash + key_value_addr, + hash, }; } pub fn iter<'b: 'a>(&'b self) -> Iter<'a, 'b> { Iter { inner: self.occupied.iter(), - hashmap: &self + hashmap: &self, } } @@ -225,8 +219,8 @@ impl<'a> TermHashMap<'a> { #[cfg(all(test, unstable))] mod bench { - use test::Bencher; use super::murmurhash2::murmurhash2; + use test::Bencher; #[bench] fn bench_murmurhash_2(b: &mut Bencher) { @@ -246,11 +240,11 @@ mod bench { #[cfg(test)] mod tests { - use super::*; use super::super::heap::{Heap, HeapAllocable}; use super::murmurhash2::murmurhash2; - use std::collections::HashSet; use super::split_memory; + use super::*; + use std::collections::HashSet; struct TestValue { val: u32, @@ -332,5 +326,4 @@ mod tests { assert_eq!(set.len(), 10_000); } - } diff --git a/src/datastruct/stacker/heap.rs b/src/datastruct/stacker/heap.rs index fbefe6aa7..9176e0ce1 100644 --- a/src/datastruct/stacker/heap.rs +++ b/src/datastruct/stacker/heap.rs @@ -1,7 +1,7 @@ +use byteorder::{ByteOrder, NativeEndian}; use std::cell::UnsafeCell; use std::mem; use std::ptr; -use byteorder::{ByteOrder, NativeEndian}; /// `BytesRef` refers to a slice in tantivy's custom `Heap`. /// diff --git a/src/datastruct/stacker/mod.rs b/src/datastruct/stacker/mod.rs index 811bfeee7..990c6dbcb 100644 --- a/src/datastruct/stacker/mod.rs +++ b/src/datastruct/stacker/mod.rs @@ -1,10 +1,10 @@ +mod expull; pub(crate) mod hashmap; mod heap; -mod expull; -pub use self::heap::{Heap, HeapAllocable}; pub use self::expull::ExpUnrolledLinkedList; pub use self::hashmap::TermHashMap; +pub use self::heap::{Heap, HeapAllocable}; #[test] fn test_unrolled_linked_list() { diff --git a/src/directory/directory.rs b/src/directory/directory.rs index 1019a1efa..24c4e4efd 100644 --- a/src/directory/directory.rs +++ b/src/directory/directory.rs @@ -1,11 +1,11 @@ -use std::marker::Send; -use std::fmt; -use std::path::Path; use directory::error::{DeleteError, OpenReadError, OpenWriteError}; use directory::{ReadOnlySource, WritePtr}; -use std::result; +use std::fmt; use std::io; +use std::marker::Send; use std::marker::Sync; +use std::path::Path; +use std::result; /// Write-once read many (WORM) abstraction for where /// tantivy's data should be stored. diff --git a/src/directory/error.rs b/src/directory/error.rs index eda224fc0..12145e8a4 100644 --- a/src/directory/error.rs +++ b/src/directory/error.rs @@ -1,7 +1,7 @@ use std::error::Error as StdError; -use std::path::PathBuf; -use std::io; use std::fmt; +use std::io; +use std::path::PathBuf; /// General IO error with an optional path to the offending file. 
#[derive(Debug)]
diff --git a/src/directory/managed_directory.rs b/src/directory/managed_directory.rs index 0001759ea..8311df4c8 100644 --- a/src/directory/managed_directory.rs +++ b/src/directory/managed_directory.rs
@@ -1,18 +1,18 @@ -use std::path::{Path, PathBuf}; -use serde_json; +use core::MANAGED_FILEPATH; use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError}; use directory::{ReadOnlySource, WritePtr}; -use std::result; -use std::io; -use Directory; -use std::sync::{Arc, RwLock}; -use std::collections::HashSet; -use std::sync::RwLockWriteGuard; -use std::io::Write; -use core::MANAGED_FILEPATH; -use std::collections::HashMap; -use std::fmt; use error::{ErrorKind, Result, ResultExt}; +use serde_json; +use std::collections::HashMap; +use std::collections::HashSet; +use std::fmt; +use std::io; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::result; +use std::sync::RwLockWriteGuard; +use std::sync::{Arc, RwLock}; +use Directory; /// Wrapper of directories that keeps track of files created by Tantivy. ///
@@ -282,10 +282,10 @@ impl Clone for ManagedDirectory { mod tests { use super::*; - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] use directory::MmapDirectory; - use std::path::Path; use std::io::Write; + use std::path::Path; use tempdir::TempDir; lazy_static! {
@@ -294,7 +294,7 @@ } #[test] - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] fn test_managed_directory() { let tempdir = TempDir::new("index").unwrap(); let tempdir_path = PathBuf::from(tempdir.path());
@@ -343,7 +343,7 @@ } #[test] - #[cfg(feature="mmap ")] + #[cfg(feature = "mmap")] fn test_managed_directory_gc_while_mmapped() { let tempdir = TempDir::new("index").unwrap(); let tempdir_path = PathBuf::from(tempdir.path());
@@ -373,7 +373,7 @@ } #[test] - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] fn test_managed_directory_protect() { let tempdir = TempDir::new("index").unwrap(); let tempdir_path = PathBuf::from(tempdir.path());
diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index 4075ddf26..aff713aec 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs
@@ -1,17 +1,17 @@ use atomicwrites; use common::make_io_err; -use directory::Directory; use directory::error::{DeleteError, IOError, OpenDirectoryError, OpenReadError, OpenWriteError}; -use directory::ReadOnlySource; use directory::shared_vec_slice::SharedVecSlice; +use directory::Directory; +use directory::ReadOnlySource; use directory::WritePtr; use fst::raw::MmapReadOnly; use std::collections::hash_map::Entry as HashMapEntry; use std::collections::HashMap; use std::convert::From; use std::fmt; -use std::fs::{self, File}; use std::fs::OpenOptions; +use std::fs::{self, File}; use std::io::{self, Seek, SeekFrom}; use std::io::{BufWriter, Read, Write}; use std::path::{Path, PathBuf};
diff --git a/src/directory/mod.rs b/src/directory/mod.rs index e2a7d670e..736c31e73 100644 --- a/src/directory/mod.rs +++ b/src/directory/mod.rs
@@ -4,29 +4,29 @@ WORM directory abstraction. */ -#[cfg(feature="mmap")] +#[cfg(feature = "mmap")] mod mmap_directory; -mod ram_directory; mod directory; +mod managed_directory; +mod ram_directory; mod read_only_source; mod shared_vec_slice; -mod managed_directory; /// Errors specific to the directory module.
pub mod error; use std::io::{BufWriter, Seek, Write}; -pub use self::read_only_source::ReadOnlySource; pub use self::directory::Directory; pub use self::ram_directory::RAMDirectory; +pub use self::read_only_source::ReadOnlySource; -#[cfg(feature="mmap")] +#[cfg(feature = "mmap")] pub use self::mmap_directory::MmapDirectory; -pub(crate) use self::read_only_source::SourceRead; pub(crate) use self::managed_directory::{FileProtection, ManagedDirectory}; +pub(crate) use self::read_only_source::SourceRead; /// Synonym of Seek + Write pub trait SeekableWrite: Seek + Write {} pub type WritePtr = BufWriter<Box<SeekableWrite>>;
@@ -42,8 +42,8 @@ mod tests { use super::*; - use std::path::Path; use std::io::{Seek, SeekFrom, Write}; + use std::path::Path; lazy_static! { static ref TEST_PATH: &'static Path = Path::new("some_path_for_test"); }
@@ -56,7 +56,7 @@ } #[test] - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] fn test_mmap_directory() { let mut mmap_directory = MmapDirectory::create_from_tempdir().unwrap(); test_directory(&mut mmap_directory);
diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 15b538a30..383643836 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs
@@ -1,14 +1,14 @@ +use super::shared_vec_slice::SharedVecSlice; +use common::make_io_err; +use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError}; +use directory::WritePtr; +use directory::{Directory, ReadOnlySource}; use std::collections::HashMap; use std::fmt; use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; use std::result; use std::sync::{Arc, RwLock}; -use common::make_io_err; -use directory::{Directory, ReadOnlySource}; -use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError}; -use directory::WritePtr; -use super::shared_vec_slice::SharedVecSlice; /// Writer associated with the `RAMDirectory` ///
diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs index fe17742d7..fe5c9ca0f 100644 --- a/src/directory/read_only_source.rs +++ b/src/directory/read_only_source.rs
@@ -1,11 +1,11 @@ -#[cfg(feature="mmap")] -use fst::raw::MmapReadOnly; -use std::ops::Deref; use super::shared_vec_slice::SharedVecSlice; use common::HasLen; -use std::slice; -use std::io::{self, Read}; +#[cfg(feature = "mmap")] +use fst::raw::MmapReadOnly; use stable_deref_trait::{CloneStableDeref, StableDeref}; +use std::io::{self, Read}; +use std::ops::Deref; +use std::slice; /// Read object that represents files in tantivy. ///
@@ -15,7 +15,7 @@ use stable_deref_trait::{CloneStableDeref, StableDeref}; /// hold by this object should never be altered or destroyed. pub enum ReadOnlySource { /// Mmap source of data - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] Mmap(MmapReadOnly), /// Wrapping a `Vec<u8>` Anonymous(SharedVecSlice),
@@ -41,7 +41,7 @@ impl ReadOnlySource { /// Returns the data underlying the ReadOnlySource object. pub fn as_slice(&self) -> &[u8] { match *self { - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] ReadOnlySource::Mmap(ref mmap_read_only) => unsafe { mmap_read_only.as_slice() }, ReadOnlySource::Anonymous(ref shared_vec) => shared_vec.as_slice(), }
@@ -66,9 +66,14 @@ impl ReadOnlySource { /// 1KB slice is remaining, the whole `500MBs` /// are retained in memory.
pub fn slice(&self, from_offset: usize, to_offset: usize) -> ReadOnlySource { - assert!(from_offset <= to_offset, "Requested negative slice [{}..{}]", from_offset, to_offset); + assert!( + from_offset <= to_offset, + "Requested negative slice [{}..{}]", + from_offset, + to_offset + ); match *self { - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] ReadOnlySource::Mmap(ref mmap_read_only) => { let sliced_mmap = mmap_read_only.range(from_offset, to_offset - from_offset); ReadOnlySource::Mmap(sliced_mmap) @@ -130,13 +135,11 @@ impl SourceRead { pub fn slice_from(&self, start: usize) -> &[u8] { &self.cursor[start..] - } pub fn get(&self, idx: usize) -> u8 { self.cursor[idx] } - } impl AsRef<[u8]> for SourceRead { diff --git a/src/docset.rs b/src/docset.rs index 356ae0b74..ecf184c1e 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -1,8 +1,8 @@ -use DocId; +use common::BitSet; use std::borrow::Borrow; use std::borrow::BorrowMut; use std::cmp::Ordering; -use common::BitSet; +use DocId; /// Expresses the outcome of a call to `DocSet`'s `.skip_next(...)`. #[derive(PartialEq, Eq, Debug)] diff --git a/src/error.rs b/src/error.rs index 74a612014..4ec4bfe25 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,13 +2,13 @@ use std::io; -use std::path::PathBuf; -use std::sync::PoisonError; use directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError}; +use fastfield::FastFieldNotAvailableError; use query; use schema; -use fastfield::FastFieldNotAvailableError; use serde_json; +use std::path::PathBuf; +use std::sync::PoisonError; error_chain!( errors { diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index 9c666504f..3f8a0eb5b 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -1,10 +1,10 @@ use bit_set::BitSet; -use directory::WritePtr; -use std::io::Write; -use std::io; -use directory::ReadOnlySource; -use DocId; use common::HasLen; +use directory::ReadOnlySource; +use directory::WritePtr; +use std::io; +use std::io::Write; +use DocId; /// Write a delete `BitSet` /// @@ -62,10 +62,8 @@ impl DeleteBitSet { b & (1u8 << shift) != 0 } } - } - impl HasLen for DeleteBitSet { fn len(&self) -> usize { self.len @@ -74,10 +72,10 @@ impl HasLen for DeleteBitSet { #[cfg(test)] mod tests { - use std::path::PathBuf; + use super::*; use bit_set::BitSet; use directory::*; - use super::*; + use std::path::PathBuf; fn test_delete_bitset_helper(bitset: &BitSet) { let test_path = PathBuf::from("test"); diff --git a/src/fastfield/error.rs b/src/fastfield/error.rs index bdc82c7e8..a05ef2284 100644 --- a/src/fastfield/error.rs +++ b/src/fastfield/error.rs @@ -1,5 +1,5 @@ -use std::result; use schema::FieldEntry; +use std::result; /// `FastFieldNotAvailableError` is returned when the /// user requested for a fast field reader, and the field was not diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index 5490cfd19..182b17989 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -1,8 +1,8 @@ use super::MultiValueIntFastFieldReader; -use DocId; -use termdict::TermOrdinal; use schema::Facet; use termdict::TermDictionary; +use termdict::TermOrdinal; +use DocId; /// The facet reader makes it possible to access the list of /// facets associated to a given document in a specific diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 725d8b4b4..6b1044ffe 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -23,26 +23,26 @@ values stored. Read access performance is comparable to that of an array lookup. 
*/ +pub use self::delete::write_delete_bitset; +pub use self::delete::DeleteBitSet; +pub use self::error::{FastFieldNotAvailableError, Result}; +pub use self::facet_reader::FacetReader; +pub use self::multivalued::{MultiValueIntFastFieldReader, MultiValueIntFastFieldWriter}; +pub use self::reader::FastFieldReader; +pub use self::serializer::FastFieldSerializer; +pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; use common; use schema::Cardinality; use schema::FieldType; use schema::Value; -pub use self::delete::DeleteBitSet; -pub use self::delete::write_delete_bitset; -pub use self::error::{FastFieldNotAvailableError, Result}; -pub use self::facet_reader::FacetReader; -pub use self::multivalued::{MultiValueIntFastFieldWriter, MultiValueIntFastFieldReader}; -pub use self::reader::FastFieldReader; -pub use self::serializer::FastFieldSerializer; -pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; -mod reader; -mod writer; -mod serializer; -mod error; mod delete; +mod error; mod facet_reader; mod multivalued; +mod reader; +mod serializer; +mod writer; /// Trait for types that are allowed for fast fields: (u64 or i64). pub trait FastValue: Default + Clone + Copy { @@ -121,19 +121,19 @@ fn value_to_u64(value: &Value) -> u64 { #[cfg(test)] mod tests { + use super::*; use common::CompositeFile; use directory::{Directory, RAMDirectory, WritePtr}; use fastfield::FastFieldReader; use rand::Rng; use rand::SeedableRng; use rand::XorShiftRng; - use schema::{Schema, SchemaBuilder}; use schema::Document; - use schema::FAST; use schema::Field; + use schema::FAST; + use schema::{Schema, SchemaBuilder}; use std::collections::HashMap; use std::path::Path; - use super::*; lazy_static! { pub static ref SCHEMA: Schema = { @@ -141,9 +141,7 @@ mod tests { schema_builder.add_u64_field("field", FAST); schema_builder.build() }; - pub static ref FIELD: Field = { - SCHEMA.get_field("field").unwrap() - }; + pub static ref FIELD: Field = { SCHEMA.get_field("field").unwrap() }; } #[test] @@ -409,17 +407,17 @@ mod tests { } -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use super::tests::{SCHEMA, generate_permutation}; - use test::{self, Bencher}; use super::tests::FIELD; + use super::tests::{generate_permutation, SCHEMA}; + use super::*; use common::CompositeFile; use directory::{Directory, RAMDirectory, WritePtr}; use fastfield::FastFieldReader; use std::collections::HashMap; use std::path::Path; - use super::*; + use test::{self, Bencher}; #[bench] fn bench_intfastfield_linear_veclookup(b: &mut Bencher) { @@ -515,4 +513,4 @@ mod bench { } } -} \ No newline at end of file +} diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 57a5db082..3d78e46a0 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -1,15 +1,15 @@ -mod writer; mod reader; +mod writer; -pub use self::writer::MultiValueIntFastFieldWriter; pub use self::reader::MultiValueIntFastFieldReader; +pub use self::writer::MultiValueIntFastFieldWriter; #[cfg(test)] mod tests { - use schema::SchemaBuilder; use schema::Cardinality; use schema::IntOptions; + use schema::SchemaBuilder; use Index; #[test] diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 0fe2e12f3..668c28ac9 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -1,5 +1,5 @@ -use DocId; use fastfield::{FastFieldReader, FastValue}; +use DocId; /// Reader for a multivalued `u64` fast field. 
/// diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index ef67eb009..e8aac8ec0 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -1,12 +1,12 @@ -use fastfield::FastFieldSerializer; use fastfield::serializer::FastSingleFieldSerializer; use fastfield::value_to_u64; -use std::collections::HashMap; -use DocId; +use fastfield::FastFieldSerializer; +use itertools::Itertools; use postings::UnorderedTermId; use schema::{Document, Field}; +use std::collections::HashMap; use std::io; -use itertools::Itertools; +use DocId; /// Writer for multi-valued (as in, more than one value per document) /// int fast field. @@ -37,7 +37,6 @@ pub struct MultiValueIntFastFieldWriter { } impl MultiValueIntFastFieldWriter { - /// Creates a new `IntFastFieldWriter` pub(crate) fn new(field: Field, is_facet: bool) -> Self { MultiValueIntFastFieldWriter { @@ -68,7 +67,7 @@ impl MultiValueIntFastFieldWriter { pub fn add_document(&mut self, doc: &Document) { self.next_doc(); // facets are indexed in the `SegmentWriter` as we encode their unordered id. - if !self.is_facet { + if !self.is_facet { for field_value in doc.field_values() { if field_value.field() == self.field { self.add_val(value_to_u64(field_value.value())); diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 389fdcfa9..bb4ca5aab 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -1,19 +1,19 @@ -use common::BinarySerializable; +use super::FastValue; use common::bitpacker::BitUnpacker; -use common::CompositeFile; use common::compute_num_bits; -use directory::{Directory, RAMDirectory, WritePtr}; +use common::BinarySerializable; +use common::CompositeFile; use directory::ReadOnlySource; -use DocId; +use directory::{Directory, RAMDirectory, WritePtr}; use fastfield::{FastFieldSerializer, FastFieldsWriter}; use owning_ref::OwningRef; -use schema::FAST; use schema::SchemaBuilder; +use schema::FAST; use std::collections::HashMap; use std::marker::PhantomData; use std::mem; use std::path::Path; -use super::FastValue; +use DocId; /// Trait for accessing a fastfield. /// diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index 11dbfa7e9..5b15c76bb 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -1,10 +1,10 @@ -use common::BinarySerializable; -use directory::WritePtr; -use schema::Field; use common::bitpacker::BitPacker; use common::compute_num_bits; -use common::CountingWriter; +use common::BinarySerializable; use common::CompositeWrite; +use common::CountingWriter; +use directory::WritePtr; +use schema::Field; use std::io::{self, Write}; /// `FastFieldSerializer` is in charge of serializing diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index f6846b053..f37284060 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -1,13 +1,13 @@ -use schema::{Cardinality, Document, Field, Schema}; -use fastfield::FastFieldSerializer; -use std::io; -use schema::FieldType; -use common; -use common::VInt; -use std::collections::HashMap; -use postings::UnorderedTermId; use super::multivalued::MultiValueIntFastFieldWriter; +use common; use common::BinarySerializable; +use common::VInt; +use fastfield::FastFieldSerializer; +use postings::UnorderedTermId; +use schema::FieldType; +use schema::{Cardinality, Document, Field, Schema}; +use std::collections::HashMap; +use std::io; /// The fastfieldswriter regroup all of the fast field writers. 
pub struct FastFieldsWriter { diff --git a/src/fieldnorm/code.rs b/src/fieldnorm/code.rs index 242e1f49d..71079bd02 100644 --- a/src/fieldnorm/code.rs +++ b/src/fieldnorm/code.rs @@ -1,10 +1,8 @@ - #[inline(always)] pub fn id_to_fieldnorm(id: u8) -> u32 { FIELD_NORMS_TABLE[id as usize] } - #[inline(always)] pub fn fieldnorm_to_id(fieldnorm: u32) -> u8 { FIELD_NORMS_TABLE @@ -12,45 +10,34 @@ pub fn fieldnorm_to_id(fieldnorm: u32) -> u8 { .unwrap_or_else(|idx| idx - 1) as u8 } - pub const FIELD_NORMS_TABLE: [u32; 256] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, - 56, 60, 64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144, - 152, 168, 184, 200, 216, 232, 248, 264, 280, 312, 344, 376, 408, 440, 472, 504, - 536, 600, 664, 728, 792, 856, 920, 984, - 1048, 1176, 1304, 1432, 1560, 1688, 1816, 1944, - 2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120, - 4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, - 10264, 11288, 12312, 13336, 14360, 15384, - 16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, - 32792, 36888, 40984, 45080, 49176, 53272, 57368, 61464, - 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480, - 163864, 180248, 196632, 213016, 229400, 245784, 262168, - 294936, 327704, 360472, 393240, 426008, 458776, - 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, - 983064, 1048600, 1179672, 1310744, 1441816, 1572888, 1703960, 1835032, - 1966104, 2097176, 2359320, 2621464, 2883608, 3145752, 3407896, 3670040, 3932184, - 4194328, 4718616, 5242904, 5767192, 6291480, 6815768, 7340056, 7864344, 8388632, 9437208, - 10485784, 11534360, 12582936, 13631512, 14680088, 15728664, 16777240, 18874392, 20971544, - 23068696, 25165848, 27263000, 29360152, 31457304, 33554456, 37748760, 41943064, - 46137368, 50331672, 54525976, 58720280, 62914584, 67108888, 75497496, 83886104, - 92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968, 167772184, - 184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912, 335544344, - 369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800, 671088664, - 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576, 1342177304, - 1476395032, 1610612760, 1744830488, 1879048216, 2013265944 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 96, 104, 112, 120, 128, 136, 144, 152, 168, 184, 200, 216, 232, + 248, 264, 280, 312, 344, 376, 408, 440, 472, 504, 536, 600, 664, 728, 792, 856, 920, 984, 1048, + 1176, 1304, 1432, 1560, 1688, 1816, 1944, 2072, 2328, 2584, 2840, 3096, 3352, 3608, 3864, 4120, + 4632, 5144, 5656, 6168, 6680, 7192, 7704, 8216, 9240, 10264, 11288, 12312, 13336, 14360, 15384, + 16408, 18456, 20504, 22552, 24600, 26648, 28696, 30744, 32792, 36888, 40984, 45080, 49176, + 53272, 57368, 61464, 65560, 73752, 81944, 90136, 98328, 106520, 114712, 122904, 131096, 147480, + 163864, 180248, 196632, 213016, 229400, 245784, 262168, 294936, 327704, 360472, 393240, 426008, + 458776, 491544, 524312, 589848, 655384, 720920, 786456, 851992, 917528, 983064, 1048600, + 1179672, 1310744, 1441816, 1572888, 1703960, 1835032, 1966104, 2097176, 2359320, 2621464, + 2883608, 3145752, 3407896, 3670040, 3932184, 4194328, 4718616, 5242904, 5767192, 
6291480, + 6815768, 7340056, 7864344, 8388632, 9437208, 10485784, 11534360, 12582936, 13631512, 14680088, + 15728664, 16777240, 18874392, 20971544, 23068696, 25165848, 27263000, 29360152, 31457304, + 33554456, 37748760, 41943064, 46137368, 50331672, 54525976, 58720280, 62914584, 67108888, + 75497496, 83886104, 92274712, 100663320, 109051928, 117440536, 125829144, 134217752, 150994968, + 167772184, 184549400, 201326616, 218103832, 234881048, 251658264, 268435480, 301989912, + 335544344, 369098776, 402653208, 436207640, 469762072, 503316504, 536870936, 603979800, + 671088664, 738197528, 805306392, 872415256, 939524120, 1006632984, 1073741848, 1207959576, + 1342177304, 1476395032, 1610612760, 1744830488, 1879048216, 2013265944, ]; - - #[cfg(test)] mod tests { use super::{fieldnorm_to_id, id_to_fieldnorm, FIELD_NORMS_TABLE}; - #[test] fn test_decode_code() { assert_eq!(fieldnorm_to_id(0), 0); @@ -103,4 +90,4 @@ mod tests { assert_eq!(FIELD_NORMS_TABLE[i], decode_fieldnorm_byte(i as u8)); } } -} \ No newline at end of file +} diff --git a/src/fieldnorm/mod.rs b/src/fieldnorm/mod.rs index 45fa92167..4746c1407 100644 --- a/src/fieldnorm/mod.rs +++ b/src/fieldnorm/mod.rs @@ -17,13 +17,12 @@ //! //! This trick is used by the [BM25 similarity](). mod code; +mod reader; mod serializer; mod writer; -mod reader; pub use self::reader::FieldNormReader; -pub use self::writer::FieldNormsWriter; pub use self::serializer::FieldNormsSerializer; +pub use self::writer::FieldNormsWriter; use self::code::{fieldnorm_to_id, id_to_fieldnorm}; - diff --git a/src/fieldnorm/reader.rs b/src/fieldnorm/reader.rs index 982eb1f4e..e16f3defb 100644 --- a/src/fieldnorm/reader.rs +++ b/src/fieldnorm/reader.rs @@ -1,8 +1,7 @@ -use super::{id_to_fieldnorm, fieldnorm_to_id}; +use super::{fieldnorm_to_id, id_to_fieldnorm}; use directory::ReadOnlySource; use DocId; - /// Reads the fieldnorm associated to a document. /// The fieldnorm represents the length associated to /// a given Field of a given document. @@ -21,16 +20,13 @@ use DocId; /// precompute computationally expensive functions of the fieldnorm /// in a very short array. pub struct FieldNormReader { - data: ReadOnlySource + data: ReadOnlySource, } impl FieldNormReader { - /// Opens a field norm reader given its data source. pub fn open(data: ReadOnlySource) -> Self { - FieldNormReader { - data - } + FieldNormReader { data } } /// Returns the `fieldnorm` associated to a doc id. @@ -71,12 +67,13 @@ impl FieldNormReader { #[cfg(test)] impl From> for FieldNormReader { fn from(field_norms: Vec) -> FieldNormReader { - let field_norms_id = field_norms.into_iter() + let field_norms_id = field_norms + .into_iter() .map(FieldNormReader::fieldnorm_to_id) .collect::>(); let field_norms_data = ReadOnlySource::from(field_norms_id); FieldNormReader { - data: field_norms_data + data: field_norms_data, } } -} \ No newline at end of file +} diff --git a/src/fieldnorm/serializer.rs b/src/fieldnorm/serializer.rs index e0f413ae2..5308d9f6a 100644 --- a/src/fieldnorm/serializer.rs +++ b/src/fieldnorm/serializer.rs @@ -1,26 +1,21 @@ -use directory::WritePtr; -use std::io; use common::CompositeWrite; +use directory::WritePtr; use schema::Field; +use std::io; use std::io::Write; - pub struct FieldNormsSerializer { composite_write: CompositeWrite, } impl FieldNormsSerializer { - /// Constructor pub fn from_write(write: WritePtr) -> io::Result { // just making room for the pointer to header. 
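The `fieldnorm/code.rs` hunk above reflows `FIELD_NORMS_TABLE`, the 256-entry monotonic table that compresses a document's field length into one byte: `fieldnorm_to_id` binary-searches the table and rounds down to the nearest representable length, and `id_to_fieldnorm` is a plain lookup. A toy reconstruction with an 8-entry table illustrates the round trip; the rounding logic mirrors the diff, but the table values here are invented for the example.

```rust
// Minimal sketch of the 1-byte fieldnorm encoding: a monotonically
// increasing table of representable lengths. Encoding picks the largest
// table entry <= the exact fieldnorm, so decoding is lossy but never
// overestimates. This tiny table stands in for FIELD_NORMS_TABLE.
const TABLE: [u32; 8] = [0, 1, 2, 4, 8, 16, 32, 64];

fn fieldnorm_to_id(fieldnorm: u32) -> u8 {
    TABLE
        .binary_search(&fieldnorm)
        .unwrap_or_else(|idx| idx - 1) as u8 // round down on a miss
}

fn id_to_fieldnorm(id: u8) -> u32 {
    TABLE[id as usize]
}

fn main() {
    for n in 0u32..70 {
        // Decoding never overestimates the original length.
        assert!(id_to_fieldnorm(fieldnorm_to_id(n)) <= n);
    }
    assert_eq!(fieldnorm_to_id(3), 2); // 3 rounds down to table entry 2
    assert_eq!(id_to_fieldnorm(2), 2);
}
```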
let composite_write = CompositeWrite::wrap(write); - Ok(FieldNormsSerializer { - composite_write - }) + Ok(FieldNormsSerializer { composite_write }) } - pub fn serialize_field(&mut self, field: Field, fieldnorms_data: &[u8]) -> io::Result<()> { let write = self.composite_write.for_field(field); write.write_all(fieldnorms_data)?; @@ -32,6 +27,4 @@ impl FieldNormsSerializer { self.composite_write.close()?; Ok(()) } - } - diff --git a/src/fieldnorm/writer.rs b/src/fieldnorm/writer.rs index 77f7d5e62..c36ea99fa 100644 --- a/src/fieldnorm/writer.rs +++ b/src/fieldnorm/writer.rs @@ -1,26 +1,23 @@ use DocId; -use schema::Field; -use super::FieldNormsSerializer; -use std::io; -use schema::Schema; use super::fieldnorm_to_id; +use super::FieldNormsSerializer; +use schema::Field; +use schema::Schema; +use std::io; pub struct FieldNormsWriter { fields: Vec, - fieldnorms_buffer: Vec> + fieldnorms_buffer: Vec>, } impl FieldNormsWriter { - pub fn fields_with_fieldnorm(schema: &Schema) -> Vec { schema .fields() .iter() .enumerate() - .filter(|&(_, field_entry)| { - field_entry.is_indexed() - }) + .filter(|&(_, field_entry)| field_entry.is_indexed()) .map(|(field, _)| Field(field as u32)) .collect::>() } @@ -35,9 +32,7 @@ impl FieldNormsWriter { .unwrap_or(0); FieldNormsWriter { fields, - fieldnorms_buffer: (0..max_field) - .map(|_| Vec::new()) - .collect::>() + fieldnorms_buffer: (0..max_field).map(|_| Vec::new()).collect::>(), } } @@ -49,7 +44,10 @@ impl FieldNormsWriter { pub fn record(&mut self, doc: DocId, field: Field, fieldnorm: u32) { let fieldnorm_buffer: &mut Vec = &mut self.fieldnorms_buffer[field.0 as usize]; - assert!(fieldnorm_buffer.len() <= doc as usize, "Cannot register a given fieldnorm twice"); + assert!( + fieldnorm_buffer.len() <= doc as usize, + "Cannot register a given fieldnorm twice" + ); // we fill intermediary `DocId` as having a fieldnorm of 0. fieldnorm_buffer.resize(doc as usize + 1, 0u8); fieldnorm_buffer[doc as usize] = fieldnorm_to_id(fieldnorm); @@ -62,4 +60,4 @@ impl FieldNormsWriter { } Ok(()) } -} \ No newline at end of file +} diff --git a/src/functional_test.rs b/src/functional_test.rs index a232e434d..aaeb112c4 100644 --- a/src/functional_test.rs +++ b/src/functional_test.rs @@ -1,10 +1,10 @@ -use std::collections::HashSet; use rand::thread_rng; +use std::collections::HashSet; +use rand::distributions::{IndependentSample, Range}; use schema::*; use Index; use Searcher; -use rand::distributions::{IndependentSample, Range}; fn check_index_content(searcher: &Searcher, vals: &HashSet) { assert!(searcher.segment_readers().len() < 20); @@ -13,7 +13,7 @@ fn check_index_content(searcher: &Searcher, vals: &HashSet) { #[test] #[ignore] -#[cfg(feature="mmap")] +#[cfg(feature = "mmap")] fn test_indexing() { let mut schema_builder = SchemaBuilder::default(); diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index bc6686ab3..4c2597fbb 100644 --- a/src/indexer/delete_queue.rs +++ b/src/indexer/delete_queue.rs @@ -1,7 +1,7 @@ use super::operation::DeleteOperation; -use std::sync::{Arc, RwLock}; use std::mem; use std::ops::DerefMut; +use std::sync::{Arc, RwLock}; // The DeleteQueue is similar in conceptually to a multiple // consumer single producer broadcast channel. 
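The comment above describes the `DeleteQueue` as conceptually a multiple-consumer, single-producer broadcast channel: one producer appends `DeleteOperation`s and every consumer replays the shared log from its own cursor. A hedged sketch of that idea follows; it keeps the entire log alive for simplicity (which the real implementation avoids), and all type names are illustrative.

```rust
use std::sync::{Arc, RwLock};

// Illustration of the broadcast-channel concept, not tantivy's code.
#[derive(Clone)]
struct DeleteOperation {
    opstamp: u64,
    term: String,
}

#[derive(Clone, Default)]
struct DeleteQueueSketch {
    log: Arc<RwLock<Vec<DeleteOperation>>>,
}

struct CursorSketch {
    log: Arc<RwLock<Vec<DeleteOperation>>>,
    pos: usize, // each consumer remembers where it left off
}

impl DeleteQueueSketch {
    fn push(&self, op: DeleteOperation) {
        self.log.write().unwrap().push(op);
    }
    fn cursor(&self) -> CursorSketch {
        CursorSketch { log: Arc::clone(&self.log), pos: 0 }
    }
}

impl Iterator for CursorSketch {
    type Item = DeleteOperation;
    fn next(&mut self) -> Option<DeleteOperation> {
        let guard = self.log.read().unwrap();
        let op = guard.get(self.pos).cloned();
        if op.is_some() {
            self.pos += 1;
        }
        op
    }
}

fn main() {
    let queue = DeleteQueueSketch::default();
    let mut consumer_a = queue.cursor();
    queue.push(DeleteOperation { opstamp: 1, term: "obsolete".to_string() });
    let mut consumer_b = queue.cursor();
    // Both consumers see the same operation, each at its own pace.
    let op = consumer_a.next().unwrap();
    assert_eq!(op.opstamp, 1);
    assert_eq!(op.term, "obsolete");
    assert_eq!(consumer_b.next().unwrap().opstamp, 1);
    assert!(consumer_a.next().is_none());
}
```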
diff --git a/src/indexer/directory_lock.rs b/src/indexer/directory_lock.rs index 991e46cbb..b152a3c58 100644 --- a/src/indexer/directory_lock.rs +++ b/src/indexer/directory_lock.rs @@ -1,6 +1,6 @@ -use Directory; -use directory::error::OpenWriteError; use core::LOCKFILE_FILEPATH; +use directory::error::OpenWriteError; +use Directory; /// The directory lock is a mechanism used to /// prevent the creation of two [`IndexWriter`](struct.IndexWriter.html) diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 75225fcf6..0892b4fc7 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1,3 +1,6 @@ +use super::operation::AddOperation; +use super::segment_updater::SegmentUpdater; +use super::PreparedCommit; use bit_set::BitSet; use chan; use core::Index; @@ -6,31 +9,28 @@ use core::SegmentComponent; use core::SegmentId; use core::SegmentMeta; use core::SegmentReader; -use indexer::stamper::Stamper; -use futures::sync::oneshot::Receiver; +use datastruct::stacker::hashmap::split_memory; use datastruct::stacker::Heap; use directory::FileProtection; +use docset::DocSet; use error::{Error, ErrorKind, Result, ResultExt}; use fastfield::write_delete_bitset; +use futures::sync::oneshot::Receiver; use indexer::delete_queue::{DeleteCursor, DeleteQueue}; -use datastruct::stacker::hashmap::split_memory; use indexer::doc_opstamp_mapping::DocToOpstampMapping; -use indexer::MergePolicy; use indexer::operation::DeleteOperation; +use indexer::stamper::Stamper; +use indexer::DirectoryLock; +use indexer::MergePolicy; use indexer::SegmentEntry; use indexer::SegmentWriter; -use docset::DocSet; -use schema::IndexRecordOption; use schema::Document; +use schema::IndexRecordOption; use schema::Term; use std::mem; use std::mem::swap; -use std::thread::JoinHandle; -use indexer::DirectoryLock; -use super::operation::AddOperation; -use super::segment_updater::SegmentUpdater; -use super::PreparedCommit; use std::thread; +use std::thread::JoinHandle; // Size of the margin for the heap. A segment is closed when the remaining memory // in the heap goes below MARGIN_IN_BYTES. @@ -443,10 +443,7 @@ impl IndexWriter { } /// Merges a given list of segments - pub fn merge( - &mut self, - segment_ids: &[SegmentId], - ) -> Receiver { + pub fn merge(&mut self, segment_ids: &[SegmentId]) -> Receiver { self.segment_updater.start_merge(segment_ids) } @@ -642,12 +639,12 @@ impl IndexWriter { #[cfg(test)] mod tests { + use env_logger; + use error::*; use indexer::NoMergePolicy; use schema::{self, Document}; use Index; use Term; - use error::*; - use env_logger; #[test] fn test_lockfile_stops_duplicates() { diff --git a/src/indexer/log_merge_policy.rs b/src/indexer/log_merge_policy.rs index 64fbcf90a..9e5edf5e8 100644 --- a/src/indexer/log_merge_policy.rs +++ b/src/indexer/log_merge_policy.rs @@ -99,8 +99,8 @@ impl Default for LogMergePolicy { #[cfg(test)] mod tests { use super::*; - use indexer::merge_policy::MergePolicy; use core::{SegmentId, SegmentMeta}; + use indexer::merge_policy::MergePolicy; fn test_merge_policy() -> LogMergePolicy { let mut log_merge_policy = LogMergePolicy::default(); diff --git a/src/indexer/merge_policy.rs b/src/indexer/merge_policy.rs index 34177264c..4a4b31b2a 100644 --- a/src/indexer/merge_policy.rs +++ b/src/indexer/merge_policy.rs @@ -1,7 +1,7 @@ use core::SegmentId; use core::SegmentMeta; -use std::marker; use std::fmt::Debug; +use std::marker; /// Set of segment suggested for a merge. 
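The `directory_lock.rs` hunk above concerns the mechanism that prevents two `IndexWriter`s from opening the same directory (the imports reference `core::LOCKFILE_FILEPATH`, whose value is not shown in the diff). A common way to implement such a lock is sketched below: create the lock file with `create_new` (atomic, fails if the file exists) and remove it on drop. The file name and error handling are invented for illustration.

```rust
use std::fs::OpenOptions;
use std::io;
use std::path::Path;

// Sketch of a lockfile-based directory lock; not tantivy's exact code.
struct DirectoryLockSketch {
    path: std::path::PathBuf,
}

impl DirectoryLockSketch {
    fn acquire(dir: &Path) -> io::Result<DirectoryLockSketch> {
        let path = dir.join(".writer.lock"); // hypothetical file name
        OpenOptions::new()
            .write(true)
            .create_new(true) // atomic: AlreadyExists if another writer holds it
            .open(&path)?;
        Ok(DirectoryLockSketch { path })
    }
}

impl Drop for DirectoryLockSketch {
    fn drop(&mut self) {
        let _ = std::fs::remove_file(&self.path); // release on drop
    }
}

fn main() -> io::Result<()> {
    let dir = std::env::temp_dir();
    let lock = DirectoryLockSketch::acquire(&dir)?;
    // A second writer cannot lock the same directory.
    assert!(DirectoryLockSketch::acquire(&dir).is_err());
    drop(lock);
    assert!(DirectoryLockSketch::acquire(&dir).is_ok());
    Ok(())
}
```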
#[derive(Debug, Clone)] diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 5e4cc527e..55568f1b6 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1,24 +1,23 @@ -use error::{ErrorKind, Result}; -use core::SegmentReader; use core::Segment; -use DocId; +use core::SegmentReader; use core::SerializableSegment; -use indexer::SegmentSerializer; -use postings::InvertedIndexSerializer; -use itertools::Itertools; use docset::DocSet; +use error::{ErrorKind, Result}; use fastfield::DeleteBitSet; -use schema::{Field, Schema}; -use termdict::TermMerger; -use fastfield::FastFieldSerializer; use fastfield::FastFieldReader; -use store::StoreWriter; -use std::cmp::{max, min}; +use fastfield::FastFieldSerializer; +use fieldnorm::FieldNormReader; use fieldnorm::FieldNormsSerializer; use fieldnorm::FieldNormsWriter; -use fieldnorm::FieldNormReader; +use indexer::SegmentSerializer; +use itertools::Itertools; +use postings::InvertedIndexSerializer; use postings::Postings; - +use schema::{Field, Schema}; +use std::cmp::{max, min}; +use store::StoreWriter; +use termdict::TermMerger; +use DocId; fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 { let mut total_tokens = 0u64; @@ -38,15 +37,17 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 { total_tokens += reader.inverted_index(field).total_num_tokens(); } } - total_tokens + count - .iter() - .cloned() - .enumerate() - .map(|(fieldnorm_ord, count)| count as u64 * FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8) as u64) - .sum::() + total_tokens + + count + .iter() + .cloned() + .enumerate() + .map(|(fieldnorm_ord, count)| { + count as u64 * FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8) as u64 + }) + .sum::() } - pub struct IndexMerger { schema: Schema, readers: Vec, @@ -70,7 +71,6 @@ fn compute_min_max_val( .map(|doc_id| u64_reader.get(doc_id)) .minmax() .into_option() - } None => { // no deleted documents, @@ -162,7 +162,7 @@ impl IndexMerger { if let Some((seg_min_val, seg_max_val)) = compute_min_max_val( &u64_reader, reader.max_doc(), - reader.delete_bitset() + reader.delete_bitset(), ) { // the segment has some non-deleted documents min_val = min(min_val, seg_min_val); @@ -176,8 +176,10 @@ impl IndexMerger { } Err(_) => { let fieldname = self.schema.get_field_name(field); - let error_msg = - format!("Failed to find a fast field reader for field {:?}", fieldname); + let error_msg = format!( + "Failed to find a fast field reader for field {:?}", + fieldname + ); bail!(ErrorKind::SchemaError(error_msg)); } } @@ -211,7 +213,6 @@ impl IndexMerger { } fn write_postings(&self, serializer: &mut InvertedIndexSerializer) -> Result<()> { - let mut positions_buffer: Vec = Vec::with_capacity(1_000); let mut delta_computer = DeltaComputer::new(); @@ -318,7 +319,7 @@ impl IndexMerger { for (segment_ord, mut segment_postings) in segment_postings { let old_to_new_doc_id = &merged_doc_id_map[segment_ord]; loop { - let doc = segment_postings.doc(); + let doc = segment_postings.doc(); // `.advance()` has been called once before the loop. 
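The `compute_total_num_tokens` change in the merger hunk above adds, on top of the exact `total_tokens` counter taken from delete-free segments, a term reconstructed from a histogram of fieldnorm ids for segments with deletes: `count[id] * id_to_fieldnorm(id)` summed over all ids. A small worked sketch, reusing the 8-entry toy table from the fieldnorm example above:

```rust
// Toy stand-in for the 256-entry FIELD_NORMS_TABLE lookup.
fn id_to_fieldnorm(id: u8) -> u32 {
    const TABLE: [u32; 8] = [0, 1, 2, 4, 8, 16, 32, 64];
    TABLE[id as usize]
}

// `fieldnorm_histogram[id]` = number of live docs whose fieldnorm id is `id`.
fn total_num_tokens(exact_total: u64, fieldnorm_histogram: &[u64; 8]) -> u64 {
    exact_total
        + fieldnorm_histogram
            .iter()
            .cloned()
            .enumerate()
            .map(|(id, count)| count * id_to_fieldnorm(id as u8) as u64)
            .sum::<u64>()
}

fn main() {
    // Two live docs of fieldnorm id 3 (length 4) and one of id 5 (length 16):
    let mut histogram = [0u64; 8];
    histogram[3] = 2;
    histogram[5] = 1;
    assert_eq!(total_num_tokens(100, &histogram), 100 + 2 * 4 + 16);
}
```

Note the estimate is approximate for deleted segments: lengths pass through the lossy fieldnorm encoding, which is why the diff decodes ids back through the table rather than summing exact lengths.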
// @@ -335,7 +336,8 @@ impl IndexMerger { let term_freq = segment_postings.term_freq(); segment_postings.positions(&mut positions_buffer); - let delta_positions = delta_computer.compute_delta(&positions_buffer); + let delta_positions = + delta_computer.compute_delta(&positions_buffer); field_serializer.write_doc( remapped_doc_id, term_freq, @@ -389,21 +391,21 @@ impl SerializableSegment for IndexMerger { #[cfg(test)] mod tests { - use schema; - use schema::Document; - use schema::Term; - use schema::TextFieldIndexing; - use query::TermQuery; - use schema::Field; - use core::Index; - use Searcher; - use DocAddress; use collector::tests::FastFieldTestCollector; use collector::tests::TestCollector; - use query::BooleanQuery; - use schema::IndexRecordOption; - use schema::Cardinality; + use core::Index; use futures::Future; + use query::BooleanQuery; + use query::TermQuery; + use schema; + use schema::Cardinality; + use schema::Document; + use schema::Field; + use schema::IndexRecordOption; + use schema::Term; + use schema::TextFieldIndexing; + use DocAddress; + use Searcher; #[test] fn test_index_merger_no_deletes() { diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index e33f23cc2..783e787c8 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -1,29 +1,29 @@ -pub mod index_writer; -pub mod segment_serializer; -pub mod merger; -pub mod merge_policy; -mod log_merge_policy; -mod segment_register; -mod segment_writer; -mod segment_manager; pub mod delete_queue; -pub mod segment_updater; mod directory_lock; -mod segment_entry; mod doc_opstamp_mapping; +pub mod index_writer; +mod log_merge_policy; +pub mod merge_policy; +pub mod merger; pub mod operation; -mod stamper; mod prepared_commit; +mod segment_entry; +mod segment_manager; +mod segment_register; +pub mod segment_serializer; +pub mod segment_updater; +mod segment_writer; +mod stamper; -pub use self::prepared_commit::PreparedCommit; -pub use self::segment_entry::{SegmentEntry, SegmentState}; -pub use self::segment_serializer::SegmentSerializer; -pub use self::segment_writer::SegmentWriter; +pub(crate) use self::directory_lock::DirectoryLock; pub use self::index_writer::IndexWriter; pub use self::log_merge_policy::LogMergePolicy; pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy}; +pub use self::prepared_commit::PreparedCommit; +pub use self::segment_entry::{SegmentEntry, SegmentState}; pub use self::segment_manager::SegmentManager; -pub(crate) use self::directory_lock::DirectoryLock; +pub use self::segment_serializer::SegmentSerializer; +pub use self::segment_writer::SegmentWriter; /// Alias for the default merge policy, which is the `LogMergePolicy`. 
pub type DefaultMergePolicy = LogMergePolicy; diff --git a/src/indexer/prepared_commit.rs b/src/indexer/prepared_commit.rs index b290b2c53..4728af01a 100644 --- a/src/indexer/prepared_commit.rs +++ b/src/indexer/prepared_commit.rs @@ -1,5 +1,5 @@ -use Result; use super::IndexWriter; +use Result; /// A prepared commit pub struct PreparedCommit<'a> { @@ -13,7 +13,7 @@ impl<'a> PreparedCommit<'a> { PreparedCommit { index_writer, payload: None, - opstamp + opstamp, } } diff --git a/src/indexer/segment_entry.rs b/src/indexer/segment_entry.rs index dc69af182..c35406ad1 100644 --- a/src/indexer/segment_entry.rs +++ b/src/indexer/segment_entry.rs @@ -1,7 +1,7 @@ -use core::SegmentMeta; use bit_set::BitSet; -use indexer::delete_queue::DeleteCursor; use core::SegmentId; +use core::SegmentMeta; +use indexer::delete_queue::DeleteCursor; use std::fmt; #[derive(Clone, Copy, PartialEq, Eq, Debug)] diff --git a/src/indexer/segment_manager.rs b/src/indexer/segment_manager.rs index a2bffc8b2..7e53a4d00 100644 --- a/src/indexer/segment_manager.rs +++ b/src/indexer/segment_manager.rs @@ -1,14 +1,14 @@ use super::segment_register::SegmentRegister; -use std::sync::RwLock; +use core::SegmentId; use core::SegmentMeta; use core::{LOCKFILE_FILEPATH, META_FILEPATH}; -use core::SegmentId; -use indexer::SegmentEntry; -use std::path::PathBuf; -use std::collections::hash_set::HashSet; -use std::sync::{RwLockReadGuard, RwLockWriteGuard}; -use std::fmt::{self, Debug, Formatter}; use indexer::delete_queue::DeleteCursor; +use indexer::SegmentEntry; +use std::collections::hash_set::HashSet; +use std::fmt::{self, Debug, Formatter}; +use std::path::PathBuf; +use std::sync::RwLock; +use std::sync::{RwLockReadGuard, RwLockWriteGuard}; #[derive(Default)] struct SegmentRegisters { diff --git a/src/indexer/segment_register.rs b/src/indexer/segment_register.rs index 12b42ee1c..7a1feaf23 100644 --- a/src/indexer/segment_register.rs +++ b/src/indexer/segment_register.rs @@ -1,10 +1,10 @@ use core::SegmentId; -use std::collections::HashMap; use core::SegmentMeta; +use indexer::delete_queue::DeleteCursor; +use indexer::segment_entry::SegmentEntry; +use std::collections::HashMap; use std::fmt; use std::fmt::{Debug, Formatter}; -use indexer::segment_entry::SegmentEntry; -use indexer::delete_queue::DeleteCursor; /// The segment register keeps track /// of the list of segment, their size as well @@ -113,11 +113,11 @@ impl SegmentRegister { #[cfg(test)] mod tests { - use indexer::SegmentState; + use super::*; use core::SegmentId; use core::SegmentMeta; use indexer::delete_queue::*; - use super::*; + use indexer::SegmentState; fn segment_ids(segment_register: &SegmentRegister) -> Vec { segment_register diff --git a/src/indexer/segment_serializer.rs b/src/indexer/segment_serializer.rs index 036a998b8..a1425f58f 100644 --- a/src/indexer/segment_serializer.rs +++ b/src/indexer/segment_serializer.rs @@ -3,9 +3,9 @@ use Result; use core::Segment; use core::SegmentComponent; use fastfield::FastFieldSerializer; -use store::StoreWriter; use fieldnorm::FieldNormsSerializer; use postings::InvertedIndexSerializer; +use store::StoreWriter; /// Segment serializer is in charge of laying out on disk /// the data accumulated and sorted by the `SegmentWriter`. @@ -47,7 +47,7 @@ impl SegmentSerializer { } /// Accessor to the field norm serializer. 
- pub fn get_fieldnorms_serializer(&mut self) -> &mut FieldNormsSerializer { + pub fn get_fieldnorms_serializer(&mut self) -> &mut FieldNormsSerializer { &mut self.fieldnorms_serializer } diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 501a7813f..7d4598660 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -1,40 +1,40 @@ +use super::segment_manager::{get_mergeable_segments, SegmentManager}; use core::Index; use core::IndexMeta; -use core::META_FILEPATH; use core::Segment; use core::SegmentId; use core::SegmentMeta; use core::SerializableSegment; +use core::META_FILEPATH; use directory::Directory; -use indexer::stamper::Stamper; +use directory::FileProtection; use error::{Error, ErrorKind, Result}; -use futures_cpupool::CpuPool; -use futures::Future; use futures::oneshot; use futures::sync::oneshot::Receiver; -use directory::FileProtection; -use indexer::{DefaultMergePolicy, MergePolicy}; +use futures::Future; +use futures_cpupool::CpuFuture; +use futures_cpupool::CpuPool; +use indexer::delete_queue::DeleteCursor; use indexer::index_writer::advance_deletes; -use indexer::MergeCandidate; use indexer::merger::IndexMerger; +use indexer::stamper::Stamper; +use indexer::MergeCandidate; use indexer::SegmentEntry; use indexer::SegmentSerializer; -use futures_cpupool::CpuFuture; -use serde_json; -use indexer::delete_queue::DeleteCursor; +use indexer::{DefaultMergePolicy, MergePolicy}; use schema::Schema; +use serde_json; use std::borrow::BorrowMut; use std::collections::HashMap; use std::io::Write; use std::mem; use std::ops::DerefMut; -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, AtomicUsize}; use std::sync::atomic::Ordering; +use std::sync::atomic::{AtomicBool, AtomicUsize}; +use std::sync::Arc; use std::sync::RwLock; use std::thread; use std::thread::JoinHandle; -use super::segment_manager::{get_mergeable_segments, SegmentManager}; /// Save the index meta file. 
/// This operation is atomic : @@ -283,10 +283,7 @@ impl SegmentUpdater { }).wait() } - pub fn start_merge( - &self, - segment_ids: &[SegmentId], - ) -> Receiver { + pub fn start_merge(&self, segment_ids: &[SegmentId]) -> Receiver { self.0.segment_manager.start_merge(segment_ids); let segment_updater_clone = self.clone(); @@ -482,9 +479,9 @@ impl SegmentUpdater { #[cfg(test)] mod tests { - use Index; - use schema::*; use indexer::merge_policy::tests::MergeWheneverPossible; + use schema::*; + use Index; #[test] fn test_delete_during_merge() { diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 4436bf09d..717610b10 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -1,23 +1,23 @@ -use Result; -use DocId; -use std::io; -use std::str; -use schema::Schema; -use schema::Term; +use super::operation::AddOperation; use core::Segment; use core::SerializableSegment; -use fastfield::FastFieldsWriter; -use schema::FieldType; -use indexer::segment_serializer::SegmentSerializer; use datastruct::stacker::Heap; +use fastfield::FastFieldsWriter; +use fieldnorm::FieldNormsWriter; use indexer::index_writer::MARGIN_IN_BYTES; -use super::operation::AddOperation; +use indexer::segment_serializer::SegmentSerializer; use postings::MultiFieldPostingsWriter; +use schema::FieldType; +use schema::Schema; +use schema::Term; +use schema::Value; +use std::io; +use std::str; use tokenizer::BoxedTokenizer; use tokenizer::FacetTokenizer; use tokenizer::{TokenStream, Tokenizer}; -use schema::Value; -use fieldnorm::FieldNormsWriter; +use DocId; +use Result; /// A `SegmentWriter` is in charge of creating segment index from a /// documents. @@ -35,7 +35,6 @@ pub struct SegmentWriter<'a> { tokenizers: Vec>>, } - impl<'a> SegmentWriter<'a> { /// Creates a new `SegmentWriter` /// @@ -179,8 +178,7 @@ impl<'a> SegmentWriter<'a> { } else { 0 }; - self.fieldnorms_writer - .record(doc_id, field, num_tokens); + self.fieldnorms_writer.record(doc_id, field, num_tokens); } FieldType::U64(ref int_option) => { if int_option.is_indexed() { diff --git a/src/indexer/stamper.rs b/src/indexer/stamper.rs index 479f5874b..430607032 100644 --- a/src/indexer/stamper.rs +++ b/src/indexer/stamper.rs @@ -2,11 +2,11 @@ // For the moment let's just use AtomicUsize on // x86/64 bit platform, and a mutex on other platform. 
-#[cfg(target="x86_64")] +#[cfg(target = "x86_64")] mod archicture_impl { - use std::sync::Arc; use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; #[derive(Clone, Default)] pub struct Stamper(Arc); @@ -22,8 +22,7 @@ mod archicture_impl { } } - -#[cfg(not(target="x86_64"))] +#[cfg(not(target = "x86_64"))] mod archicture_impl { use std::sync::{Arc, Mutex}; @@ -47,7 +46,6 @@ mod archicture_impl { pub use self::archicture_impl::Stamper; - #[cfg(test)] mod test { @@ -65,4 +63,4 @@ mod test { assert_eq!(stamper.stamp(), 10u64); assert_eq!(stamper_clone.stamp(), 11u64); } -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 15a7b0a91..3e57b9ead 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,7 @@ #![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")] #![cfg_attr(feature = "cargo-clippy", allow(module_inception))] #![cfg_attr(feature = "cargo-clippy", allow(inline_always))] - -#![cfg_attr(all(feature="unstable", test), feature(test))] +#![cfg_attr(all(feature = "unstable", test), feature(test))] #![doc(test(attr(allow(unused_variables), deny(warnings))))] #![allow(unknown_lints)] #![allow(new_without_default)] @@ -123,9 +122,10 @@ extern crate log; #[macro_use] extern crate error_chain; -#[cfg(feature="mmap")] +#[cfg(feature = "mmap")] extern crate atomicwrites; extern crate bit_set; +extern crate bitpacking; extern crate byteorder; extern crate chan; extern crate combine; @@ -145,7 +145,6 @@ extern crate stable_deref_trait; extern crate tempdir; extern crate tempfile; extern crate uuid; -extern crate bitpacking; #[cfg(test)] #[macro_use] @@ -160,7 +159,7 @@ extern crate winapi; #[cfg(test)] extern crate rand; -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] extern crate test; extern crate tinysegmenter; @@ -179,36 +178,36 @@ pub use error::{Error, ErrorKind, ResultExt}; /// Tantivy result. pub type Result = std::result::Result; -mod core; -mod compression; -mod indexer; mod common; +mod compression; +mod core; +mod indexer; +mod datastruct; #[allow(unused_doc_comment)] mod error; pub mod tokenizer; -mod datastruct; -pub mod termdict; -pub mod store; -pub mod query; -pub mod directory; pub mod collector; -pub mod postings; -pub mod schema; +pub mod directory; pub mod fastfield; pub(crate) mod fieldnorm; +pub mod postings; +pub mod query; +pub mod schema; +pub mod store; +pub mod termdict; mod docset; pub use self::docset::{DocSet, SkipResult}; -pub use directory::Directory; -pub use core::{Index, Searcher, Segment, SegmentId, SegmentMeta}; -pub use indexer::IndexWriter; -pub use schema::{Document, Term}; -pub use core::{InvertedIndexReader, SegmentReader}; -pub use postings::Postings; pub use core::SegmentComponent; +pub use core::{Index, Searcher, Segment, SegmentId, SegmentMeta}; +pub use core::{InvertedIndexReader, SegmentReader}; +pub use directory::Directory; +pub use indexer::IndexWriter; +pub use postings::Postings; +pub use schema::{Document, Term}; pub use common::{i64_to_u64, u64_to_i64}; @@ -224,10 +223,10 @@ pub fn version() -> &'static str { /// Defines tantivy's merging strategy pub mod merge_policy { - pub use indexer::MergePolicy; - pub use indexer::LogMergePolicy; - pub use indexer::NoMergePolicy; pub use indexer::DefaultMergePolicy; + pub use indexer::LogMergePolicy; + pub use indexer::MergePolicy; + pub use indexer::NoMergePolicy; } /// A `u32` identifying a document within a segment. 
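The `stamper.rs` hunk a little further up picks `AtomicUsize` on x86_64 and a `Mutex` elsewhere to hand out monotonically increasing opstamps. A minimal sketch of the same idea using `AtomicU64` (sidestepping the platform split) is below; `StamperSketch` is an illustrative name, and the real stamper can evidently start from an arbitrary opstamp, as its test (stamps 10 then 11) suggests.

```rust
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

// Shared monotonic counter: clones hand out stamps from the same sequence.
#[derive(Clone, Default)]
struct StamperSketch(Arc<AtomicU64>);

impl StamperSketch {
    fn stamp(&self) -> u64 {
        // fetch_add returns the previous value, so each call is unique.
        self.0.fetch_add(1, Ordering::SeqCst)
    }
}

fn main() {
    let stamper = StamperSketch::default();
    let clone = stamper.clone();
    assert_eq!(stamper.stamp(), 0);
    assert_eq!(clone.stamp(), 1); // clones share the same counter
    assert_eq!(stamper.stamp(), 2);
}
```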
@@ -276,18 +275,23 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId); mod tests { use collector::tests::TestCollector; - use Index; use core::SegmentReader; - use query::BooleanQuery; - use schema::*; use docset::DocSet; + use query::BooleanQuery; + use rand::distributions::{IndependentSample, Range}; + use rand::{Rng, SeedableRng, XorShiftRng}; + use schema::*; + use Index; use IndexWriter; use Postings; - use rand::{Rng, SeedableRng, XorShiftRng}; - use rand::distributions::{IndependentSample, Range}; pub fn assert_nearly_equals(expected: f32, val: f32) { - assert!(nearly_equals(val, expected), "Got {}, expected {}.", val, expected); + assert!( + nearly_equals(val, expected), + "Got {}, expected {}.", + val, + expected + ); } pub fn nearly_equals(a: f32, b: f32) -> bool { @@ -314,7 +318,7 @@ mod tests { } #[test] - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] fn test_indexing() { let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", TEXT); @@ -440,7 +444,6 @@ mod tests { } } - fn advance_undeleted(docset: &mut DocSet, reader: &SegmentReader) -> bool { while docset.advance() { if !reader.is_deleted(docset.doc()) { diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 555761462..472e9e2e0 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -6,20 +6,19 @@ Postings module (also called inverted index) /// /// Postings, also called inverted lists, is the key datastructure /// to full-text search. - mod postings; -mod recorder; -mod serializer; mod postings_writer; -mod term_info; +mod recorder; mod segment_postings; +mod serializer; +mod term_info; +pub(crate) use self::postings_writer::MultiFieldPostingsWriter; use self::recorder::{NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder}; pub use self::serializer::{FieldSerializer, InvertedIndexSerializer}; -pub(crate) use self::postings_writer::MultiFieldPostingsWriter; -pub use self::term_info::TermInfo; pub use self::postings::Postings; +pub use self::term_info::TermInfo; pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings}; @@ -38,22 +37,22 @@ pub(crate) enum FreqReadingOption { pub mod tests { use super::*; + use core::Index; + use core::SegmentComponent; + use core::SegmentReader; + use datastruct::stacker::Heap; use docset::{DocSet, SkipResult}; + use fieldnorm::FieldNormReader; + use indexer::operation::AddOperation; + use indexer::SegmentWriter; + use query::Scorer; + use rand::{Rng, SeedableRng, XorShiftRng}; + use schema::Field; + use schema::IndexRecordOption; + use schema::{Document, SchemaBuilder, Term, INT_INDEXED, STRING, TEXT}; + use std::iter; use DocId; use Score; - use query::Scorer; - use schema::{Document, SchemaBuilder, Term, INT_INDEXED, STRING, TEXT}; - use core::SegmentComponent; - use indexer::SegmentWriter; - use core::SegmentReader; - use core::Index; - use schema::IndexRecordOption; - use std::iter; - use datastruct::stacker::Heap; - use schema::Field; - use indexer::operation::AddOperation; - use rand::{Rng, SeedableRng, XorShiftRng}; - use fieldnorm::FieldNormReader; #[test] pub fn test_position_write() { @@ -124,7 +123,6 @@ pub mod tests { assert_eq!(&[0, 5], &positions[..]); } { - let mut postings = inverted_index .read_postings(&term, IndexRecordOption::WithFreqsAndPositions) .unwrap(); @@ -203,13 +201,14 @@ pub mod tests { { let segment_reader = SegmentReader::open(&segment).unwrap(); { - let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field) ; + let fieldnorm_reader = 
segment_reader.get_fieldnorms_reader(text_field); assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5); assert_eq!(fieldnorm_reader.fieldnorm(1), 2); for i in 2..1000 { assert_eq!( fieldnorm_reader.fieldnorm_id(i), - FieldNormReader::fieldnorm_to_id(i + 1) ); + FieldNormReader::fieldnorm_to_id(i + 1) + ); } } { @@ -446,7 +445,7 @@ pub mod tests { // delete everything else { let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); - index_writer.delete_term(term_1); + index_writer.delete_term(term_1); assert!(index_writer.commit().is_ok()); } @@ -504,7 +503,7 @@ pub mod tests { let posting_list_size = 1_000_000; { let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); - for _ in 0 .. posting_list_size { + for _ in 0..posting_list_size { let mut doc = Document::default(); if rng.gen_weighted_bool(15) { doc.add_text(text_field, "a"); @@ -595,17 +594,16 @@ pub mod tests { } - -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use test::{self, Bencher}; - use schema::IndexRecordOption; - use tests; use super::tests::*; use docset::SkipResult; - use DocSet; use query::Intersection; + use schema::IndexRecordOption; + use test::{self, Bencher}; + use tests; + use DocSet; #[bench] fn bench_segment_postings(b: &mut Bencher) { @@ -723,4 +721,4 @@ mod bench { s }); } -} \ No newline at end of file +} diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index f31a5ead7..31fefa205 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -1,21 +1,21 @@ -use DocId; -use schema::Term; -use postings::{FieldSerializer, InvertedIndexSerializer}; -use std::io; -use std::collections::HashMap; -use postings::Recorder; -use Result; -use schema::{Field, Schema}; -use std::marker::PhantomData; -use std::ops::DerefMut; use datastruct::stacker::{Heap, TermHashMap}; +use postings::Recorder; +use postings::UnorderedTermId; +use postings::{FieldSerializer, InvertedIndexSerializer}; use postings::{NothingRecorder, TFAndPositionRecorder, TermFrequencyRecorder}; use schema::FieldEntry; use schema::FieldType; +use schema::IndexRecordOption; +use schema::Term; +use schema::{Field, Schema}; +use std::collections::HashMap; +use std::io; +use std::marker::PhantomData; +use std::ops::DerefMut; use tokenizer::Token; use tokenizer::TokenStream; -use schema::IndexRecordOption; -use postings::UnorderedTermId; +use DocId; +use Result; fn posting_from_field_entry<'a>( field_entry: &FieldEntry, @@ -123,7 +123,8 @@ impl<'a> MultiFieldPostingsWriter<'a> { unordered_term_mappings.insert(field, mapping); let postings_writer = &self.per_field_postings_writers[field.0 as usize]; - let mut field_serializer = serializer.new_field(field, postings_writer.total_num_tokens())?; + let mut field_serializer = + serializer.new_field(field, postings_writer.total_num_tokens())?; postings_writer.serialize( &term_offsets[start..stop], &mut field_serializer, diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs index 17aac8055..a8a9a707e 100644 --- a/src/postings/recorder.rs +++ b/src/postings/recorder.rs @@ -1,7 +1,7 @@ -use DocId; -use std::{self, io}; -use postings::FieldSerializer; use datastruct::stacker::{ExpUnrolledLinkedList, Heap, HeapAllocable}; +use postings::FieldSerializer; +use std::{self, io}; +use DocId; const EMPTY_ARRAY: [u32; 0] = [0u32; 0]; const POSITION_END: u32 = std::u32::MAX; diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 5b7cf9216..3f364c4ab 
100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -2,15 +2,15 @@ use compression::{BlockDecoder, CompressedIntStream, VIntDecoder, COMPRESSION_BL use DocId; use common::BitSet; +use common::CountingWriter; use common::HasLen; -use postings::Postings; -use docset::{DocSet, SkipResult}; -use fst::Streamer; use compression::compressed_block_size; use directory::{ReadOnlySource, SourceRead}; -use postings::FreqReadingOption; +use docset::{DocSet, SkipResult}; +use fst::Streamer; use postings::serializer::PostingsSerializer; -use common::CountingWriter; +use postings::FreqReadingOption; +use postings::Postings; struct PositionComputer { // store the amount of position int @@ -84,9 +84,13 @@ impl SegmentPostings { for &doc in docs { postings_serializer.write_doc(doc, 1u32).unwrap(); } - postings_serializer.close_term().expect("In memory Serialization should never fail."); + postings_serializer + .close_term() + .expect("In memory Serialization should never fail."); } - let (buffer , _) = counting_writer.finish().expect("Serializing in a buffer should never fail."); + let (buffer, _) = counting_writer + .finish() + .expect("Serializing in a buffer should never fail."); let data = ReadOnlySource::from(buffer); let block_segment_postings = BlockSegmentPostings::from_data( docs.len(), @@ -98,7 +102,6 @@ impl SegmentPostings { } impl SegmentPostings { - /// Reads a Segment postings from an &[u8] /// /// * `len` - number of document in the posting lists. @@ -125,7 +128,7 @@ fn exponential_search(target: u32, mut start: usize, arr: &[u32]) -> (usize, usi loop { let new = start + jump; if new >= end { - return (start, end) + return (start, end); } if arr[new] > target { return (start, new); @@ -163,7 +166,8 @@ impl DocSet for SegmentPostings { if self.position_computer.is_some() { let freqs_skipped = &self.block_cursor.freqs()[self.cur..]; let sum_freq: u32 = freqs_skipped.iter().sum(); - self.position_computer.as_mut() + self.position_computer + .as_mut() .unwrap() .add_skip(sum_freq as usize); } @@ -198,7 +202,8 @@ impl DocSet for SegmentPostings { if self.position_computer.is_some() { let freqs_skipped = &self.block_cursor.freqs()[self.cur..start]; let sum_freqs: u32 = freqs_skipped.iter().sum(); - self.position_computer.as_mut() + self.position_computer + .as_mut() .unwrap() .add_skip(sum_freqs as usize); } @@ -211,7 +216,6 @@ impl DocSet for SegmentPostings { } } - // goes to the next element. // next needs to be called a first time to point to the correct element. 
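The `exponential_search` function in the segment-postings hunk above brackets a target doc id inside a sorted block by doubling a jump until it overshoots, in O(log d) where d is the distance to the target. A self-contained sketch follows; the doubling step and the follow-up binary search are not fully visible in the diff, so those parts are reconstructed as assumptions.

```rust
// Galloping search: returns (start, end) such that arr[start] <= target
// (for the initial start) and either end == arr.len() or arr[end] > target.
fn exponential_search(target: u32, mut start: usize, arr: &[u32]) -> (usize, usize) {
    let end = arr.len();
    let mut jump = 1;
    loop {
        let new = start + jump;
        if new >= end {
            return (start, end);
        }
        if arr[new] > target {
            return (start, new);
        }
        start = new;
        jump *= 2; // assumed doubling step, elided in the hunk
    }
}

fn main() {
    let block: Vec<u32> = (0..128).map(|i| i * 3).collect(); // sorted doc ids
    let (lo, hi) = exponential_search(100, 0, &block);
    assert!(block[lo] <= 100 && (hi == block.len() || block[hi] > 100));
    // A binary search restricted to block[lo..hi] then finds the first
    // element >= 100.
    let idx = lo + block[lo..hi].partition_point(|&d| d < 100);
    assert_eq!(block[idx], 102);
}
```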
#[inline] @@ -262,7 +266,6 @@ impl DocSet for SegmentPostings { } } - impl HasLen for SegmentPostings { fn len(&self) -> usize { self.block_cursor.doc_freq() @@ -284,7 +287,10 @@ impl Postings for SegmentPostings { } unsafe { output.set_len(term_freq); - self.position_computer.as_mut().unwrap().positions_with_offset(offset, &mut output[..]) + self.position_computer + .as_mut() + .unwrap() + .positions_with_offset(offset, &mut output[..]) } } else { output.clear(); @@ -473,16 +479,16 @@ impl<'b> Streamer<'b> for BlockSegmentPostings { #[cfg(test)] mod tests { - use docset::DocSet; + use super::BlockSegmentPostings; use super::SegmentPostings; - use schema::SchemaBuilder; + use common::HasLen; use core::Index; - use schema::INT_INDEXED; - use schema::Term; + use docset::DocSet; use fst::Streamer; use schema::IndexRecordOption; - use common::HasLen; - use super::BlockSegmentPostings; + use schema::SchemaBuilder; + use schema::Term; + use schema::INT_INDEXED; #[test] fn test_empty_segment_postings() { @@ -570,4 +576,3 @@ mod tests { assert_eq!(block_segments.docs(), &[1, 3, 5]); } } - diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 62c0f8e7a..35ae8d67a 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -1,19 +1,19 @@ -use Result; use super::TermInfo; +use common::BinarySerializable; +use common::CompositeWrite; +use common::CountingWriter; +use compression::VIntEncoder; +use compression::{BlockEncoder, COMPRESSION_BLOCK_SIZE}; +use core::Segment; +use directory::WritePtr; use schema::Field; use schema::FieldEntry; use schema::FieldType; use schema::Schema; -use directory::WritePtr; -use compression::{BlockEncoder, COMPRESSION_BLOCK_SIZE}; -use DocId; -use core::Segment; use std::io::{self, Write}; -use compression::VIntEncoder; -use common::BinarySerializable; -use common::CountingWriter; -use common::CompositeWrite; use termdict::TermDictionaryBuilder; +use DocId; +use Result; /// `PostingsSerializer` is in charge of serializing /// postings on disk, in the @@ -84,7 +84,11 @@ impl InvertedIndexSerializer { /// a given field. /// /// Loads the indexing options for the given field. - pub fn new_field(&mut self, field: Field, total_num_tokens: u64) -> io::Result { + pub fn new_field( + &mut self, + field: Field, + total_num_tokens: u64, + ) -> io::Result { let field_entry: &FieldEntry = self.schema.get_field_entry(field); let term_dictionary_write = self.terms_write.for_field(field); let postings_write = self.postings_write.for_field(field); @@ -124,7 +128,6 @@ impl<'a> FieldSerializer<'a> { postings_write: &'a mut CountingWriter, positions_write: &'a mut CountingWriter, ) -> io::Result> { - let (term_freq_enabled, position_enabled): (bool, bool) = match field_type { FieldType::Str(ref text_options) => { if let Some(text_indexing_options) = text_options.get_indexing_options() { diff --git a/src/query/all_query.rs b/src/query/all_query.rs index 47df20ab7..4e4f2fb55 100644 --- a/src/query/all_query.rs +++ b/src/query/all_query.rs @@ -1,12 +1,12 @@ -use query::Query; -use query::Weight; -use query::Scorer; +use core::Searcher; use core::SegmentReader; use docset::DocSet; +use query::Query; +use query::Scorer; +use query::Weight; +use DocId; use Result; use Score; -use DocId; -use core::Searcher; /// Query that matches all of the documents. 
/// diff --git a/src/query/bitset/mod.rs b/src/query/bitset/mod.rs index 1743dd602..bfffd7091 100644 --- a/src/query/bitset/mod.rs +++ b/src/query/bitset/mod.rs @@ -1,7 +1,7 @@ use common::{BitSet, TinySet}; -use DocId; use docset::{DocSet, SkipResult}; use std::cmp::Ordering; +use DocId; /// A `BitSetDocSet` makes it possible to iterate through a bitset as if it was a `DocSet`. /// @@ -120,10 +120,10 @@ impl DocSet for BitSetDocSet { #[cfg(test)] mod tests { - use DocId; + use super::BitSetDocSet; use common::BitSet; use docset::{DocSet, SkipResult}; - use super::BitSetDocSet; + use DocId; fn create_docbitset(docs: &[DocId], max_doc: DocId) -> BitSetDocSet { let mut docset = BitSet::with_max_value(max_doc); @@ -219,14 +219,13 @@ mod tests { } - -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use tests; - use test; use super::BitSet; use super::BitSetDocSet; + use test; + use tests; use DocSet; #[bench] @@ -264,4 +263,4 @@ mod bench { while docset.advance() {} }); } -} \ No newline at end of file +} diff --git a/src/query/bm25.rs b/src/query/bm25.rs index 8f7aca577..1fc6087ed 100644 --- a/src/query/bm25.rs +++ b/src/query/bm25.rs @@ -1,7 +1,7 @@ use fieldnorm::FieldNormReader; -use Term; -use Searcher; use Score; +use Searcher; +use Term; const K1: f32 = 1.2; const B: f32 = 0.75; @@ -11,7 +11,6 @@ fn idf(doc_freq: u64, doc_count: u64) -> f32 { (1f32 + x).ln() } - fn cached_tf_component(fieldnorm: u32, average_fieldnorm: f32) -> f32 { K1 * (1f32 - B + B * fieldnorm as f32 / average_fieldnorm) } @@ -32,11 +31,10 @@ pub struct BM25Weight { } impl BM25Weight { - pub fn null() -> BM25Weight { BM25Weight { weight: 0f32, - cache: [1f32; 256] + cache: [1f32; 256], } } @@ -44,7 +42,11 @@ impl BM25Weight { assert!(!terms.is_empty(), "BM25 requires at least one term"); let field = terms[0].field(); for term in &terms[1..] { - assert_eq!(term.field(), field, "All terms must belong to the same field."); + assert_eq!( + term.field(), + field, + "All terms must belong to the same field." 
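The `bm25.rs` hunk above defines `idf` as `(1 + x).ln()` and caches `K1 * (1 - B + B * fieldnorm / average_fieldnorm)` per fieldnorm id, so scoring a hit costs one lookup plus a few float operations. A worked sketch follows, assuming the standard Lucene-style `x = (N - n + 0.5) / (n + 0.5)` (consistent with the test visible below, `idf(1, 2) ≈ 0.6931472 = ln 2`) and the usual saturation formula for the term-frequency part; neither detail is fully shown in the diff.

```rust
const K1: f32 = 1.2;
const B: f32 = 0.75;

// Assumed idf definition; agrees with the diff's (1 + x).ln() and its test.
fn idf(doc_freq: u64, doc_count: u64) -> f32 {
    let x = (doc_count as f32 - doc_freq as f32 + 0.5) / (doc_freq as f32 + 0.5);
    (1f32 + x).ln()
}

// Length normalization matches the diff's cached_tf_component; the
// (k1 + 1) * tf / (tf + norm) saturation is the standard BM25 form.
fn tf_component(term_freq: f32, fieldnorm: u32, average_fieldnorm: f32) -> f32 {
    let norm = K1 * (1f32 - B + B * fieldnorm as f32 / average_fieldnorm);
    term_freq * (K1 + 1f32) / (term_freq + norm)
}

fn main() {
    // One of 2 documents contains the term: idf = ln(1 + 1.5 / 1.5) = ln 2.
    assert!((idf(1, 2) - 0.6931472).abs() < 1e-6);
    // Shorter fields score higher for the same term frequency.
    let score_short = idf(1, 2) * tf_component(2.0, 5, 10.0);
    let score_long = idf(1, 2) * tf_component(2.0, 40, 10.0);
    assert!(score_short > score_long);
}
```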
+ ); } let mut total_num_tokens = 0u64; @@ -56,7 +58,8 @@ impl BM25Weight { } let average_fieldnorm = total_num_tokens as f32 / total_num_docs as f32; - let idf = terms.iter() + let idf = terms + .iter() .map(|term| { let term_doc_freq = searcher.doc_freq(term); idf(term_doc_freq, total_num_docs) @@ -83,12 +86,12 @@ impl BM25Weight { #[cfg(test)] mod tests { - use tests::assert_nearly_equals; use super::idf; + use tests::assert_nearly_equals; #[test] fn test_idf() { - assert_nearly_equals(idf(1, 2), 0.6931472); + assert_nearly_equals(idf(1, 2), 0.6931472); } } diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index e333fafaf..802ba0614 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -1,12 +1,12 @@ -use Result; use super::boolean_weight::BooleanWeight; -use query::Weight; -use Searcher; -use query::Query; -use schema::Term; -use query::TermQuery; -use schema::IndexRecordOption; use query::Occur; +use query::Query; +use query::TermQuery; +use query::Weight; +use schema::IndexRecordOption; +use schema::Term; +use Result; +use Searcher; /// The boolean query combines a set of queries /// @@ -48,7 +48,8 @@ impl BooleanQuery { let occur_term_queries: Vec<(Occur, Box)> = terms .into_iter() .map(|term| { - let term_query: Box = Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)); + let term_query: Box = + Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)); (Occur::Should, term_query) }) .collect(); diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index fd16fc1da..575bc2991 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -1,19 +1,18 @@ -use query::Weight; use core::SegmentReader; -use query::Union; -use std::collections::HashMap; -use query::EmptyScorer; -use query::Scorer; use downcast::Downcast; -use std::borrow::Borrow; +use query::intersect_scorers; +use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner}; +use query::term_query::TermScorer; +use query::EmptyScorer; use query::Exclude; use query::Occur; use query::RequiredOptionalScorer; -use query::score_combiner::{DoNothingCombiner, ScoreCombiner, SumWithCoordsCombiner}; +use query::Scorer; +use query::Union; +use query::Weight; +use std::borrow::Borrow; +use std::collections::HashMap; use Result; -use query::intersect_scorers; -use query::term_query::TermScorer; - fn scorer_union(scorers: Vec>) -> Box where @@ -41,7 +40,6 @@ where let scorer: Box = Box::new(Union::<_, TScoreCombiner>::from(scorers)); return scorer; - } pub struct BooleanWeight { @@ -78,9 +76,9 @@ impl BooleanWeight { .remove(&Occur::MustNot) .map(scorer_union::); - let must_scorer_opt: Option> = - per_occur_scorers.remove(&Occur::Must) - .map(intersect_scorers); + let must_scorer_opt: Option> = per_occur_scorers + .remove(&Occur::Must) + .map(intersect_scorers); let positive_scorer: Box = match (should_scorer_opt, must_scorer_opt) { (Some(should_scorer), Some(must_scorer)) => { diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index ea08df98d..5d72406a0 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -7,19 +7,19 @@ pub use self::boolean_query::BooleanQuery; mod tests { use super::*; - use query::Occur; - use query::Query; - use query::TermQuery; - use query::Intersection; - use query::Scorer; use collector::tests::TestCollector; - use Index; use 
downcast::Downcast; - use schema::*; - use query::QueryParser; - use query::RequiredOptionalScorer; use query::score_combiner::SumWithCoordsCombiner; use query::term_query::TermScorer; + use query::Intersection; + use query::Occur; + use query::Query; + use query::QueryParser; + use query::RequiredOptionalScorer; + use query::Scorer; + use query::TermQuery; + use schema::*; + use Index; fn aux_test_helper() -> (Index, Field) { let mut schema_builder = SchemaBuilder::default(); @@ -171,7 +171,6 @@ mod tests { } } - #[test] pub fn test_intersection_score() { let (index, text_field) = aux_test_helper(); @@ -193,7 +192,10 @@ mod tests { }; { - let boolean_query = BooleanQuery::from(vec![(Occur::Must, make_term_query("a")), (Occur::Must, make_term_query("b"))]); + let boolean_query = BooleanQuery::from(vec![ + (Occur::Must, make_term_query("a")), + (Occur::Must, make_term_query("b")), + ]); assert_eq!(score_docs(&boolean_query), vec![0.977973, 0.84699446]); } } diff --git a/src/query/exclude.rs b/src/query/exclude.rs index a91ea1e8c..4fcb71b72 100644 --- a/src/query/exclude.rs +++ b/src/query/exclude.rs @@ -1,7 +1,7 @@ -use query::Scorer; use docset::{DocSet, SkipResult}; -use Score; +use query::Scorer; use DocId; +use Score; #[derive(Clone, Copy, Debug)] enum State { @@ -129,10 +129,10 @@ where #[cfg(test)] mod tests { - use tests::sample_with_seed; - use postings::tests::test_skip_against_unoptimized; use super::*; + use postings::tests::test_skip_against_unoptimized; use query::VecDocSet; + use tests::sample_with_seed; #[test] fn test_exclude() { diff --git a/src/query/intersection.rs b/src/query/intersection.rs index d1954cb30..e19d479ce 100644 --- a/src/query/intersection.rs +++ b/src/query/intersection.rs @@ -1,11 +1,11 @@ use docset::{DocSet, SkipResult}; -use query::Scorer; -use query::EmptyScorer; -use DocId; use downcast::Downcast; -use std::borrow::Borrow; -use Score; use query::term_query::TermScorer; +use query::EmptyScorer; +use query::Scorer; +use std::borrow::Borrow; +use DocId; +use Score; /// Returns the intersection scorer. /// @@ -36,27 +36,29 @@ pub fn intersect_scorers(mut scorers: Vec>) -> Box { left, right, others: scorers, - num_docsets - }) + num_docsets, + }); } } return Box::new(Intersection { left, right, others: scorers, - num_docsets - }) + num_docsets, + }); + } + _ => { + unreachable!(); } - _ => { unreachable!(); } } } /// Creates a `DocSet` that iterator through the intersection of two `DocSet`s. 
-pub struct Intersection> { +pub struct Intersection> { left: TDocSet, right: TDocSet, others: Vec, - num_docsets: usize + num_docsets: usize, } impl Intersection { @@ -71,18 +73,17 @@ impl Intersection { left, right, others: docsets, - num_docsets + num_docsets, } } } - impl Intersection { pub(crate) fn docset_mut_specialized(&mut self, ord: usize) -> &mut TDocSet { match ord { 0 => &mut self.left, 1 => &mut self.right, - n => &mut self.others[n - 2] + n => &mut self.others[n - 2], } } } @@ -92,7 +93,7 @@ impl Intersection match ord { 0 => &mut self.left, 1 => &mut self.right, - n => &mut self.others[n - 2] + n => &mut self.others[n - 2], } } } @@ -114,23 +115,30 @@ impl DocSet for Intersection { break; } + SkipResult::Reached => { + break; + } SkipResult::OverStep => { candidate = right.doc(); other_candidate_ord = usize::max_value(); } - SkipResult::End => { return false; } + SkipResult::End => { + return false; + } } match left.skip_next(candidate) { - SkipResult::Reached => { break; } + SkipResult::Reached => { + break; + } SkipResult::OverStep => { candidate = left.doc(); other_candidate_ord = usize::max_value(); } - SkipResult::End => { return false; } + SkipResult::End => { + return false; + } } - } // test the remaining scorers; for (ord, docset) in self.others.iter_mut().enumerate() { @@ -147,16 +155,22 @@ impl DocSet for Intersection { other_candidate_ord = ord; } + SkipResult::Reached => { + other_candidate_ord = ord; + } SkipResult::OverStep => { candidate = left.doc(); other_candidate_ord = usize::max_value(); } - SkipResult::End => { return false; } + SkipResult::End => { + return false; + } } continue 'outer; } - SkipResult::End => { return false; } + SkipResult::End => { + return false; + } } } } @@ -164,9 +178,7 @@ impl DocSet for Intersection SkipResult { - // We optimize skipping by skipping every single member // of the intersection to target. 
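The `Intersection` docset in this hunk advances with a leapfrog pattern: keep a candidate doc id, `skip_next` every docset to it, and whenever one overshoots (`SkipResult::OverStep`) adopt the overshoot as the new candidate and rescan. A sketch of that pattern over plain sorted slices, standing in for real `DocSet`s:

```rust
// Leapfrog intersection of sorted doc-id lists; an illustration of the
// algorithm, not tantivy's Intersection type.
fn intersect(lists: &[&[u32]]) -> Vec<u32> {
    let mut out = Vec::new();
    if lists.is_empty() {
        return out;
    }
    let mut cursors = vec![0usize; lists.len()];
    let mut candidate = match lists[0].first() {
        Some(&doc) => doc,
        None => return out,
    };
    'outer: loop {
        for (i, list) in lists.iter().enumerate() {
            // skip_next(candidate): advance to the first doc >= candidate.
            while cursors[i] < list.len() && list[cursors[i]] < candidate {
                cursors[i] += 1;
            }
            match list.get(cursors[i]) {
                None => break 'outer,             // SkipResult::End
                Some(&doc) if doc == candidate => {} // SkipResult::Reached
                Some(&doc) => {
                    candidate = doc;              // SkipResult::OverStep
                    continue 'outer;              // rescan with new candidate
                }
            }
        }
        out.push(candidate); // every docset reached the candidate
        candidate += 1;
    }
    out
}

fn main() {
    let a: &[u32] = &[1, 3, 5, 7, 9];
    let b: &[u32] = &[2, 3, 4, 7, 8];
    let c: &[u32] = &[3, 7, 11];
    assert_eq!(intersect(&[a, b, c]), vec![3, 7]);
}
```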
let mut current_target: DocId = target; @@ -211,18 +223,22 @@ impl DocSet for Intersection Scorer for Intersection -where TScorer: Scorer, TOtherScorer: Scorer { +where + TScorer: Scorer, + TOtherScorer: Scorer, +{ fn score(&mut self) -> Score { - self.left.score() + self.right.score() + self.others.iter_mut().map(Scorer::score).sum::() + self.left.score() + self.right.score() + + self.others.iter_mut().map(Scorer::score).sum::() } } #[cfg(test)] mod tests { - use docset::{DocSet, SkipResult}; use super::Intersection; - use query::VecDocSet; + use docset::{DocSet, SkipResult}; use postings::tests::test_skip_against_unoptimized; + use query::VecDocSet; #[test] fn test_intersection() { diff --git a/src/query/mod.rs b/src/query/mod.rs index e0ada1b17..68e5343f9 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -2,22 +2,22 @@ Query */ -mod query; -mod boolean_query; -mod scorer; -mod occur; -mod weight; -mod term_query; -mod query_parser; -mod phrase_query; mod all_query; mod bitset; -mod range_query; -mod exclude; -mod union; -mod intersection; -mod reqopt_scorer; mod bm25; +mod boolean_query; +mod exclude; +mod intersection; +mod occur; +mod phrase_query; +mod query; +mod query_parser; +mod range_query; +mod reqopt_scorer; +mod scorer; +mod term_query; +mod union; +mod weight; #[cfg(test)] mod vec_docset; @@ -30,20 +30,20 @@ pub use self::union::Union; #[cfg(test)] pub use self::vec_docset::VecDocSet; -pub use self::reqopt_scorer::RequiredOptionalScorer; -pub use self::exclude::Exclude; +pub use self::all_query::{AllQuery, AllScorer, AllWeight}; pub use self::bitset::BitSetDocSet; pub use self::boolean_query::BooleanQuery; +pub use self::exclude::Exclude; +pub use self::intersection::intersect_scorers; pub use self::occur::Occur; pub use self::phrase_query::PhraseQuery; -pub use self::query_parser::QueryParserError; -pub use self::query_parser::QueryParser; pub use self::query::Query; +pub use self::query_parser::QueryParser; +pub use self::query_parser::QueryParserError; +pub use self::range_query::RangeQuery; +pub use self::reqopt_scorer::RequiredOptionalScorer; +pub use self::scorer::ConstScorer; pub use self::scorer::EmptyScorer; pub use self::scorer::Scorer; pub use self::term_query::TermQuery; pub use self::weight::Weight; -pub use self::all_query::{AllQuery, AllScorer, AllWeight}; -pub use self::range_query::RangeQuery; -pub use self::scorer::ConstScorer; -pub use self::intersection::intersect_scorers; diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs index 911e63939..b384c7a5c 100644 --- a/src/query/phrase_query/mod.rs +++ b/src/query/phrase_query/mod.rs @@ -1,20 +1,20 @@ mod phrase_query; -mod phrase_weight; mod phrase_scorer; +mod phrase_weight; pub use self::phrase_query::PhraseQuery; -pub use self::phrase_weight::PhraseWeight; pub use self::phrase_scorer::PhraseScorer; +pub use self::phrase_weight::PhraseWeight; #[cfg(test)] mod tests { use super::*; - use core::Index; - use schema::{SchemaBuilder, Term, TEXT}; use collector::tests::TestCollector; - use tests::assert_nearly_equals; + use core::Index; use error::ErrorKind; + use schema::{SchemaBuilder, Term, TEXT}; + use tests::assert_nearly_equals; fn create_index(texts: &[&'static str]) -> Index { let mut schema_builder = SchemaBuilder::default(); @@ -40,7 +40,7 @@ mod tests { "a b b d c g c", "a b a b c", "c a b a d ga a", - "a b c" + "a b c", ]); let schema = index.schema(); let text_field = schema.get_field("text").unwrap(); @@ -68,13 +68,14 @@ mod tests { #[test] pub fn 
test_phrase_query_no_positions() { let mut schema_builder = SchemaBuilder::default(); - use schema::TextOptions; - use schema::TextFieldIndexing; use schema::IndexRecordOption; - let no_positions = TextOptions::default() - .set_indexing_options(TextFieldIndexing::default() + use schema::TextFieldIndexing; + use schema::TextOptions; + let no_positions = TextOptions::default().set_indexing_options( + TextFieldIndexing::default() .set_tokenizer("default") - .set_index_option(IndexRecordOption::WithFreqs)); + .set_index_option(IndexRecordOption::WithFreqs), + ); let text_field = schema_builder.add_text_field("text", no_positions); let schema = schema_builder.build(); @@ -88,11 +89,18 @@ mod tests { let searcher = index.searcher(); let phrase_query = PhraseQuery::new(vec![ Term::from_field_text(text_field, "a"), - Term::from_field_text(text_field, "b") + Term::from_field_text(text_field, "b"), ]); let mut test_collector = TestCollector::default(); - if let &ErrorKind::SchemaError(ref msg) = searcher.search(&phrase_query, &mut test_collector).unwrap_err().kind() { - assert_eq!("Applied phrase query on field \"text\", which does not have positions indexed", msg.as_str()); + if let &ErrorKind::SchemaError(ref msg) = searcher + .search(&phrase_query, &mut test_collector) + .unwrap_err() + .kind() + { + assert_eq!( + "Applied phrase query on field \"text\", which does not have positions indexed", + msg.as_str() + ); } else { panic!("Should have returned an error"); } @@ -120,7 +128,6 @@ mod tests { let scores = test_query(vec!["a", "b"]); assert_nearly_equals(scores[0], 0.40618482); assert_nearly_equals(scores[1], 0.46844664); - } #[test] // motivated by #234 diff --git a/src/query/phrase_query/phrase_query.rs b/src/query/phrase_query/phrase_query.rs index 4a4d3fb22..1f12d9ce9 100644 --- a/src/query/phrase_query/phrase_query.rs +++ b/src/query/phrase_query/phrase_query.rs @@ -1,11 +1,11 @@ -use schema::{Field, Term}; -use query::Query; -use core::searcher::Searcher; use super::PhraseWeight; -use query::Weight; -use Result; -use query::bm25::BM25Weight; +use core::searcher::Searcher; use error::ErrorKind; +use query::bm25::BM25Weight; +use query::Query; +use query::Weight; +use schema::{Field, Term}; +use Result; /// `PhraseQuery` matches a specific sequence of words. /// @@ -28,18 +28,23 @@ pub struct PhraseQuery { } impl PhraseQuery { - /// Creates a new `PhraseQuery` given a list of terms. /// /// There must be at least two terms, and all terms /// must belong to the same field. pub fn new(terms: Vec) -> PhraseQuery { - assert!(terms.len() > 1, "A phrase query is required to have strictly more than one term."); + assert!( + terms.len() > 1, + "A phrase query is required to have strictly more than one term." + ); let field = terms[0].field(); - assert!(terms[1..].iter().all(|term| term.field() == field), "All terms from a phrase query must belong to the same field"); + assert!( + terms[1..].iter().all(|term| term.field() == field), + "All terms from a phrase query must belong to the same field" + ); PhraseQuery { field, - phrase_terms: terms + phrase_terms: terms, } } } @@ -50,26 +55,29 @@ impl Query for PhraseQuery { /// See [`Weight`](./trait.Weight.html). 
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result> { let schema = searcher.schema(); - let field_entry= schema.get_field_entry(self.field); - let has_positions = field_entry.field_type().get_index_record_option() + let field_entry = schema.get_field_entry(self.field); + let has_positions = field_entry + .field_type() + .get_index_record_option() .map(|index_record_option| index_record_option.has_positions()) .unwrap_or(false); if !has_positions { let field_name = field_entry.name(); - bail!(ErrorKind::SchemaError(format!("Applied phrase query on field {:?}, which does not have positions indexed", - field_name))) + bail!(ErrorKind::SchemaError(format!( + "Applied phrase query on field {:?}, which does not have positions indexed", + field_name + ))) } let terms = self.phrase_terms.clone(); if scoring_enabled { let bm25_weight = BM25Weight::for_terms(searcher, &terms); + Ok(Box::new(PhraseWeight::new(terms, bm25_weight, true))) + } else { Ok(Box::new(PhraseWeight::new( terms, - bm25_weight, - true + BM25Weight::null(), + false, ))) - } else { - Ok(Box::new(PhraseWeight::new(terms, BM25Weight::null(), false))) } - } } diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs index 830626a8a..90590a0b8 100644 --- a/src/query/phrase_query/phrase_scorer.rs +++ b/src/query/phrase_query/phrase_scorer.rs @@ -1,20 +1,20 @@ -use DocId; use docset::{DocSet, SkipResult}; -use postings::Postings; -use query::{Intersection, Scorer}; -use query::bm25::BM25Weight; use fieldnorm::FieldNormReader; +use postings::Postings; +use query::bm25::BM25Weight; +use query::{Intersection, Scorer}; +use DocId; struct PostingsWithOffset { offset: u32, - postings: TPostings + postings: TPostings, } impl PostingsWithOffset { pub fn new(segment_postings: TPostings, offset: u32) -> PostingsWithOffset { PostingsWithOffset { offset, - postings: segment_postings + postings: segment_postings, } } @@ -49,10 +49,9 @@ pub struct PhraseScorer { phrase_count: u32, fieldnorm_reader: FieldNormReader, similarity_weight: BM25Weight, - score_needed: bool + score_needed: bool, } - /// Returns true iff the two sorted array contain a common element fn intersection_exists(left: &[u32], right: &[u32]) -> bool { let mut left_i = 0; @@ -118,18 +117,20 @@ fn intersection(left: &mut [u32], right: &[u32]) -> usize { count } - impl PhraseScorer { - - pub fn new(term_postings: Vec, - similarity_weight: BM25Weight, - fieldnorm_reader: FieldNormReader, - score_needed: bool) -> PhraseScorer { + pub fn new( + term_postings: Vec, + similarity_weight: BM25Weight, + fieldnorm_reader: FieldNormReader, + score_needed: bool, + ) -> PhraseScorer { let num_docsets = term_postings.len(); let postings_with_offsets = term_postings .into_iter() .enumerate() - .map(|(offset, postings)| PostingsWithOffset::new(postings, (num_docsets - offset) as u32)) + .map(|(offset, postings)| { + PostingsWithOffset::new(postings, (num_docsets - offset) as u32) + }) .collect::>(); PhraseScorer { intersection_docset: Intersection::new(postings_with_offsets), @@ -153,7 +154,6 @@ impl PhraseScorer { } } - fn phrase_exists(&mut self) -> bool { { self.intersection_docset @@ -163,7 +163,9 @@ impl PhraseScorer { let mut intersection_len = self.left.len(); for i in 1..self.num_docsets - 1 { { - self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right); + self.intersection_docset + .docset_mut_specialized(i) + .positions(&mut self.right); } intersection_len = intersection(&mut self.left[..intersection_len], 
&self.right[..]); if intersection_len == 0 { @@ -171,7 +173,9 @@ } } - self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right); + self.intersection_docset + .docset_mut_specialized(self.num_docsets - 1) + .positions(&mut self.right); intersection_exists(&self.left[..intersection_len], &self.right[..]) } @@ -184,7 +188,9 @@ let mut intersection_len = self.left.len(); for i in 1..self.num_docsets - 1 { { - self.intersection_docset.docset_mut_specialized(i).positions(&mut self.right); + self.intersection_docset + .docset_mut_specialized(i) + .positions(&mut self.right); } intersection_len = intersection(&mut self.left[..intersection_len], &self.right[..]); if intersection_len == 0 { @@ -192,7 +198,9 @@ } } - self.intersection_docset.docset_mut_specialized(self.num_docsets - 1).positions(&mut self.right); + self.intersection_docset + .docset_mut_specialized(self.num_docsets - 1) + .positions(&mut self.right); intersection_count(&self.left[..intersection_len], &self.right[..]) as u32 } } @@ -238,15 +246,15 @@ impl<TPostings: Postings> Scorer for PhraseScorer<TPostings> { fn score(&mut self) -> f32 { let doc = self.doc(); let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc); - self.similarity_weight.score(fieldnorm_id, self.phrase_count) + self.similarity_weight + .score(fieldnorm_id, self.phrase_count) } } #[cfg(test)] mod tests { - use super::{intersection_count, intersection}; - + use super::{intersection, intersection_count}; fn test_intersection_sym(left: &[u32], right: &[u32], expected: &[u32]) { test_intersection_aux(left, right, expected); @@ -271,12 +279,11 @@ } } - -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { + use super::{intersection, intersection_count}; use test::Bencher; - use super::{intersection_count, intersection}; #[bench] fn bench_intersection_short(b: &mut Bencher) { b.iter(|| { ... }); } - #[bench] fn bench_intersection_count_short(b: &mut Bencher) { b.iter(|| { ... intersection_count(&left, &right); }); } -} \ No newline at end of file +} diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs index 093ccea8c..9f5a5c243 100644 --- a/src/query/phrase_query/phrase_weight.rs +++ b/src/query/phrase_query/phrase_weight.rs @@ -1,12 +1,12 @@ -use query::Weight; -use query::Scorer; -use schema::Term; -use schema::IndexRecordOption; -use core::SegmentReader; use super::PhraseScorer; -use query::EmptyScorer; -use Result; +use core::SegmentReader; use query::bm25::BM25Weight; +use query::EmptyScorer; +use query::Scorer; +use query::Weight; +use schema::IndexRecordOption; +use schema::Term; +use Result; pub struct PhraseWeight { phrase_terms: Vec<Term>, @@ -16,13 +16,15 @@ impl PhraseWeight { /// Creates a new phrase weight.
- pub fn new(phrase_terms: Vec<Term>, - similarity_weight: BM25Weight, - score_needed: bool) -> PhraseWeight { + pub fn new( + phrase_terms: Vec<Term>, + similarity_weight: BM25Weight, + score_needed: bool, + ) -> PhraseWeight { PhraseWeight { phrase_terms, similarity_weight, - score_needed + score_needed, } } } @@ -37,25 +39,37 @@ impl Weight for PhraseWeight { for term in &self.phrase_terms { if let Some(postings) = reader .inverted_index(term.field()) - .read_postings(term, IndexRecordOption::WithFreqsAndPositions) { + .read_postings(term, IndexRecordOption::WithFreqsAndPositions) + { term_postings_list.push(postings); } else { return Ok(Box::new(EmptyScorer)); } } - Ok(Box::new(PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed))) + Ok(Box::new(PhraseScorer::new( + term_postings_list, + similarity_weight, + fieldnorm_reader, + self.score_needed, + ))) } else { let mut term_postings_list = Vec::new(); for term in &self.phrase_terms { if let Some(postings) = reader .inverted_index(term.field()) - .read_postings_no_deletes(term, IndexRecordOption::WithFreqsAndPositions) { + .read_postings_no_deletes(term, IndexRecordOption::WithFreqsAndPositions) + { term_postings_list.push(postings); } else { return Ok(Box::new(EmptyScorer)); } } - Ok(Box::new(PhraseScorer::new(term_postings_list, similarity_weight, fieldnorm_reader, self.score_needed))) + Ok(Box::new(PhraseScorer::new( + term_postings_list, + similarity_weight, + fieldnorm_reader, + self.score_needed, + ))) } } } diff --git a/src/query/query.rs b/src/query/query.rs index 8143553f4..d59d158f2 100644 --- a/src/query/query.rs +++ b/src/query/query.rs @@ -1,9 +1,9 @@ -use Result; +use super::Weight; use collector::Collector; use core::searcher::Searcher; -use SegmentLocalId; -use super::Weight; use std::fmt; +use Result; +use SegmentLocalId; /// The `Query` trait defines a set of documents and a scoring method /// for those documents.
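For readers skimming the patch, here is a minimal, hypothetical sketch of the PhraseQuery API being reformatted above, assuming the 0.6-era tantivy API visible in this diff (the index contents and the 50 MB writer heap are illustrative, not taken from the patch):

    #[macro_use]
    extern crate tantivy;

    use tantivy::collector::TopCollector;
    use tantivy::query::PhraseQuery;
    use tantivy::schema::*;
    use tantivy::{Index, Term};

    fn main() {
        let mut schema_builder = SchemaBuilder::default();
        // TEXT indexes positions, which PhraseQuery::weight() checks for;
        // a field indexed without positions would hit the SchemaError above.
        let text = schema_builder.add_text_field("text", TEXT);
        let index = Index::create_in_ram(schema_builder.build());
        {
            let mut writer = index.writer(50_000_000).unwrap();
            writer.add_document(doc!(text => "the quick brown fox"));
            writer.commit().unwrap();
        }
        index.load_searchers().unwrap();
        let searcher = index.searcher();
        // At least two terms, all on the same field, per the assertions above.
        let query = PhraseQuery::new(vec![
            Term::from_field_text(text, "quick"),
            Term::from_field_text(text, "brown"),
        ]);
        let mut top = TopCollector::with_limit(10);
        searcher.search(&query, &mut top).unwrap();
        assert_eq!(top.docs().len(), 1);
    }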
diff --git a/src/query/query_parser/logical_ast.rs b/src/query/query_parser/logical_ast.rs index ec59f334e..9130cfb56 100644 --- a/src/query/query_parser/logical_ast.rs +++ b/src/query/query_parser/logical_ast.rs @@ -1,6 +1,6 @@ -use std::fmt; -use schema::Term; use query::Occur; +use schema::Term; +use std::fmt; #[derive(Clone)] pub enum LogicalLiteral { diff --git a/src/query/query_parser/mod.rs b/src/query/query_parser/mod.rs index 91bc7d172..2fd58158e 100644 --- a/src/query/query_parser/mod.rs +++ b/src/query/query_parser/mod.rs @@ -1,5 +1,5 @@ -mod query_parser; mod query_grammar; +mod query_parser; mod user_input_ast; pub mod logical_ast; diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 0d057731f..6584abd4e 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -1,6 +1,6 @@ -use combine::*; -use combine::char::*; use super::user_input_ast::*; +use combine::char::*; +use combine::*; fn literal<I>(input: I) -> ParseResult<UserInputLiteral, I> where diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index a935aac45..beef7fcea 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -1,18 +1,18 @@ -use schema::{Field, Schema}; -use query::Query; -use query::BooleanQuery; use super::logical_ast::*; -use super::user_input_ast::*; use super::query_grammar::parse_to_ast; +use super::user_input_ast::*; +use core::Index; +use query::BooleanQuery; use query::Occur; +use query::PhraseQuery; +use query::Query; use query::TermQuery; use schema::IndexRecordOption; -use query::PhraseQuery; +use schema::{Field, Schema}; use schema::{FieldType, Term}; +use std::num::ParseIntError; use std::str::FromStr; use tokenizer::TokenizerManager; -use std::num::ParseIntError; -use core::Index; /// Possible error that may happen when parsing a query. #[derive(Debug, PartialEq, Eq)] @@ -179,14 +179,14 @@ impl QueryParser { } FieldType::Str(ref str_options) => { if let Some(option) = str_options.get_indexing_options() { - let mut tokenizer = self.tokenizer_manager - .get(option.tokenizer()) - .ok_or_else(|| { + let mut tokenizer = self.tokenizer_manager.get(option.tokenizer()).ok_or_else( + || { QueryParserError::UnknownTokenizer( field_entry.name().to_string(), option.tokenizer().to_string(), ) - })?; + }, + )?; let mut terms: Vec<Term> = Vec::new(); let mut token_stream = tokenizer.token_stream(phrase); token_stream.process(&mut |token| { @@ -207,13 +207,14 @@ impl QueryParser { Ok(Some(LogicalLiteral::Phrase(terms))) } else { let fieldname = self.schema.get_field_name(field).to_string(); - Err(QueryParserError::FieldDoesNotHavePositionsIndexed(fieldname)) + Err(QueryParserError::FieldDoesNotHavePositionsIndexed( + fieldname, + )) } } else { let fieldname = self.schema.get_field_name(field).to_string(); Err(QueryParserError::FieldNotIndexed(fieldname)) } - } } else { // This should have been seen earlier really.
@@ -340,16 +341,16 @@ fn convert_to_query(logical_ast: LogicalAST) -> Box<Query> { #[cfg(test)] mod test { - use schema::{SchemaBuilder, Term, INT_INDEXED, STORED, STRING, TEXT}; - use tokenizer::TokenizerManager; + use super::super::logical_ast::*; + use super::QueryParser; + use super::QueryParserError; use query::Query; use schema::Field; use schema::{IndexRecordOption, TextFieldIndexing, TextOptions}; - use super::QueryParser; - use super::QueryParserError; - use Index; + use schema::{SchemaBuilder, Term, INT_INDEXED, STORED, STRING, TEXT}; use tokenizer::SimpleTokenizer; - use super::super::logical_ast::*; + use tokenizer::TokenizerManager; + use Index; fn make_query_parser() -> QueryParser { let mut schema_builder = SchemaBuilder::default(); diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 599541095..f875acd86 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -1,16 +1,16 @@ -use schema::{Field, IndexRecordOption, Term}; -use query::{Query, Scorer, Weight}; -use termdict::{TermDictionary, TermStreamer}; -use core::SegmentReader; use common::BitSet; -use Result; use core::Searcher; +use core::SegmentReader; +use error::ErrorKind; use query::BitSetDocSet; use query::ConstScorer; -use std::ops::Range; +use query::{Query, Scorer, Weight}; use schema::Type; -use error::ErrorKind; +use schema::{Field, IndexRecordOption, Term}; use std::collections::Bound; +use std::ops::Range; +use termdict::{TermDictionary, TermStreamer}; +use Result; fn map_bound<TFrom, Transform: Fn(&TFrom) -> Vec<u8>>( bound: Bound<TFrom>, @@ -89,16 +89,16 @@ pub struct RangeQuery { } impl RangeQuery { - /// Creates a new `RangeQuery` over a `i64` field. /// /// If the field is not of the type `i64`, tantivy /// will panic when the `Weight` object is created. - pub fn new_i64( - field: Field, - range: Range<i64> - ) -> RangeQuery { - RangeQuery::new_i64_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end)) + pub fn new_i64(field: Field, range: Range<i64>) -> RangeQuery { + RangeQuery::new_i64_bounds( + field, + Bound::Included(range.start), + Bound::Excluded(range.end), + ) } /// Create a new `RangeQuery` over a `i64` field. @@ -111,7 +111,7 @@ impl RangeQuery { pub fn new_i64_bounds( field: Field, left_bound: Bound<i64>, - right_bound: Bound<i64> + right_bound: Bound<i64>, ) -> RangeQuery { let make_term_val = |val: i64| Term::from_field_i64(field, val).value_bytes().to_owned(); RangeQuery { @@ -132,7 +132,7 @@ impl RangeQuery { pub fn new_u64_bounds( field: Field, left_bound: Bound<u64>, - right_bound: Bound<u64> + right_bound: Bound<u64>, ) -> RangeQuery { let make_term_val = |val: u64| Term::from_field_u64(field, val).value_bytes().to_owned(); RangeQuery { @@ -147,11 +147,12 @@ impl RangeQuery { /// /// If the field is not of the type `u64`, tantivy /// will panic when the `Weight` object is created. - pub fn new_u64( - field: Field, - range: Range<u64> - ) -> RangeQuery { - RangeQuery::new_u64_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end)) + pub fn new_u64(field: Field, range: Range<u64>) -> RangeQuery { + RangeQuery::new_u64_bounds( + field, + Bound::Included(range.start), + Bound::Excluded(range.end), + ) } /// Create a new `RangeQuery` over a `Str` field.
@@ -164,7 +165,7 @@ impl RangeQuery { pub fn new_str_bounds<'b>( field: Field, left: Bound<&'b str>, - right: Bound<&'b str> + right: Bound<&'b str>, ) -> RangeQuery { let make_term_val = |val: &str| val.as_bytes().to_vec(); RangeQuery { @@ -179,11 +180,12 @@ impl RangeQuery { /// /// If the field is not of the type `Str`, tantivy /// will panic when the `Weight` object is created. - pub fn new_str<'b>( - field: Field, - range: Range<&'b str> - ) -> RangeQuery { - RangeQuery::new_str_bounds(field, Bound::Included(range.start), Bound::Excluded(range.end)) + pub fn new_str<'b>(field: Field, range: Range<&'b str>) -> RangeQuery { + RangeQuery::new_str_bounds( + field, + Bound::Included(range.start), + Bound::Excluded(range.end), + ) } } @@ -256,13 +258,13 @@ impl Weight for RangeWeight { #[cfg(test)] mod tests { - use Index; - use schema::{Document, Field, SchemaBuilder, INT_INDEXED}; - use collector::CountCollector; - use std::collections::Bound; - use query::Query; - use Result; use super::RangeQuery; + use collector::CountCollector; + use query::Query; + use schema::{Document, Field, SchemaBuilder, INT_INDEXED}; + use std::collections::Bound; + use Index; + use Result; #[test] fn test_range_query_simple() { @@ -349,7 +351,14 @@ mod tests { )), 9 ); - assert_eq!(count_multiples(RangeQuery::new_i64_bounds(int_field, Bound::Included(9), Bound::Unbounded)), 91); + assert_eq!( + count_multiples(RangeQuery::new_i64_bounds( + int_field, + Bound::Included(9), + Bound::Unbounded + )), + 91 + ); } } diff --git a/src/query/reqopt_scorer.rs b/src/query/reqopt_scorer.rs index deec7d7aa..16b367804 100644 --- a/src/query/reqopt_scorer.rs +++ b/src/query/reqopt_scorer.rs @@ -1,10 +1,10 @@ -use DocId; -use query::Scorer; -use query::score_combiner::ScoreCombiner; -use Score; use docset::{DocSet, SkipResult}; +use query::score_combiner::ScoreCombiner; +use query::Scorer; use std::cmp::Ordering; use std::marker::PhantomData; +use DocId; +use Score; /// Given a required scorer and an optional scorer /// matches all document from the required scorer @@ -101,14 +101,14 @@ where #[cfg(test)] mod tests { - use tests::sample_with_seed; use super::RequiredOptionalScorer; - use query::VecDocSet; - use query::ConstScorer; use docset::DocSet; use postings::tests::test_skip_against_unoptimized; - use query::Scorer; use query::score_combiner::{DoNothingCombiner, SumCombiner}; + use query::ConstScorer; + use query::Scorer; + use query::VecDocSet; + use tests::sample_with_seed; #[test] fn test_reqopt_scorer_empty() { diff --git a/src/query/score_combiner.rs b/src/query/score_combiner.rs index 1c71c5ede..74c541b37 100644 --- a/src/query/score_combiner.rs +++ b/src/query/score_combiner.rs @@ -1,5 +1,5 @@ -use Score; use query::Scorer; +use Score; /// The `ScoreCombiner` trait defines how to compute /// an overall score given a list of scores. diff --git a/src/query/scorer.rs b/src/query/scorer.rs index 3f9de7ab5..a94b03a5b 100644 --- a/src/query/scorer.rs +++ b/src/query/scorer.rs @@ -1,11 +1,11 @@ -use DocId; -use Score; use collector::Collector; -use docset::{DocSet, SkipResult}; use common::BitSet; -use std::ops::DerefMut; +use docset::{DocSet, SkipResult}; use downcast; use fastfield::DeleteBitSet; +use std::ops::DerefMut; +use DocId; +use Score; /// Scored set of documents matching a query within a specific segment. 
/// @@ -34,7 +34,6 @@ pub trait Scorer: downcast::Any + DocSet + 'static { } } - #[allow(missing_docs)] mod downcast_impl { downcast!(super::Scorer); @@ -61,7 +60,6 @@ impl DocSet for EmptyScorer { false } - fn doc(&self) -> DocId { panic!( "You may not call .doc() on a scorer \ diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index 77d4cfb29..e8a865e02 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -1,22 +1,21 @@ mod term_query; -mod term_weight; mod term_scorer; +mod term_weight; pub use self::term_query::TermQuery; -pub use self::term_weight::TermWeight; pub use self::term_scorer::TermScorer; +pub use self::term_weight::TermWeight; #[cfg(test)] mod tests { - use docset::DocSet; - use Index; - use tests::assert_nearly_equals; - use Term; - use schema::{TEXT, STRING, SchemaBuilder, IndexRecordOption}; use collector::TopCollector; - use query::{TermQuery, QueryParser, Query, Scorer}; - + use docset::DocSet; + use query::{Query, QueryParser, Scorer, TermQuery}; + use schema::{IndexRecordOption, SchemaBuilder, STRING, TEXT}; + use tests::assert_nearly_equals; + use Index; + use Term; #[test] pub fn test_term_query_no_freq() { @@ -48,7 +47,6 @@ mod tests { assert_eq!(term_scorer.score(), 0.28768212); } - #[test] pub fn test_term_weight() { let mut schema_builder = SchemaBuilder::new(); @@ -66,7 +64,6 @@ mod tests { )); index_writer.add_document(doc!(left_field => "left4 left1")); index_writer.commit().unwrap(); - } index.load_searchers().unwrap(); let searcher = index.searcher(); diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs index ea0e0afbb..854263fd1 100644 --- a/src/query/term_query/term_query.rs +++ b/src/query/term_query/term_query.rs @@ -1,11 +1,11 @@ -use Term; -use Result; use super::term_weight::TermWeight; +use query::bm25::BM25Weight; use query::Query; use query::Weight; use schema::IndexRecordOption; +use Result; use Searcher; -use query::bm25::BM25Weight; +use Term; /// A Term query matches all of the documents /// containing a specific term. 
@@ -44,11 +44,7 @@ impl TermQuery { } else { IndexRecordOption::Basic }; - TermWeight::new( - self.term.clone(), - index_record_option, - bm25_weight - ) + TermWeight::new(self.term.clone(), index_record_option, bm25_weight) } } @@ -57,4 +53,3 @@ impl Query for TermQuery { Ok(Box::new(self.specialized_weight(searcher, scoring_enabled))) } } - diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index 12c73dacf..96b980eb2 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -1,12 +1,12 @@ -use Score; -use DocId; use docset::{DocSet, SkipResult}; use query::Scorer; +use DocId; +use Score; -use postings::Postings; use fieldnorm::FieldNormReader; -use query::bm25::BM25Weight; +use postings::Postings; use postings::SegmentPostings; +use query::bm25::BM25Weight; pub struct TermScorer { postings: SegmentPostings, @@ -14,11 +14,12 @@ pub struct TermScorer { similarity_weight: BM25Weight, } - impl TermScorer { - pub fn new(postings: SegmentPostings, - fieldnorm_reader: FieldNormReader, - similarity_weight: BM25Weight) -> TermScorer { + pub fn new( + postings: SegmentPostings, + fieldnorm_reader: FieldNormReader, + similarity_weight: BM25Weight, + ) -> TermScorer { TermScorer { postings, fieldnorm_reader, @@ -49,7 +50,7 @@ impl Scorer for TermScorer { fn score(&mut self) -> Score { let doc = self.doc(); let fieldnorm_id = self.fieldnorm_reader.fieldnorm_id(doc); - self.similarity_weight.score(fieldnorm_id, self.postings.term_freq()) + self.similarity_weight + .score(fieldnorm_id, self.postings.term_freq()) } } - diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 6cb37e276..ba45a8042 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -1,13 +1,13 @@ -use Term; -use query::Weight; +use super::term_scorer::TermScorer; use core::SegmentReader; -use query::Scorer; use docset::DocSet; use postings::SegmentPostings; -use schema::IndexRecordOption; -use super::term_scorer::TermScorer; -use Result; use query::bm25::BM25Weight; +use query::Scorer; +use query::Weight; +use schema::IndexRecordOption; +use Result; +use Term; pub struct TermWeight { term: Term, @@ -16,24 +16,26 @@ impl Weight for TermWeight { - fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> { let field = self.term.field(); let inverted_index = reader.inverted_index(field); let fieldnorm_reader = reader.get_fieldnorms_reader(field); let similarity_weight = self.similarity_weight.clone(); - let postings_opt: Option<SegmentPostings> = - inverted_index.read_postings(&self.term, self.index_record_option); - if let Some(segment_postings) = postings_opt { - Ok(Box::new(TermScorer::new(segment_postings, - fieldnorm_reader, - similarity_weight))) - } else { - Ok(Box::new(TermScorer::new( - SegmentPostings::empty(), - fieldnorm_reader, - similarity_weight))) - } + let postings_opt: Option<SegmentPostings> = + inverted_index.read_postings(&self.term, self.index_record_option); + if let Some(segment_postings) = postings_opt { + Ok(Box::new(TermScorer::new( + segment_postings, + fieldnorm_reader, + similarity_weight, + ))) + } else { + Ok(Box::new(TermScorer::new( + SegmentPostings::empty(), + fieldnorm_reader, + similarity_weight, + ))) + } } fn count(&self, reader: &SegmentReader) -> Result<u32> { @@ -50,12 +52,12 @@ impl Weight for TermWeight { } } - impl TermWeight { - - pub fn new(term:
Term, + index_record_option: IndexRecordOption, + similarity_weight: BM25Weight, + ) -> TermWeight { TermWeight { term, index_record_option, @@ -63,4 +65,3 @@ impl TermWeight { } } } - diff --git a/src/query/union.rs b/src/query/union.rs index 350bee955..0e7baba25 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -1,22 +1,23 @@ -use docset::{DocSet, SkipResult}; -use query::Scorer; use common::TinySet; +use docset::{DocSet, SkipResult}; +use query::score_combiner::{DoNothingCombiner, ScoreCombiner}; +use query::Scorer; use std::cmp::Ordering; use DocId; use Score; -use query::score_combiner::{DoNothingCombiner, ScoreCombiner}; const HORIZON_NUM_TINYBITSETS: usize = 64; const HORIZON: u32 = 64u32 * HORIZON_NUM_TINYBITSETS as u32; - // `drain_filter` is not stable yet. // This function is similar except that it does is not unsafe, and // it does not keep the original vector ordering. // // Also, it does not "yield" any elements. fn unordered_drain_filter<T, P>(v: &mut Vec<T>, mut predicate: P) - where P: FnMut(&mut T) -> bool { +where +P: FnMut(&mut T) -> bool, +{ let mut i = 0; while i < v.len() { if predicate(&mut v[i]) { @@ -262,15 +263,15 @@ where mod tests { use super::Union; - use tests; - use DocId; - use std::collections::BTreeSet; use super::HORIZON; use docset::{DocSet, SkipResult}; use postings::tests::test_skip_against_unoptimized; - use query::VecDocSet; - use query::ConstScorer; use query::score_combiner::DoNothingCombiner; + use query::ConstScorer; + use query::VecDocSet; + use std::collections::BTreeSet; + use tests; + use DocId; fn aux_test_union(vals: Vec<Vec<u32>>) { let mut val_set: BTreeSet<u32> = BTreeSet::new(); @@ -406,19 +407,18 @@ mod tests { ); } - } -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use test::Bencher; - use tests; use query::score_combiner::DoNothingCombiner; - use DocId; use query::ConstScorer; use query::Union; use query::VecDocSet; + use test::Bencher; + use tests; + use DocId; use DocSet; #[bench] diff --git a/src/query/vec_docset.rs b/src/query/vec_docset.rs index 01ca081be..cc3f991da 100644 --- a/src/query/vec_docset.rs +++ b/src/query/vec_docset.rs @@ -1,9 +1,9 @@ #![allow(dead_code)] -use DocId; -use docset::DocSet; use common::HasLen; +use docset::DocSet; use std::num::Wrapping; +use DocId; const EMPTY_ARRAY: [u32; 0] = []; @@ -51,8 +51,8 @@ impl HasLen for VecDocSet { pub mod tests { use super::*; - use DocId; use docset::{DocSet, SkipResult}; + use DocId; #[test] pub fn test_vec_postings() { diff --git a/src/query/weight.rs b/src/query/weight.rs index 44a4860e2..d3d8b3520 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -1,6 +1,6 @@ use super::Scorer; -use Result; use core::SegmentReader; +use Result; /// A Weight is the specialization of a Query /// for a given set of segments. diff --git a/src/schema/document.rs b/src/schema/document.rs index 82a07f184..4f32c7d97 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -1,8 +1,8 @@ use super::*; -use itertools::Itertools; -use common::VInt; -use std::io::{self, Read, Write}; use common::BinarySerializable; +use common::VInt; +use itertools::Itertools; +use std::io::{self, Read, Write} /// Tantivy's Document is the object that can /// be indexed and then searched for.
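Before moving on to the schema files, a short sketch of the Document API whose imports are reordered above (hedged against the same era's API; the field names are made up for illustration):

    #[macro_use]
    extern crate tantivy;

    use tantivy::schema::*;

    fn main() {
        let mut schema_builder = SchemaBuilder::default();
        let title = schema_builder.add_text_field("title", TEXT | STORED);
        let count = schema_builder.add_u64_field("count", INT_STORED);
        let _schema = schema_builder.build();

        // Explicit construction...
        let mut doc = Document::default();
        doc.add_text(title, "of mice and men");
        doc.add_u64(count, 7);

        // ...or the doc! macro, which produces the same field/value pairs.
        let doc2 = doc!(title => "of mice and men", count => 7u64);
        assert_eq!(doc.field_values().len(), doc2.field_values().len());
    }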
diff --git a/src/schema/facet.rs b/src/schema/facet.rs index 061848df4..bda6bb59d 100644 --- a/src/schema/facet.rs +++ b/src/schema/facet.rs @@ -1,11 +1,11 @@ -use std::fmt::{self, Debug, Display, Formatter}; -use std::str; -use std::io::{self, Read, Write}; -use regex::Regex; -use std::borrow::Borrow; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::Cow; use common::BinarySerializable; +use regex::Regex; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::Borrow; +use std::borrow::Cow; +use std::fmt::{self, Debug, Display, Formatter}; +use std::io::{self, Read, Write}; +use std::str; const SLASH_BYTE: u8 = b'/'; const ESCAPE_BYTE: u8 = b'\\'; diff --git a/src/schema/field.rs b/src/schema/field.rs index 6c2ce8316..557bf36e4 100644 --- a/src/schema/field.rs +++ b/src/schema/field.rs @@ -1,7 +1,7 @@ -use std::io; -use std::io::Write; -use std::io::Read; use common::BinarySerializable; +use std::io; +use std::io::Read; +use std::io::Write; /// `Field` is actually a `u8` identifying a `Field` /// The schema is in charge of holding mapping between field names diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index f120375bd..4e6cf43f9 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -1,11 +1,11 @@ -use schema::TextOptions; use schema::IntOptions; +use schema::TextOptions; -use std::fmt; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use serde::ser::SerializeStruct; -use serde::de::{self, MapAccess, Visitor}; use schema::FieldType; +use serde::de::{self, MapAccess, Visitor}; +use serde::ser::SerializeStruct; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::fmt; /// A `FieldEntry` represents a field and its configuration. /// `Schema` are a collection of `FieldEntry` diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 8a3a4b354..692e3b895 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -1,9 +1,9 @@ use schema::{IntOptions, TextOptions}; -use serde_json::Value as JsonValue; -use schema::Value; -use schema::IndexRecordOption; use schema::Facet; +use schema::IndexRecordOption; +use schema::Value; +use serde_json::Value as JsonValue; /// Possible error that may occur while parsing a field value /// At this point the JSON is known to be valid. @@ -30,7 +30,7 @@ pub enum Type { /// `i64` I64, /// `tantivy::schema::Facet`. Passed as a string in JSON. - HierarchicalFacet + HierarchicalFacet, } /// A `FieldType` describes the type (text, u64) of a field as well as @@ -48,18 +48,13 @@ pub enum FieldType { } impl FieldType { - /// Returns the value type associated for this field. pub fn value_type(&self) -> Type { match *self { - FieldType::Str(_) => - Type::Str, - FieldType::U64(_) => - Type::U64, - FieldType::I64(_) => - Type::I64, - FieldType::HierarchicalFacet => - Type::HierarchicalFacet, + FieldType::Str(_) => Type::Str, + FieldType::U64(_) => Type::U64, + FieldType::I64(_) => Type::I64, + FieldType::HierarchicalFacet => Type::HierarchicalFacet, } } diff --git a/src/schema/field_value.rs b/src/schema/field_value.rs index 7bdf9b75e..eb10c7e6e 100644 --- a/src/schema/field_value.rs +++ b/src/schema/field_value.rs @@ -1,9 +1,9 @@ -use std::io; use common::BinarySerializable; -use std::io::Read; -use std::io::Write; use schema::Field; use schema::Value; +use std::io; +use std::io::Read; +use std::io::Write; /// `FieldValue` holds together a `Field` and its `Value`. 
#[derive(Debug, Clone, Ord, PartialEq, Eq, PartialOrd, Serialize, Deserialize)] diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 7f3f637b1..1570f0af0 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -100,26 +100,26 @@ the field is required during scoring or collection for instance. */ -mod schema; -mod term; mod document; mod facet; +mod schema; +mod term; -mod field_type; mod field_entry; +mod field_type; mod field_value; -mod text_options; -mod int_options; mod field; -mod value; -mod named_field_document; mod index_record_option; +mod int_options; +mod named_field_document; +mod text_options; +mod value; pub use self::named_field_document::NamedFieldDocument; +pub use self::schema::DocParsingError; pub use self::schema::{Schema, SchemaBuilder}; pub use self::value::Value; -pub use self::schema::DocParsingError; pub use self::facet::Facet; pub use self::facet::FACET_SEP_BYTE; @@ -128,22 +128,22 @@ pub use self::document::Document; pub use self::field::Field; pub use self::term::Term; -pub use self::field_type::{Type, FieldType}; pub use self::field_entry::FieldEntry; +pub use self::field_type::{FieldType, Type}; pub use self::field_value::FieldValue; -pub use self::text_options::TextOptions; pub use self::index_record_option::IndexRecordOption; pub use self::text_options::TextFieldIndexing; -pub use self::text_options::TEXT; -pub use self::text_options::STRING; +pub use self::text_options::TextOptions; pub use self::text_options::STORED; +pub use self::text_options::STRING; +pub use self::text_options::TEXT; +pub use self::int_options::Cardinality; pub use self::int_options::IntOptions; pub use self::int_options::FAST; pub use self::int_options::INT_INDEXED; pub use self::int_options::INT_STORED; -pub use self::int_options::Cardinality; use regex::Regex; diff --git a/src/schema/named_field_document.rs b/src/schema/named_field_document.rs index 0acd9ae47..c971499d5 100644 --- a/src/schema/named_field_document.rs +++ b/src/schema/named_field_document.rs @@ -1,5 +1,5 @@ -use std::collections::BTreeMap; use schema::Value; +use std::collections::BTreeMap; /// Internal representation of a document used for JSON /// serialization. diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 99ddfbd47..c358912a6 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -1,13 +1,13 @@ -use std::collections::HashMap; -use std::collections::BTreeMap; use schema::field_type::ValueParsingError; +use std::collections::BTreeMap; +use std::collections::HashMap; use std::sync::Arc; -use serde_json::{self, Map as JsonObject, Value as JsonValue}; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use serde::ser::SerializeSeq; -use serde::de::{SeqAccess, Visitor}; use super::*; +use serde::de::{SeqAccess, Visitor}; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde_json::{self, Map as JsonObject, Value as JsonValue}; use std::fmt; /// Tantivy has a very strict schema. 
@@ -305,10 +305,10 @@ pub enum DocParsingError { #[cfg(test)] mod tests { - use schema::*; - use serde_json; use schema::field_type::ValueParsingError; use schema::schema::DocParsingError::NotJSON; + use schema::*; + use serde_json; #[test] pub fn is_indexed_test() { diff --git a/src/schema/term.rs b/src/schema/term.rs index 44ba5c639..8b06cae99 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -1,8 +1,8 @@ use std::fmt; -use common; -use byteorder::{BigEndian, ByteOrder}; use super::Field; +use byteorder::{BigEndian, ByteOrder}; +use common; use std::str; /// Size (in bytes) of the buffer of a int field. diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs index 401ed65a9..833135f84 100644 --- a/src/schema/text_options.rs +++ b/src/schema/text_options.rs @@ -1,6 +1,6 @@ -use std::ops::BitOr; -use std::borrow::Cow; use schema::IndexRecordOption; +use std::borrow::Cow; +use std::ops::BitOr; /// Define how a text field should be handled by tantivy. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] diff --git a/src/schema/value.rs b/src/schema/value.rs index 90e573ade..bae96d867 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -1,7 +1,7 @@ -use std::fmt; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use serde::de::Visitor; use schema::Facet; +use serde::de::Visitor; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::fmt; /// Value represents the value of a any field. /// It is an enum over all over all of the possible field type. @@ -132,10 +132,10 @@ impl<'a> From<&'a str> for Value { } mod binary_serialize { - use common::BinarySerializable; - use std::io::{self, Read, Write}; use super::Value; + use common::BinarySerializable; use schema::Facet; + use std::io::{self, Read, Write}; const TEXT_CODE: u8 = 0; const U64_CODE: u8 = 1; diff --git a/src/store/mod.rs b/src/store/mod.rs index 22ccd1248..1eba49aca 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -42,12 +42,12 @@ pub use self::writer::StoreWriter; pub mod tests { use super::*; - use std::path::Path; - use schema::{Schema, SchemaBuilder}; - use schema::TextOptions; - use schema::FieldValue; - use schema::Document; use directory::{Directory, RAMDirectory, WritePtr}; + use schema::Document; + use schema::FieldValue; + use schema::TextOptions; + use schema::{Schema, SchemaBuilder}; + use std::path::Path; pub fn write_lorem_ipsum_store(writer: WritePtr, num_docs: usize) -> Schema { let mut schema_builder = SchemaBuilder::default(); @@ -106,18 +106,18 @@ pub mod tests { } -#[cfg(all(test, feature="unstable"))] +#[cfg(all(test, feature = "unstable"))] mod bench { - use test::Bencher; + use super::tests::write_lorem_ipsum_store; + use directory::Directory; use directory::RAMDirectory; use std::path::Path; use store::StoreReader; - use directory::Directory; - use super::tests::write_lorem_ipsum_store; + use test::Bencher; #[bench] - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] fn bench_store_encode(b: &mut Bencher) { let mut directory = RAMDirectory::create(); let path = Path::new("store"); @@ -138,4 +138,4 @@ mod bench { store.get(12).unwrap(); }); } -} \ No newline at end of file +} diff --git a/src/store/reader.rs b/src/store/reader.rs index 0940bd752..da6973977 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -1,15 +1,15 @@ use Result; -use directory::ReadOnlySource; -use std::cell::RefCell; -use DocId; -use schema::Document; use common::BinarySerializable; -use std::mem::size_of; -use std::io::{self, Read}; use common::VInt; use 
datastruct::SkipList; +use directory::ReadOnlySource; use lz4; +use schema::Document; +use std::cell::RefCell; +use std::io::{self, Read}; +use std::mem::size_of; +use DocId; /// Reads document off tantivy's [`Store`](./index.html) #[derive(Clone)] diff --git a/src/store/writer.rs b/src/store/writer.rs index 50f6ade8a..052a5c0bb 100644 --- a/src/store/writer.rs +++ b/src/store/writer.rs @@ -1,12 +1,12 @@ -use directory::WritePtr; -use DocId; -use common::{BinarySerializable, VInt}; -use std::io::{self, Write}; use super::StoreReader; -use lz4; -use datastruct::SkipListBuilder; use common::CountingWriter; +use common::{BinarySerializable, VInt}; +use datastruct::SkipListBuilder; +use directory::WritePtr; +use lz4; use schema::Document; +use std::io::{self, Write}; +use DocId; const BLOCK_SIZE: usize = 16_384; diff --git a/src/termdict/merger.rs b/src/termdict/merger.rs index c2d1f5d3d..bf8496b96 100644 --- a/src/termdict/merger.rs +++ b/src/termdict/merger.rs @@ -1,7 +1,7 @@ +use schema::Term; +use std::cmp::Ordering; use std::collections::BinaryHeap; use termdict::TermStreamer; -use std::cmp::Ordering; -use schema::Term; pub struct HeapItem<'a> { pub streamer: TermStreamer<'a>, diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs index fe31d6622..b10fa9116 100644 --- a/src/termdict/mod.rs +++ b/src/termdict/mod.rs @@ -50,24 +50,24 @@ term `stream`. /// Position of the term in the sorted list of terms. pub type TermOrdinal = u64; -mod term_info_store; -mod streamer; -mod termdict; mod merger; +mod streamer; +mod term_info_store; +mod termdict; -pub use self::termdict::{TermDictionary, TermDictionaryBuilder}; -pub use self::streamer::{TermStreamer, TermStreamerBuilder}; pub use self::merger::TermMerger; +pub use self::streamer::{TermStreamer, TermStreamerBuilder}; +pub use self::termdict::{TermDictionary, TermDictionaryBuilder}; #[cfg(test)] mod tests { - use super::{TermDictionaryBuilder, TermDictionary, TermStreamer}; - use directory::{Directory, RAMDirectory, ReadOnlySource}; - use std::path::PathBuf; - use schema::{Document, FieldType, SchemaBuilder, TEXT}; + use super::{TermDictionary, TermDictionaryBuilder, TermStreamer}; use core::Index; - use std::str; + use directory::{Directory, RAMDirectory, ReadOnlySource}; use postings::TermInfo; + use schema::{Document, FieldType, SchemaBuilder, TEXT}; + use std::path::PathBuf; + use std::str; const BLOCK_SIZE: usize = 1_500; diff --git a/src/termdict/streamer.rs b/src/termdict/streamer.rs index 6486a5202..759964f1b 100644 --- a/src/termdict/streamer.rs +++ b/src/termdict/streamer.rs @@ -1,7 +1,7 @@ -use fst::{IntoStreamer, Streamer}; -use fst::map::{Stream, StreamBuilder}; -use postings::TermInfo; use super::TermDictionary; +use fst::map::{Stream, StreamBuilder}; +use fst::{IntoStreamer, Streamer}; +use postings::TermInfo; use termdict::TermOrdinal; /// `TermStreamerBuilder` is an helper object used to define @@ -11,9 +11,7 @@ pub struct TermStreamerBuilder<'a> { stream_builder: StreamBuilder<'a>, } - impl<'a> TermStreamerBuilder<'a> { - pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a>) -> Self { TermStreamerBuilder { fst_map, @@ -69,7 +67,6 @@ pub struct TermStreamer<'a> { } impl<'a> TermStreamer<'a> { - /// Advance position the stream on the next item. /// Before the first call to `.advance()`, the stream /// is an unitialized state. 
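A brief sketch of how the TermStreamer above is meant to be driven (an assumption based on the stream()/advance() methods visible in this hunk; the key() accessor is this sketch's guess at the raw-bytes term getter):

    use tantivy::termdict::{TermDictionary, TermStreamer};

    // Collects every term of the dictionary in sorted order. As the doc
    // comment above notes, the streamer is uninitialized before the first
    // advance() call, so all reads happen only after advance() returns true.
    fn collect_terms(dict: &TermDictionary) -> Vec<Vec<u8>> {
        let mut terms = Vec::new();
        let mut stream: TermStreamer = dict.stream();
        while stream.advance() {
            // key() is assumed here to expose the current term's bytes.
            terms.push(stream.key().to_vec());
        }
        terms
    }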
diff --git a/src/termdict/term_info_store.rs b/src/termdict/term_info_store.rs index 289c71222..86d74fffe 100644 --- a/src/termdict/term_info_store.rs +++ b/src/termdict/term_info_store.rs @@ -1,14 +1,14 @@ -use std::io; -use std::cmp; -use std::io::{Read, Write}; -use postings::TermInfo; -use common::{BinarySerializable, FixedSize}; +use byteorder::ByteOrder; +use common::bitpacker::BitPacker; use common::compute_num_bits; use common::Endianness; -use common::bitpacker::BitPacker; +use common::{BinarySerializable, FixedSize}; use directory::ReadOnlySource; +use postings::TermInfo; +use std::cmp; +use std::io; +use std::io::{Read, Write}; use termdict::TermOrdinal; -use byteorder::ByteOrder; const BLOCK_LEN: usize = 256; @@ -253,14 +253,14 @@ impl TermInfoStoreWriter { mod tests { use super::extract_bits; - use common::bitpacker::BitPacker; - use common::BinarySerializable; use super::TermInfoBlockMeta; use super::{TermInfoStore, TermInfoStoreWriter}; + use common; + use common::bitpacker::BitPacker; + use common::compute_num_bits; + use common::BinarySerializable; use directory::ReadOnlySource; use postings::TermInfo; - use common::compute_num_bits; - use common; #[test] fn test_term_info_block() { diff --git a/src/termdict/termdict.rs b/src/termdict/termdict.rs index 92966bcea..357e04048 100644 --- a/src/termdict/termdict.rs +++ b/src/termdict/termdict.rs @@ -1,14 +1,14 @@ -use std::io::{self, Write}; -use fst; -use fst::raw::Fst; -use directory::ReadOnlySource; +use super::term_info_store::{TermInfoStore, TermInfoStoreWriter}; +use super::{TermStreamer, TermStreamerBuilder}; use common::BinarySerializable; use common::CountingWriter; -use schema::FieldType; +use directory::ReadOnlySource; +use fst; +use fst::raw::Fst; use postings::TermInfo; +use schema::FieldType; +use std::io::{self, Write}; use termdict::TermOrdinal; -use super::{TermStreamerBuilder, TermStreamer}; -use super::term_info_store::{TermInfoStore, TermInfoStoreWriter}; fn convert_fst_error(e: fst::Error) -> io::Error { io::Error::new(io::ErrorKind::Other, e) @@ -23,10 +23,9 @@ pub struct TermDictionaryBuilder<W> { term_ord: u64, } - impl<W> TermDictionaryBuilder<W> where - W: Write + W: Write, { /// Creates a new `TermDictionaryBuilder` pub fn new(w: W, _field_type: FieldType) -> io::Result<Self> { @@ -91,7 +90,7 @@ fn open_fst_index(source: ReadOnlySource) -> fst::Map { ReadOnlySource::Anonymous(data) => { Fst::from_shared_bytes(data.data, data.start, data.len).expect("FST data is corrupted") } - #[cfg(feature="mmap")] + #[cfg(feature = "mmap")] ReadOnlySource::Mmap(mmap_readonly) => { Fst::from_mmap(mmap_readonly).expect("FST data is corrupted") } @@ -111,7 +110,6 @@ pub struct TermDictionary { } impl TermDictionary { - /// Opens a `TermDictionary` given a data source. pub fn from_source(source: ReadOnlySource) -> Self { let total_len = source.len(); @@ -197,7 +195,6 @@ impl TermDictionary { TermStreamerBuilder::new(self, self.fst_index.range()) } - /// A stream of all the sorted terms. [See also `.stream_field()`](#method.stream_field) pub fn stream<'a>(&'a self) -> TermStreamer<'a> { self.range().into_stream() } diff --git a/src/tokenizer/facet_tokenizer.rs b/src/tokenizer/facet_tokenizer.rs index fd408a4c4..321831934 100644 --- a/src/tokenizer/facet_tokenizer.rs +++ b/src/tokenizer/facet_tokenizer.rs @@ -1,6 +1,6 @@ use super::{Token, TokenStream, Tokenizer}; -use std::str; use schema::FACET_SEP_BYTE; +use std::str; /// The `FacetTokenizer` process a `Facet` binary representation /// and emits a token for all of its parent.
@@ -84,10 +84,10 @@ impl<'a> TokenStream for FacetTokenStream<'a> { #[cfg(test)] mod tests { - use tokenizer::{Token, TokenStream, Tokenizer}; use super::FacetTokenizer; use schema::Facet; use std::str; + use tokenizer::{Token, TokenStream, Tokenizer}; #[test] fn test_facet_tokenizer() { diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index f885df140..70bf35ab7 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -128,30 +128,30 @@ //! # } //! ``` //! -mod tokenizer; -mod simple_tokenizer; -mod lower_caser; -mod remove_long; -mod stemmer; -mod facet_tokenizer; -mod tokenizer_manager; -mod japanese_tokenizer; -mod token_stream_chain; -mod raw_tokenizer; mod alphanum_only; +mod facet_tokenizer; +mod japanese_tokenizer; +mod lower_caser; +mod raw_tokenizer; +mod remove_long; +mod simple_tokenizer; +mod stemmer; +mod token_stream_chain; +mod tokenizer; +mod tokenizer_manager; pub use self::alphanum_only::AlphaNumOnlyFilter; -pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer}; -pub use self::tokenizer::BoxedTokenizer; -pub use self::tokenizer_manager::TokenizerManager; -pub use self::simple_tokenizer::SimpleTokenizer; -pub use self::raw_tokenizer::RawTokenizer; -pub(crate) use self::token_stream_chain::TokenStreamChain; -pub use self::japanese_tokenizer::JapaneseTokenizer; -pub use self::remove_long::RemoveLongFilter; -pub use self::lower_caser::LowerCaser; -pub use self::stemmer::Stemmer; pub use self::facet_tokenizer::FacetTokenizer; +pub use self::japanese_tokenizer::JapaneseTokenizer; +pub use self::lower_caser::LowerCaser; +pub use self::raw_tokenizer::RawTokenizer; +pub use self::remove_long::RemoveLongFilter; +pub use self::simple_tokenizer::SimpleTokenizer; +pub use self::stemmer::Stemmer; +pub(crate) use self::token_stream_chain::TokenStreamChain; +pub use self::tokenizer::BoxedTokenizer; +pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer}; +pub use self::tokenizer_manager::TokenizerManager; #[cfg(test)] mod test { diff --git a/src/tokenizer/simple_tokenizer.rs b/src/tokenizer/simple_tokenizer.rs index 69f7b09a6..5f1ca0e57 100644 --- a/src/tokenizer/simple_tokenizer.rs +++ b/src/tokenizer/simple_tokenizer.rs @@ -1,5 +1,5 @@ -use std::str::CharIndices; use super::{Token, TokenStream, Tokenizer}; +use std::str::CharIndices; /// Tokenize the text by splitting on whitespaces and punctuation. #[derive(Clone)] diff --git a/src/tokenizer/stemmer.rs b/src/tokenizer/stemmer.rs index f2f4a8a93..4c91bfb93 100644 --- a/src/tokenizer/stemmer.rs +++ b/src/tokenizer/stemmer.rs @@ -1,6 +1,6 @@ -use std::sync::Arc; use super::{Token, TokenFilter, TokenStream}; use rust_stemmers::{self, Algorithm}; +use std::sync::Arc; /// `Stemmer` token filter. Currently only English is supported. /// Tokens are expected to be lowercased beforehands. diff --git a/src/tokenizer/tokenizer.rs b/src/tokenizer/tokenizer.rs index 6fcc6390f..104cd0e7e 100644 --- a/src/tokenizer/tokenizer.rs +++ b/src/tokenizer/tokenizer.rs @@ -1,6 +1,5 @@ /// The tokenizer module contains all of the tools used to process /// text in `tantivy`. 
- use std::borrow::{Borrow, BorrowMut}; use tokenizer::TokenStreamChain; diff --git a/src/tokenizer/tokenizer_manager.rs b/src/tokenizer/tokenizer_manager.rs index bbc141c92..cbb46af3b 100644 --- a/src/tokenizer/tokenizer_manager.rs +++ b/src/tokenizer/tokenizer_manager.rs @@ -1,14 +1,14 @@ use std::collections::HashMap; use std::sync::{Arc, RwLock}; -use tokenizer::BoxedTokenizer; -use tokenizer::Tokenizer; use tokenizer::tokenizer::box_tokenizer; -use tokenizer::RawTokenizer; -use tokenizer::SimpleTokenizer; +use tokenizer::BoxedTokenizer; use tokenizer::JapaneseTokenizer; -use tokenizer::RemoveLongFilter; use tokenizer::LowerCaser; +use tokenizer::RawTokenizer; +use tokenizer::RemoveLongFilter; +use tokenizer::SimpleTokenizer; use tokenizer::Stemmer; +use tokenizer::Tokenizer; /// The tokenizer manager serves as a store for /// all of the pre-configured tokenizer pipelines.
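To round off the tokenizer section, a sketch of how the TokenizerManager above is typically wired up (a guess based on the register/get calls visible in this patch; the "en_custom" pipeline name and the 40-byte limit are illustrative):

    extern crate tantivy;

    use tantivy::tokenizer::{
        LowerCaser, RemoveLongFilter, SimpleTokenizer, Tokenizer, TokenizerManager,
    };

    fn main() {
        // default() is assumed to come preloaded with the built-in pipelines.
        let manager = TokenizerManager::default();
        // Compose a pipeline: split on whitespace/punctuation, drop tokens
        // longer than 40 bytes, then lowercase what remains.
        manager.register(
            "en_custom",
            SimpleTokenizer
                .filter(RemoveLongFilter::limit(40))
                .filter(LowerCaser),
        );
        assert!(manager.get("en_custom").is_some());
    }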