From a6e767c8770d4b8f32d42ad8aa6ccc3bf4394a41 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Fri, 30 Nov 2018 22:52:45 +0900 Subject: [PATCH] Cargo fmt --- examples/basic_search.rs | 1 - examples/custom_collector.rs | 28 +++---- examples/stop_words.rs | 2 +- src/collector/count_collector.rs | 10 +-- src/collector/facet_collector.rs | 31 ++++---- src/collector/mod.rs | 93 ++++++++++++---------- src/collector/multi_collector.rs | 115 +++++++++++++-------------- src/collector/tests.rs | 61 +++++++------- src/collector/top_collector.rs | 81 ++++++++++--------- src/collector/top_field_collector.rs | 63 +++++++++------ src/collector/top_score_collector.rs | 73 +++++++++-------- src/common/composite_file.rs | 5 +- src/core/executor.rs | 44 +++++++--- src/core/index.rs | 14 ++-- src/core/mod.rs | 2 +- src/core/searcher.rs | 62 +++++++++------ src/core/segment_reader.rs | 7 +- src/fastfield/mod.rs | 4 +- src/indexer/index_writer.rs | 9 ++- src/indexer/merger.rs | 71 +++++++++++------ src/indexer/segment_updater.rs | 4 +- src/lib.rs | 15 ++-- src/postings/compression/mod.rs | 7 +- src/postings/mod.rs | 2 +- src/postings/stacker/expull.rs | 2 +- src/query/automaton_weight.rs | 5 +- src/query/boolean_query/mod.rs | 5 +- src/query/fuzzy_query.rs | 4 +- src/query/mod.rs | 33 +++++--- src/query/phrase_query/mod.rs | 12 +-- src/query/range_query.rs | 13 +-- src/query/regex_query.rs | 7 +- src/query/term_query/mod.rs | 13 ++- src/schema/schema.rs | 2 - src/snippet/mod.rs | 38 +++++---- src/space_usage/mod.rs | 40 ++++++---- src/store/mod.rs | 2 +- src/tokenizer/mod.rs | 2 - src/tokenizer/ngram_tokenizer.rs | 78 ++++++++---------- 39 files changed, 589 insertions(+), 471 deletions(-) diff --git a/examples/basic_search.rs b/examples/basic_search.rs index f96fedf12..5195bd29c 100644 --- a/examples/basic_search.rs +++ b/examples/basic_search.rs @@ -232,4 +232,3 @@ fn main() -> tantivy::Result<()> { Ok(()) } - diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs index 5af31ceba..0d19a83e5 100644 --- a/examples/custom_collector.rs +++ b/examples/custom_collector.rs @@ -13,23 +13,22 @@ extern crate tempdir; // Importing tantivy... 
#[macro_use] extern crate tantivy; -use tantivy::query::QueryParser; -use tantivy::schema::{FAST, TEXT, INT_INDEXED, Schema}; -use tantivy::Index; use tantivy::collector::{Collector, SegmentCollector}; -use tantivy::SegmentReader; -use tantivy::schema::Field; use tantivy::fastfield::FastFieldReader; +use tantivy::query::QueryParser; +use tantivy::schema::Field; +use tantivy::schema::{Schema, FAST, INT_INDEXED, TEXT}; +use tantivy::Index; +use tantivy::SegmentReader; #[derive(Default)] struct Stats { count: usize, sum: f64, - squared_sum: f64 + squared_sum: f64, } impl Stats { - pub fn count(&self) -> usize { self.count } @@ -56,10 +55,8 @@ impl Stats { } } - - struct StatsCollector { - field: Field + field: Field, } impl StatsCollector { @@ -75,11 +72,15 @@ impl Collector for StatsCollector { type Child = StatsSegmentCollector; - fn for_segment(&self, _segment_local_id: u32, segment: &SegmentReader) -> tantivy::Result { + fn for_segment( + &self, + _segment_local_id: u32, + segment: &SegmentReader, + ) -> tantivy::Result { let fast_field_reader = segment.fast_field_reader(self.field)?; Ok(StatsSegmentCollector { fast_field_reader, - stats: Stats::default() + stats: Stats::default(), }) } @@ -101,7 +102,6 @@ impl Collector for StatsCollector { } } - struct StatsSegmentCollector { fast_field_reader: FastFieldReader, stats: Stats, @@ -122,7 +122,6 @@ impl SegmentCollector for StatsSegmentCollector { } } - fn main() -> tantivy::Result<()> { // # Defining the schema // @@ -186,4 +185,3 @@ fn main() -> tantivy::Result<()> { Ok(()) } - diff --git a/examples/stop_words.rs b/examples/stop_words.rs index f7318cd8b..7eba72bdd 100644 --- a/examples/stop_words.rs +++ b/examples/stop_words.rs @@ -105,7 +105,7 @@ fn main() -> tantivy::Result<()> { // stop words are applied on the query as well. // The following will be equivalent to `title:frankenstein` let query = query_parser.parse_query("title:\"the Frankenstein\"")?; - let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?; + let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?; for (score, doc_address) in top_docs { let retrieved_doc = searcher.doc(doc_address)?; diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index 0af8b2316..ea2a1d9cd 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -1,10 +1,10 @@ use super::Collector; +use collector::SegmentCollector; use DocId; use Result; use Score; use SegmentLocalId; use SegmentReader; -use collector::SegmentCollector; /// `CountCollector` collector only counts how many /// documents match the query. 
@@ -56,9 +56,7 @@ use collector::SegmentCollector; /// ``` pub struct Count; - impl Collector for Count { - type Fruit = usize; type Child = SegmentCountCollector; @@ -76,13 +74,11 @@ impl Collector for Count { } } - #[derive(Default)] pub struct SegmentCountCollector { count: usize, } - impl SegmentCollector for SegmentCountCollector { type Fruit = usize; @@ -95,12 +91,11 @@ impl SegmentCollector for SegmentCountCollector { } } - #[cfg(test)] mod tests { use super::{Count, SegmentCountCollector}; - use collector::SegmentCollector; use collector::Collector; + use collector::SegmentCollector; #[test] fn test_count_collect_does_not_requires_scoring() { @@ -129,7 +124,6 @@ mod tests { count_collector.collect(1u32, 1f32); assert_eq!(count_collector.harvest(), 2); } - } } diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 9c8bc31b6..2937a7cfe 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -1,8 +1,10 @@ use collector::Collector; +use collector::SegmentCollector; use docset::SkipResult; use fastfield::FacetReader; use schema::Facet; use schema::Field; +use std::cmp::Ordering; use std::collections::btree_map; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -10,13 +12,11 @@ use std::collections::BinaryHeap; use std::collections::Bound; use std::iter::Peekable; use std::{u64, usize}; -use std::cmp::Ordering; use DocId; use Result; use Score; use SegmentLocalId; use SegmentReader; -use collector::SegmentCollector; struct Hit<'a> { count: u64, @@ -223,7 +223,7 @@ impl FacetCollector { pub fn for_field(field: Field) -> FacetCollector { FacetCollector { field, - facets: BTreeSet::default() + facets: BTreeSet::default(), } } @@ -260,7 +260,11 @@ impl Collector for FacetCollector { type Child = FacetSegmentCollector; - fn for_segment(&self, _: SegmentLocalId, reader: &SegmentReader) -> Result { + fn for_segment( + &self, + _: SegmentLocalId, + reader: &SegmentReader, + ) -> Result { let facet_reader = reader.facet_reader(self.field)?; let mut collapse_mapping = Vec::new(); @@ -335,21 +339,18 @@ impl Collector for FacetCollector { } impl SegmentCollector for FacetSegmentCollector { - type Fruit = FacetCounts; - fn collect(&mut self, doc: DocId, _: Score) { self.reader.facet_ords(doc, &mut self.facet_ords_buf); let mut previous_collapsed_ord: usize = usize::MAX; for &facet_ord in &self.facet_ords_buf { let collapsed_ord = self.collapse_mapping[facet_ord as usize]; - self.counts[collapsed_ord] += - if collapsed_ord == previous_collapsed_ord { - 0 - } else { - 1 - }; + self.counts[collapsed_ord] += if collapsed_ord == previous_collapsed_ord { + 0 + } else { + 1 + }; previous_collapsed_ord = collapsed_ord; } } @@ -451,9 +452,9 @@ mod tests { use core::Index; use query::AllQuery; use rand::distributions::Uniform; - use rand::{thread_rng, Rng}; - use schema::{Field, Document, Facet, Schema}; use rand::prelude::SliceRandom; + use rand::{thread_rng, Rng}; + use schema::{Document, Facet, Field, Schema}; use std::iter; #[test] @@ -482,7 +483,7 @@ mod tests { index_writer.commit().unwrap(); index.load_searchers().unwrap(); let searcher = index.searcher(); - let mut facet_collector= FacetCollector::for_field(facet_field); + let mut facet_collector = FacetCollector::for_field(facet_field); facet_collector.add_facet(Facet::from("/top1")); let counts = searcher.search(&AllQuery, &facet_collector).unwrap(); diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 8db4da52f..776b8a51b 100644 --- a/src/collector/mod.rs +++ 
b/src/collector/mod.rs @@ -85,12 +85,12 @@ See the `custom_collector` example. */ +use downcast; use DocId; use Result; use Score; use SegmentLocalId; use SegmentReader; -use downcast; mod count_collector; pub use self::count_collector::Count; @@ -132,13 +132,12 @@ impl Fruit for T where T: Send + downcast::Any {} /// /// Segments are not guaranteed to be visited in any specific order. pub trait Collector: Sync { - /// `Fruit` is the type for the result of our collection. /// e.g. `usize` for the `Count` collector. type Fruit: Fruit; /// Type of the `SegmentCollector` associated to this collector. - type Child: SegmentCollector; + type Child: SegmentCollector; /// `set_segment` is called before beginning to enumerate /// on this segment. @@ -156,7 +155,6 @@ pub trait Collector: Sync { fn merge_fruits(&self, segment_fruits: Vec) -> Result; } - /// The `SegmentCollector` is the trait in charge of defining the /// collect operation at the scale of the segment. /// @@ -177,11 +175,10 @@ pub trait SegmentCollector: 'static { // ----------------------------------------------- // Tuple implementations. - impl Collector for (Left, Right) where Left: Collector, - Right: Collector + Right: Collector, { type Fruit = (Left::Fruit, Right::Fruit); type Child = (Left::Child, Right::Child); @@ -196,22 +193,27 @@ where self.0.requires_scoring() || self.1.requires_scoring() } - fn merge_fruits(&self, children: Vec<(Left::Fruit, Right::Fruit)>) -> Result<(Left::Fruit, Right::Fruit)> { + fn merge_fruits( + &self, + children: Vec<(Left::Fruit, Right::Fruit)>, + ) -> Result<(Left::Fruit, Right::Fruit)> { let mut left_fruits = vec![]; let mut right_fruits = vec![]; for (left_fruit, right_fruit) in children { left_fruits.push(left_fruit); right_fruits.push(right_fruit); } - Ok((self.0.merge_fruits(left_fruits)?, - self.1.merge_fruits(right_fruits)?)) + Ok(( + self.0.merge_fruits(left_fruits)?, + self.1.merge_fruits(right_fruits)?, + )) } } impl SegmentCollector for (Left, Right) - where - Left: SegmentCollector, - Right: SegmentCollector +where + Left: SegmentCollector, + Right: SegmentCollector, { type Fruit = (Left::Fruit, Right::Fruit); @@ -228,9 +230,10 @@ impl SegmentCollector for (Left, Right) // 3-Tuple impl Collector for (One, Two, Three) - where One: Collector, - Two: Collector, - Three: Collector +where + One: Collector, + Two: Collector, + Three: Collector, { type Fruit = (One::Fruit, Two::Fruit, Three::Fruit); type Child = (One::Child, Two::Child, Three::Child); @@ -243,9 +246,7 @@ impl Collector for (One, Two, Three) } fn requires_scoring(&self) -> bool { - self.0.requires_scoring() || - self.1.requires_scoring() || - self.2.requires_scoring() + self.0.requires_scoring() || self.1.requires_scoring() || self.2.requires_scoring() } fn merge_fruits(&self, children: Vec) -> Result { @@ -257,17 +258,19 @@ impl Collector for (One, Two, Three) two_fruits.push(two_fruit); three_fruits.push(three_fruit); } - Ok((self.0.merge_fruits(one_fruits)?, + Ok(( + self.0.merge_fruits(one_fruits)?, self.1.merge_fruits(two_fruits)?, - self.2.merge_fruits(three_fruits)?)) + self.2.merge_fruits(three_fruits)?, + )) } } impl SegmentCollector for (One, Two, Three) - where - One: SegmentCollector, - Two: SegmentCollector, - Three: SegmentCollector +where + One: SegmentCollector, + Two: SegmentCollector, + Three: SegmentCollector, { type Fruit = (One::Fruit, Two::Fruit, Three::Fruit); @@ -282,14 +285,14 @@ impl SegmentCollector for (One, Two, Three) } } - // 4-Tuple impl Collector for (One, Two, Three, Four) - where One: 
Collector, - Two: Collector, - Three: Collector, - Four: Collector +where + One: Collector, + Two: Collector, + Three: Collector, + Four: Collector, { type Fruit = (One::Fruit, Two::Fruit, Three::Fruit, Four::Fruit); type Child = (One::Child, Two::Child, Three::Child, Four::Child); @@ -303,10 +306,10 @@ impl Collector for (One, Two, Three, Four) } fn requires_scoring(&self) -> bool { - self.0.requires_scoring() || - self.1.requires_scoring() || - self.2.requires_scoring() || - self.3.requires_scoring() + self.0.requires_scoring() + || self.1.requires_scoring() + || self.2.requires_scoring() + || self.3.requires_scoring() } fn merge_fruits(&self, children: Vec) -> Result { @@ -320,19 +323,21 @@ impl Collector for (One, Two, Three, Four) three_fruits.push(three_fruit); four_fruits.push(four_fruit); } - Ok((self.0.merge_fruits(one_fruits)?, + Ok(( + self.0.merge_fruits(one_fruits)?, self.1.merge_fruits(two_fruits)?, self.2.merge_fruits(three_fruits)?, - self.3.merge_fruits(four_fruits)?)) + self.3.merge_fruits(four_fruits)?, + )) } } impl SegmentCollector for (One, Two, Three, Four) - where - One: SegmentCollector, - Two: SegmentCollector, - Three: SegmentCollector, - Four: SegmentCollector +where + One: SegmentCollector, + Two: SegmentCollector, + Three: SegmentCollector, + Four: SegmentCollector, { type Fruit = (One::Fruit, Two::Fruit, Three::Fruit, Four::Fruit); @@ -344,7 +349,12 @@ impl SegmentCollector for (One, Two, Three, Four) } fn harvest(self) -> ::Fruit { - (self.0.harvest(), self.1.harvest(), self.2.harvest(), self.3.harvest()) + ( + self.0.harvest(), + self.1.harvest(), + self.2.harvest(), + self.3.harvest(), + ) } } @@ -353,6 +363,5 @@ mod downcast_impl { downcast!(super::Fruit); } - #[cfg(test)] pub mod tests; diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 24f951ef4..1fb119f1e 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -1,28 +1,30 @@ use super::Collector; use super::SegmentCollector; +use collector::Fruit; +use downcast::Downcast; +use std::marker::PhantomData; use DocId; -use Score; use Result; +use Score; use SegmentLocalId; use SegmentReader; -use downcast::Downcast; -use collector::Fruit; -use std::marker::PhantomData; use TantivyError; - pub struct MultiFruit { - sub_fruits: Vec>> + sub_fruits: Vec>>, } - pub struct CollectorWrapper(TCollector); impl Collector for CollectorWrapper { type Fruit = Box; type Child = Box; - fn for_segment(&self, segment_local_id: u32, reader: &SegmentReader) -> Result> { + fn for_segment( + &self, + segment_local_id: u32, + reader: &SegmentReader, + ) -> Result> { let child = self.0.for_segment(segment_local_id, reader)?; Ok(Box::new(SegmentCollectorWrapper(child))) } @@ -32,7 +34,8 @@ impl Collector for CollectorWrapper { } fn merge_fruits(&self, children: Vec<::Fruit>) -> Result> { - let typed_fruit: Vec = children.into_iter() + let typed_fruit: Vec = children + .into_iter() .map(|untyped_fruit| { Downcast::::downcast(untyped_fruit) .map(|boxed_but_typed| *boxed_but_typed) @@ -40,16 +43,13 @@ impl Collector for CollectorWrapper { let err_msg = format!("Failed to cast child collector fruit. 
{:?}", e); TantivyError::InvalidArgument(err_msg) }) - }) - .collect::>()?; + }).collect::>()?; let merged_fruit = self.0.merge_fruits(typed_fruit)?; Ok(Box::new(merged_fruit)) } } - impl SegmentCollector for Box { - type Fruit = Box; fn collect(&mut self, doc: u32, score: f32) { @@ -66,13 +66,11 @@ pub trait BoxableSegmentCollector { fn harvest_from_box(self: Box) -> Box; } - - pub struct SegmentCollectorWrapper(TSegmentCollector); - -impl BoxableSegmentCollector for SegmentCollectorWrapper { - +impl BoxableSegmentCollector + for SegmentCollectorWrapper +{ fn collect(&mut self, doc: u32, score: f32) { self.0.collect(doc, score); } @@ -84,16 +82,13 @@ impl BoxableSegmentCollector for SegmentCol pub struct FruitHandle { pos: usize, - _phantom: PhantomData + _phantom: PhantomData, } impl FruitHandle { pub fn extract(self, fruits: &mut MultiFruit) -> TFruit { - let boxed_fruit = fruits.sub_fruits[self.pos] - .take() - .expect(""); - *Downcast::::downcast(boxed_fruit) - .expect("Failed") + let boxed_fruit = fruits.sub_fruits[self.pos].take().expect(""); + *Downcast::::downcast(boxed_fruit).expect("Failed") } } @@ -153,25 +148,29 @@ impl FruitHandle { /// } /// ``` pub struct MultiCollector<'a> { - collector_wrappers: Vec,Fruit=Box> + 'a>> + collector_wrappers: + Vec, Fruit = Box> + 'a>>, } impl<'a> MultiCollector<'a> { - /// Create a new `MultiCollector` pub fn new() -> MultiCollector<'a> { MultiCollector { - collector_wrappers: Vec::new() + collector_wrappers: Vec::new(), } } /// Add a new collector to our `MultiCollector`. - pub fn add_collector<'b: 'a, TCollector: Collector + 'b>(&mut self, collector: TCollector) -> FruitHandle { + pub fn add_collector<'b: 'a, TCollector: Collector + 'b>( + &mut self, + collector: TCollector, + ) -> FruitHandle { let pos = self.collector_wrappers.len(); - self.collector_wrappers.push(Box::new(CollectorWrapper(collector))); + self.collector_wrappers + .push(Box::new(CollectorWrapper(collector))); FruitHandle { pos, - _phantom: PhantomData + _phantom: PhantomData, } } } @@ -180,30 +179,27 @@ impl<'a> Collector for MultiCollector<'a> { type Fruit = MultiFruit; type Child = MultiCollectorChild; - fn for_segment(&self, segment_local_id: SegmentLocalId, segment: &SegmentReader) -> Result { - let children = self.collector_wrappers + fn for_segment( + &self, + segment_local_id: SegmentLocalId, + segment: &SegmentReader, + ) -> Result { + let children = self + .collector_wrappers .iter() - .map(|collector_wrapper| { - collector_wrapper.for_segment(segment_local_id, segment) - }) + .map(|collector_wrapper| collector_wrapper.for_segment(segment_local_id, segment)) .collect::>>()?; - Ok(MultiCollectorChild { - children - }) + Ok(MultiCollectorChild { children }) } fn requires_scoring(&self) -> bool { - self.collector_wrappers - .iter() - .any(|c| c.requires_scoring()) + self.collector_wrappers.iter().any(|c| c.requires_scoring()) } - fn merge_fruits(&self, segments_multifruits: Vec) - -> Result { - let mut segment_fruits_list: Vec>> = - (0..self.collector_wrappers.len()) - .map(|_| Vec::with_capacity(segments_multifruits.len())) - .collect::>(); + fn merge_fruits(&self, segments_multifruits: Vec) -> Result { + let mut segment_fruits_list: Vec>> = (0..self.collector_wrappers.len()) + .map(|_| Vec::with_capacity(segments_multifruits.len())) + .collect::>(); for segment_multifruit in segments_multifruits { for (idx, segment_fruit_opt) in segment_multifruit.sub_fruits.into_iter().enumerate() { if let Some(segment_fruit) = segment_fruit_opt { @@ -211,21 +207,19 @@ impl<'a> 
Collector for MultiCollector<'a> { } } } - let sub_fruits = self.collector_wrappers + let sub_fruits = self + .collector_wrappers .iter() .zip(segment_fruits_list) - .map(|(child_collector, segment_fruits)| + .map(|(child_collector, segment_fruits)| { Ok(Some(child_collector.merge_fruits(segment_fruits)?)) - ) - .collect::>()?; + }).collect::>()?; Ok(MultiFruit { sub_fruits }) } - } - pub struct MultiCollectorChild { - children: Vec> + children: Vec>, } impl SegmentCollector for MultiCollectorChild { @@ -239,25 +233,25 @@ impl SegmentCollector for MultiCollectorChild { fn harvest(self) -> MultiFruit { MultiFruit { - sub_fruits: self.children + sub_fruits: self + .children .into_iter() - .map(|child| Some(child.harvest()) ) - .collect() + .map(|child| Some(child.harvest())) + .collect(), } } } - #[cfg(test)] mod tests { use super::*; use collector::{Count, TopDocs}; - use schema::{TEXT, Schema}; use query::TermQuery; + use schema::IndexRecordOption; + use schema::{Schema, TEXT}; use Index; use Term; - use schema::IndexRecordOption; #[test] fn test_multi_collector() { @@ -291,4 +285,3 @@ mod tests { assert_eq!(topdocs_handler.extract(&mut multifruits).len(), 2); } } - diff --git a/src/collector/tests.rs b/src/collector/tests.rs index d67ff14b2..cc8bcfff4 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -3,10 +3,10 @@ use core::SegmentReader; use fastfield::BytesFastFieldReader; use fastfield::FastFieldReader; use schema::Field; +use DocAddress; use DocId; use Score; use SegmentLocalId; -use DocAddress; /// Stores all of the doc ids. /// This collector is only used for tests. @@ -24,12 +24,12 @@ pub struct TestSegmentCollector { #[derive(Default)] pub struct TestFruit { docs: Vec, - scores: Vec + scores: Vec, } impl TestFruit { /// Return the list of matching documents exhaustively. - pub fn docs(&self) ->&[DocAddress] { + pub fn docs(&self) -> &[DocAddress] { &self.docs[..] 
} @@ -42,10 +42,14 @@ impl Collector for TestCollector { type Fruit = TestFruit; type Child = TestSegmentCollector; - fn for_segment(&self, segment_id: SegmentLocalId, _reader: &SegmentReader) -> Result { + fn for_segment( + &self, + segment_id: SegmentLocalId, + _reader: &SegmentReader, + ) -> Result { Ok(TestSegmentCollector { segment_id, - fruit: TestFruit::default() + fruit: TestFruit::default(), }) } @@ -54,13 +58,13 @@ impl Collector for TestCollector { } fn merge_fruits(&self, mut children: Vec) -> Result { - children - .sort_by_key(|fruit| - if fruit.docs().is_empty() { - 0 - } else { - fruit.docs()[0].segment_ord() - }); + children.sort_by_key(|fruit| { + if fruit.docs().is_empty() { + 0 + } else { + fruit.docs()[0].segment_ord() + } + }); let mut docs = vec![]; let mut scores = vec![]; for child in children { @@ -72,11 +76,10 @@ impl Collector for TestCollector { } impl SegmentCollector for TestSegmentCollector { - type Fruit = TestFruit; fn collect(&mut self, doc: DocId, score: Score) { - self.fruit.docs.push(DocAddress(self.segment_id, doc )); + self.fruit.docs.push(DocAddress(self.segment_id, doc)); self.fruit.scores.push(score); } @@ -85,7 +88,6 @@ impl SegmentCollector for TestSegmentCollector { } } - /// Collects in order all of the fast fields for all of the /// docs in the `DocSet` /// @@ -101,18 +103,19 @@ pub struct FastFieldSegmentCollector { impl FastFieldTestCollector { pub fn for_field(field: Field) -> FastFieldTestCollector { - FastFieldTestCollector { - field, - } + FastFieldTestCollector { field } } } impl Collector for FastFieldTestCollector { - type Fruit = Vec; type Child = FastFieldSegmentCollector; - fn for_segment(&self, _: SegmentLocalId, reader: &SegmentReader) -> Result { + fn for_segment( + &self, + _: SegmentLocalId, + reader: &SegmentReader, + ) -> Result { Ok(FastFieldSegmentCollector { vals: Vec::new(), reader: reader.fast_field_reader(self.field)?, @@ -124,10 +127,7 @@ impl Collector for FastFieldTestCollector { } fn merge_fruits(&self, children: Vec>) -> Result> { - Ok(children - .into_iter() - .flat_map(|v| v.into_iter()) - .collect()) + Ok(children.into_iter().flat_map(|v| v.into_iter()).collect()) } } @@ -164,11 +164,14 @@ impl BytesFastFieldTestCollector { } impl Collector for BytesFastFieldTestCollector { - type Fruit = Vec; type Child = BytesFastFieldSegmentCollector; - fn for_segment(&self, _segment_local_id: u32, segment: &SegmentReader) -> Result { + fn for_segment( + &self, + _segment_local_id: u32, + segment: &SegmentReader, + ) -> Result { Ok(BytesFastFieldSegmentCollector { vals: Vec::new(), reader: segment.bytes_fast_field_reader(self.field)?, @@ -180,15 +183,11 @@ impl Collector for BytesFastFieldTestCollector { } fn merge_fruits(&self, children: Vec>) -> Result> { - Ok(children - .into_iter() - .flat_map(|c| c.into_iter()) - .collect()) + Ok(children.into_iter().flat_map(|c| c.into_iter()).collect()) } } impl SegmentCollector for BytesFastFieldSegmentCollector { - type Fruit = Vec; fn collect(&mut self, doc: u32, _score: f32) { diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index ff81c1fca..d4bb7239d 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -1,12 +1,11 @@ +use serde::export::PhantomData; use std::cmp::Ordering; use std::collections::BinaryHeap; use DocAddress; use DocId; +use Result; use SegmentLocalId; use SegmentReader; -use Result; -use serde::export::PhantomData; - /// Contains a feature (field, score, etc.) of a document along with the document address.
/// @@ -28,7 +27,7 @@ impl PartialOrd for ComparableDoc { } } -impl Ord for ComparableDoc { +impl Ord for ComparableDoc { #[inline] fn cmp(&self, other: &Self) -> Ordering { other @@ -46,14 +45,15 @@ impl PartialEq for ComparableDoc { impl Eq for ComparableDoc {} - pub(crate) struct TopCollector { limit: usize, - _marker: PhantomData + _marker: PhantomData, } -impl TopCollector where T: PartialOrd + Clone { - +impl TopCollector +where + T: PartialOrd + Clone, +{ /// Creates a top collector, with a number of documents equal to "limit". /// /// # Panics @@ -72,7 +72,10 @@ impl TopCollector where T: PartialOrd + Clone { self.limit } - pub fn merge_fruits(&self, children: Vec>) -> Result> { + pub fn merge_fruits( + &self, + children: Vec>, + ) -> Result> { if self.limit == 0 { return Ok(Vec::new()); } @@ -80,17 +83,11 @@ impl TopCollector where T: PartialOrd + Clone { for child_fruit in children { for (feature, doc) in child_fruit { if top_collector.len() < self.limit { - top_collector.push(ComparableDoc { - feature, - doc - }); + top_collector.push(ComparableDoc { feature, doc }); } else { if let Some(mut head) = top_collector.peek_mut() { if head.feature < feature { - *head = ComparableDoc { - feature, - doc - }; + *head = ComparableDoc { feature, doc }; } } } @@ -103,12 +100,15 @@ impl TopCollector where T: PartialOrd + Clone { .collect()) } - pub(crate) fn for_segment(&self, segment_id: SegmentLocalId, _: &SegmentReader) -> Result> { + pub(crate) fn for_segment( + &self, + segment_id: SegmentLocalId, + _: &SegmentReader, + ) -> Result> { Ok(TopSegmentCollector::new(segment_id, self.limit)) } } - /// The Top Collector keeps track of the K documents /// sorted by type `T`. /// @@ -126,7 +126,7 @@ impl TopSegmentCollector { TopSegmentCollector { limit, heap: BinaryHeap::with_capacity(limit), - segment_id + segment_id, } } } @@ -134,14 +134,17 @@ impl TopSegmentCollector { impl TopSegmentCollector { pub fn harvest(self) -> Vec<(T, DocAddress)> { let segment_id = self.segment_id; - self.heap.into_sorted_vec() + self.heap + .into_sorted_vec() .into_iter() - .map(|comparable_doc| - (comparable_doc.feature, DocAddress(segment_id, comparable_doc.doc)) ) - .collect() + .map(|comparable_doc| { + ( + comparable_doc.feature, + DocAddress(segment_id, comparable_doc.doc), + ) + }).collect() } - /// Return true iff at least K documents have gone through /// the collector. #[inline(always)] @@ -157,9 +160,7 @@ impl TopSegmentCollector { pub fn collect(&mut self, doc: DocId, feature: T) { if self.at_capacity() { // It's ok to unwrap as long as a limit of 0 is forbidden. - if let Some(limit_feature) = self.heap - .peek() - .map(|head| head.feature.clone()) { + if let Some(limit_feature) = self.heap.peek().map(|head| head.feature.clone()) { if limit_feature < feature { if let Some(mut head) = self.heap.peek_mut() { head.feature = feature; @@ -170,10 +171,7 @@ impl TopSegmentCollector { } else { // we have not reached capacity yet, so we can just push the // element. 
- self.heap.push(ComparableDoc { - feature, - doc, - }); + self.heap.push(ComparableDoc { feature, doc }); } } } @@ -192,9 +190,11 @@ mod tests { top_collector.collect(5, 0.3); assert_eq!( top_collector.harvest(), - vec![(0.8, DocAddress(0,1)), - (0.3, DocAddress(0,5)), - (0.2, DocAddress(0,3))] + vec![ + (0.8, DocAddress(0, 1)), + (0.3, DocAddress(0, 5)), + (0.2, DocAddress(0, 3)) + ] ); } @@ -208,10 +208,13 @@ mod tests { top_collector.collect(9, -0.2); assert_eq!( top_collector.harvest(), - vec![(0.9, DocAddress(0,7)), - (0.8, DocAddress(0,1)), - (0.3, DocAddress(0,5)), - (0.2, DocAddress(0,3))]); + vec![ + (0.9, DocAddress(0, 7)), + (0.8, DocAddress(0, 1)), + (0.3, DocAddress(0, 5)), + (0.2, DocAddress(0, 3)) + ] + ); } #[test] diff --git a/src/collector/top_field_collector.rs b/src/collector/top_field_collector.rs index f57d747b2..c6c5c29a7 100644 --- a/src/collector/top_field_collector.rs +++ b/src/collector/top_field_collector.rs @@ -1,14 +1,14 @@ use super::Collector; use collector::top_collector::TopCollector; +use collector::top_collector::TopSegmentCollector; use collector::SegmentCollector; use fastfield::FastFieldReader; use fastfield::FastValue; use schema::Field; -use Result; -use SegmentReader; -use SegmentLocalId; -use collector::top_collector::TopSegmentCollector; use DocAddress; +use Result; +use SegmentLocalId; +use SegmentReader; /// The Top Field Collector keeps track of the K documents /// sorted by a fast field in the index /// ``` pub struct TopDocsByField { collector: TopCollector, - field: Field + field: Field, } -impl TopDocsByField { +impl TopDocsByField { /// Creates a top field collector, with a number of documents equal to "limit". /// /// The given field name must be a fast field, otherwise the collector will return an error while /// collecting results. /// /// This method is only used from the TopDocs collector. pub(crate) fn new(field: Field, limit: usize) -> TopDocsByField { TopDocsByField { collector: TopCollector::with_limit(limit), - field + field, } } } - impl Collector for TopDocsByField { - type Fruit = Vec<(T, DocAddress)>; type Child = TopFieldSegmentCollector; - fn for_segment(&self, segment_local_id: SegmentLocalId, reader: &SegmentReader) -> Result> { + fn for_segment( + &self, + segment_local_id: SegmentLocalId, + reader: &SegmentReader, + ) -> Result> { let collector = self.collector.for_segment(segment_local_id, reader)?; let reader = reader.fast_field_reader(self.field)?; Ok(TopFieldSegmentCollector { collector, reader }) @@ -103,7 +105,10 @@ impl Collector for TopDocsByF false } - fn merge_fruits(&self, segment_fruits: Vec>) -> Result> { + fn merge_fruits( + &self, + segment_fruits: Vec>, + ) -> Result> { self.collector.merge_fruits(segment_fruits) } } @@ -113,8 +118,9 @@ pub struct TopFieldSegmentCollector { reader: FastFieldReader, } -impl SegmentCollector for TopFieldSegmentCollector { - +impl SegmentCollector + for TopFieldSegmentCollector +{ type Fruit = Vec<(T, DocAddress)>; fn collect(&mut self, doc: u32, _score: f32) { @@ -130,17 +136,17 @@ impl SegmentCollector for Top #[cfg(test)] mod tests { use super::TopDocsByField; + use collector::Collector; + use collector::TopDocs; use query::Query; use query::QueryParser; use schema::Field; use schema::IntOptions; use schema::{Schema, FAST, TEXT}; + use DocAddress; use Index; use IndexWriter; use TantivyError; - use collector::Collector; - use DocAddress; - use collector::TopDocs; const TITLE: &str = "title"; const SIZE: &str = "size"; @@ -169,10 +175,14 @@ mod tests { let top_collector =
TopDocs::with_limit(4).order_by_field(size); let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector).unwrap(); - assert_eq!(top_docs, vec![ - (64, DocAddress(0,1)), - (16, DocAddress(0,2)), - (12, DocAddress(0,0))]); + assert_eq!( + top_docs, + vec![ + (64, DocAddress(0, 1)), + (16, DocAddress(0, 2)), + (12, DocAddress(0, 0)) + ] + ); } #[test] @@ -189,11 +199,11 @@ mod tests { )); }); let searcher = index.searcher(); - let top_collector: TopDocsByField = - TopDocs::with_limit(4).order_by_field(Field(2)); + let top_collector: TopDocsByField = TopDocs::with_limit(4).order_by_field(Field(2)); let segment_reader = searcher.segment_reader(0u32); - top_collector.for_segment(0, segment_reader) - .expect("should panic"); + top_collector + .for_segment(0, segment_reader) + .expect("should panic"); } #[test] @@ -212,7 +222,10 @@ mod tests { let segment = searcher.segment_reader(0); let top_collector: TopDocsByField = TopDocs::with_limit(4).order_by_field(size); assert_matches!( - top_collector.for_segment(0, segment).map(|_| ()).unwrap_err(), + top_collector + .for_segment(0, segment) + .map(|_| ()) + .unwrap_err(), TantivyError::FastFieldError(_) ); } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index ccc976ceb..2ba5ffc87 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -1,16 +1,16 @@ use super::Collector; +use collector::top_collector::TopCollector; use collector::top_collector::TopSegmentCollector; +use collector::SegmentCollector; +use collector::TopDocsByField; +use fastfield::FastValue; +use schema::Field; +use DocAddress; use DocId; use Result; use Score; use SegmentLocalId; use SegmentReader; -use collector::SegmentCollector; -use collector::top_collector::TopCollector; -use DocAddress; -use collector::TopDocsByField; -use schema::Field; -use fastfield::FastValue; /// The Top Score Collector keeps track of the K documents /// sorted by their score. @@ -66,7 +66,6 @@ use fastfield::FastValue; /// ``` pub struct TopDocs(TopCollector); - impl TopDocs { /// Creates a top score collector, with a number of documents equal to "limit". /// @@ -80,18 +79,24 @@ impl TopDocs { /// /// (By default, `TopDocs` collects the top-K documents sorted by /// the similarity score.) 
- pub fn order_by_field(self, field: Field) -> TopDocsByField { + pub fn order_by_field( + self, + field: Field, + ) -> TopDocsByField { TopDocsByField::new(field, self.0.limit()) } } impl Collector for TopDocs { - type Fruit = Vec<(Score, DocAddress)>; type Child = TopScoreSegmentCollector; - fn for_segment(&self, segment_local_id: SegmentLocalId, reader: &SegmentReader) -> Result { + fn for_segment( + &self, + segment_local_id: SegmentLocalId, + reader: &SegmentReader, + ) -> Result { let collector = self.0.for_segment(segment_local_id, reader)?; Ok(TopScoreSegmentCollector(collector)) } @@ -120,18 +125,15 @@ impl SegmentCollector for TopScoreSegmentCollector { } } - - - #[cfg(test)] mod tests { use super::TopDocs; - use Score; - use schema::Schema; - use Index; - use schema::TEXT; use query::QueryParser; + use schema::Schema; + use schema::TEXT; use DocAddress; + use Index; + use Score; fn make_index() -> Index { let mut schema_builder = Schema::builder(); @@ -150,33 +152,43 @@ mod tests { index } - #[test] fn test_top_collector_not_at_capacity() { let index = make_index(); let field = index.schema().get_field("text").unwrap(); let query_parser = QueryParser::for_index(&index, vec![field]); let text_query = query_parser.parse_query("droopy tax").unwrap(); - let score_docs: Vec<(Score, DocAddress)> = index.searcher().search(&text_query, &TopDocs::with_limit(4)).unwrap(); - assert_eq!(score_docs, vec![ - (0.81221175, DocAddress(0u32, 1)), - (0.5376842, DocAddress(0u32, 2)), - (0.48527452, DocAddress(0, 0)) - ]); + let score_docs: Vec<(Score, DocAddress)> = index + .searcher() + .search(&text_query, &TopDocs::with_limit(4)) + .unwrap(); + assert_eq!( + score_docs, + vec![ + (0.81221175, DocAddress(0u32, 1)), + (0.5376842, DocAddress(0u32, 2)), + (0.48527452, DocAddress(0, 0)) + ] + ); } - #[test] fn test_top_collector_at_capacity() { let index = make_index(); let field = index.schema().get_field("text").unwrap(); let query_parser = QueryParser::for_index(&index, vec![field]); let text_query = query_parser.parse_query("droopy tax").unwrap(); - let score_docs: Vec<(Score, DocAddress)> = index.searcher().search(&text_query, &TopDocs::with_limit(2)).unwrap(); - assert_eq!(score_docs, vec![ - (0.81221175, DocAddress(0u32, 1)), - (0.5376842, DocAddress(0u32, 2)), - ]); + let score_docs: Vec<(Score, DocAddress)> = index + .searcher() + .search(&text_query, &TopDocs::with_limit(2)) + .unwrap(); + assert_eq!( + score_docs, + vec![ + (0.81221175, DocAddress(0u32, 1)), + (0.5376842, DocAddress(0u32, 2)), + ] + ); } #[test] @@ -186,4 +198,3 @@ mod tests { } } - diff --git a/src/common/composite_file.rs b/src/common/composite_file.rs index 0cdfdff87..0b7cdf03c 100644 --- a/src/common/composite_file.rs +++ b/src/common/composite_file.rs @@ -4,8 +4,8 @@ use common::VInt; use directory::ReadOnlySource; use directory::WritePtr; use schema::Field; -use space_usage::PerFieldSpaceUsage; use space_usage::FieldUsage; +use space_usage::PerFieldSpaceUsage; use std::collections::HashMap; use std::io::Write; use std::io::{self, Read}; @@ -172,7 +172,8 @@ impl CompositeFile { pub fn space_usage(&self) -> PerFieldSpaceUsage { let mut fields = HashMap::new(); for (&field_addr, &(start, end)) in self.offsets_index.iter() { - fields.entry(field_addr.field) + fields + .entry(field_addr.field) .or_insert_with(|| FieldUsage::empty(field_addr.field)) .add_field_idx(field_addr.idx, end - start); } diff --git a/src/core/executor.rs b/src/core/executor.rs index eda83707b..9663bba3a 100644 --- a/src/core/executor.rs +++ 
b/src/core/executor.rs @@ -1,6 +1,6 @@ -use Result; -use scoped_pool::{Pool, ThreadConfig}; use crossbeam::channel; +use scoped_pool::{Pool, ThreadConfig}; +use Result; /// Search executor, whether search requests are single-threaded or multithreaded. /// @@ -31,11 +31,18 @@ impl Executor { // // Regardless of the executor (`SingleThread` or `ThreadPool`), panics in the task // will propagate to the caller. - pub fn map, F: Sized + Sync + Fn(A) -> Result>(&self, f: F, args: AIterator) -> Result> { + pub fn map< + A: Send, + R: Send, + AIterator: Iterator, + F: Sized + Sync + Fn(A) -> Result, + >( + &self, + f: F, + args: AIterator, + ) -> Result> { match self { - Executor::SingleThread => { - args.map(f).collect::>() - } + Executor::SingleThread => args.map(f).collect::>(), Executor::ThreadPool(pool) => { let args_with_indices: Vec<(usize, A)> = args.enumerate().collect(); let num_fruits = args_with_indices.len(); @@ -58,7 +65,7 @@ impl Executor { // terminate. }; let mut results = Vec::with_capacity(num_fruits); - unsafe {results.set_len(num_fruits)}; + unsafe { results.set_len(num_fruits) }; let mut num_items = 0; for (pos, fruit_res) in fruit_receiver { results[pos] = fruit_res?; @@ -79,22 +86,34 @@ mod tests { use super::Executor; #[test] - #[should_panic(expected="panic should propagate")] + #[should_panic(expected = "panic should propagate")] fn test_panic_propagates_single_thread() { - let _result: Vec = Executor::single_thread().map(|_| {panic!("panic should propagate"); }, vec![0].into_iter()).unwrap(); + let _result: Vec = Executor::single_thread() + .map( + |_| { + panic!("panic should propagate"); + }, + vec![0].into_iter(), + ).unwrap(); } #[test] #[should_panic] //< unfortunately the panic message is not propagated fn test_panic_propagates_multi_thread() { let _result: Vec = Executor::multi_thread(1, "search-test") - .map(|_| {panic!("panic should propagate"); }, vec![0].into_iter()).unwrap(); + .map( + |_| { + panic!("panic should propagate"); + }, + vec![0].into_iter(), + ).unwrap(); } #[test] fn test_map_singlethread() { let result: Vec = Executor::single_thread() - .map(|i| { Ok(i * 2) }, 0..1_000).unwrap(); + .map(|i| Ok(i * 2), 0..1_000) + .unwrap(); assert_eq!(result.len(), 1_000); for i in 0..1_000 { assert_eq!(result[i], i * 2); @@ -106,7 +125,8 @@ mod tests { #[test] fn test_map_multithread() { let result: Vec = Executor::multi_thread(3, "search-test") - .map(|i| Ok(i * 2), 0..10).unwrap(); + .map(|i| Ok(i * 2), 0..10) + .unwrap(); assert_eq!(result.len(), 10); for i in 0..10 { assert_eq!(result[i], i * 2); diff --git a/src/core/index.rs b/src/core/index.rs index e1ea363d6..2618b70e5 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -3,6 +3,7 @@ use super::pool::Pool; use super::segment::create_segment; use super::segment::Segment; use core::searcher::Searcher; +use core::Executor; use core::IndexMeta; use core::SegmentId; use core::SegmentMeta; @@ -31,7 +32,6 @@ use tokenizer::BoxedTokenizer; use tokenizer::TokenizerManager; use IndexWriter; use Result; -use core::Executor; fn load_metas(directory: &Directory) -> Result { let meta_data = directory.atomic_read(&META_FILEPATH)?; @@ -110,7 +110,9 @@ impl Index { if index.schema() == schema { Ok(index) } else { - Err(TantivyError::SchemaError("An index exists but the schema does not match.".to_string())) + Err(TantivyError::SchemaError( + "An index exists but the schema does not match.".to_string(), + )) } } else { Index::create(dir, schema) @@ -381,9 +383,9 @@ impl Clone for Index { #[cfg(test)] mod tests { use
directory::RAMDirectory; use schema::{Schema, INT_INDEXED, TEXT}; use Index; - use directory::RAMDirectory; #[test] fn test_indexer_for_field() { @@ -415,7 +417,6 @@ mod tests { assert!(Index::exists(&directory)); } - #[test] fn open_or_create_should_open() { let directory = RAMDirectory::create(); @@ -439,7 +440,10 @@ mod tests { assert!(Index::exists(&directory)); assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok()); let err = Index::open_or_create(directory, Schema::builder().build()); - assert_eq!(format!("{:?}", err.unwrap_err()), "SchemaError(\"An index exists but the schema does not match.\")"); + assert_eq!( + format!("{:?}", err.unwrap_err()), + "SchemaError(\"An index exists but the schema does not match.\")" + ); } fn throw_away_schema() -> Schema { diff --git a/src/core/mod.rs b/src/core/mod.rs index fa9772790..1aae75f8b 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,3 +1,4 @@ +mod executor; pub mod index; mod index_meta; mod inverted_index_reader; @@ -7,7 +8,6 @@ mod segment; mod segment_component; mod segment_id; mod segment_meta; -mod executor; mod segment_reader; pub use self::executor::Executor; diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 89346390e..025f26072 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -1,37 +1,39 @@ use collector::Collector; +use collector::SegmentCollector; +use core::Executor; use core::InvertedIndexReader; use core::SegmentReader; use query::Query; +use query::Scorer; +use query::Weight; use schema::Document; use schema::Schema; use schema::{Field, Term}; use space_usage::SearcherSpaceUsage; use std::fmt; use std::sync::Arc; +use store::StoreReader; use termdict::TermMerger; use DocAddress; use Index; use Result; -use store::StoreReader; -use query::Weight; -use query::Scorer; -use collector::SegmentCollector; -use core::Executor; -fn collect_segment(collector: &C, - weight: &Weight, - segment_ord: u32, - segment_reader: &SegmentReader) -> Result { +fn collect_segment( + collector: &C, + weight: &Weight, + segment_ord: u32, + segment_reader: &SegmentReader, +) -> Result { let mut scorer = weight.scorer(segment_reader)?; let mut segment_collector = collector.for_segment(segment_ord as u32, segment_reader)?; if let Some(delete_bitset) = segment_reader.delete_bitset() { - scorer.for_each(&mut |doc, score| + scorer.for_each(&mut |doc, score| { if !delete_bitset.is_deleted(doc) { segment_collector.collect(doc, score); - }); + } + }); } else { - scorer.for_each(&mut |doc, score| - segment_collector.collect(doc, score)); + scorer.for_each(&mut |doc, score| segment_collector.collect(doc, score)); } Ok(segment_collector.harvest()) } @@ -45,7 +47,7 @@ pub struct Searcher { schema: Schema, index: Index, segment_readers: Vec, - store_readers: Vec + store_readers: Vec, } impl Searcher { @@ -53,15 +55,17 @@ impl Searcher { pub(crate) fn new( schema: Schema, index: Index, - segment_readers: Vec) -> Searcher { - let store_readers = segment_readers.iter() + segment_readers: Vec, + ) -> Searcher { + let store_readers = segment_readers + .iter() .map(|segment_reader| segment_reader.get_store_reader()) .collect(); Searcher { schema, index, segment_readers, - store_readers + store_readers, } } @@ -144,18 +148,26 @@ impl Searcher { /// Also, keep in mind that multithreading a single query on several /// threads will not improve your throughput. It can actually /// hurt it. It will, however, decrease the average response time.
- pub fn search_with_executor(&self, - query: &Query, - collector: &C, - executor: &Executor) -> Result { + pub fn search_with_executor( + &self, + query: &Query, + collector: &C, + executor: &Executor, + ) -> Result { let scoring_enabled = collector.requires_scoring(); let weight = query.weight(self, scoring_enabled)?; let segment_readers = self.segment_readers(); - let fruits = executor - .map(|(segment_ord, segment_reader)| { - collect_segment(collector, weight.as_ref(), segment_ord as u32, segment_reader) + let fruits = executor.map( + |(segment_ord, segment_reader)| { + collect_segment( + collector, + weight.as_ref(), + segment_ord as u32, + segment_reader, + ) }, - segment_readers.iter().enumerate())?; + segment_readers.iter().enumerate(), + )?; collector.merge_fruits(fruits) } diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index e08148c27..597ecae39 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -4,6 +4,7 @@ use core::InvertedIndexReader; use core::Segment; use core::SegmentComponent; use core::SegmentId; +use directory::ReadOnlySource; use error::TantivyError; use fastfield::DeleteBitSet; use fastfield::FacetReader; @@ -24,7 +25,6 @@ use store::StoreReader; use termdict::TermDictionary; use DocId; use Result; -use directory::ReadOnlySource; /// Entry point to access all of the datastructures of the `Segment` /// @@ -384,7 +384,10 @@ impl SegmentReader { self.fast_fields_composite.space_usage(), self.fieldnorms_composite.space_usage(), self.get_store_reader().space_usage(), - self.delete_bitset_opt.as_ref().map(|x| x.space_usage()).unwrap_or(0), + self.delete_bitset_opt + .as_ref() + .map(|x| x.space_usage()) + .unwrap_or(0), ) } } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 6111bfe25..7ddea8a90 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -127,13 +127,13 @@ mod tests { use common::CompositeFile; use directory::{Directory, RAMDirectory, WritePtr}; use fastfield::FastFieldReader; + use rand::prelude::SliceRandom; use rand::rngs::StdRng; use rand::SeedableRng; use schema::Document; - use rand::prelude::SliceRandom; use schema::Field; - use schema::FAST; use schema::Schema; + use schema::FAST; use std::collections::HashMap; use std::path::Path; diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 4c800403a..663507470 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -387,10 +387,13 @@ impl IndexWriter { let mem_budget = self.heap_size_in_bytes_per_thread; let join_handle: JoinHandle> = thread::Builder::new() - .name(format!("thrd-tantivy-index{}-gen{}", self.worker_id, generation)) - .spawn(move || { + .name(format!( + "thrd-tantivy-index{}-gen{}", + self.worker_id, generation + )).spawn(move || { loop { - let mut document_iterator = document_receiver_clone.clone().into_iter().peekable(); + let mut document_iterator = + document_receiver_clone.clone().into_iter().peekable(); // the peeking here is to avoid // creating a new segment's files diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index f8f34a8d8..2b8cfe1b2 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -641,12 +641,12 @@ mod tests { use core::Index; use futures::Future; use query::AllQuery; - use schema::Facet; use query::BooleanQuery; use query::TermQuery; use schema; use schema::Cardinality; use schema::Document; + use schema::Facet; use schema::IndexRecordOption; use schema::IntOptions; use schema::Term; @@ -749,23 +749,26 @@ mod tests { { assert_eq!( 
get_doc_ids(vec![Term::from_field_text(text_field, "a")]), - vec![DocAddress(0,1), DocAddress(0,2), DocAddress(0,4)] + vec![DocAddress(0, 1), DocAddress(0, 2), DocAddress(0, 4)] ); assert_eq!( get_doc_ids(vec![Term::from_field_text(text_field, "af")]), - vec![DocAddress(0,0), DocAddress(0,3)] + vec![DocAddress(0, 0), DocAddress(0, 3)] ); assert_eq!( get_doc_ids(vec![Term::from_field_text(text_field, "g")]), - vec![DocAddress(0,4)] + vec![DocAddress(0, 4)] ); assert_eq!( get_doc_ids(vec![Term::from_field_text(text_field, "b")]), - vec![DocAddress(0,0), - DocAddress(0,1), - DocAddress(0,2), - DocAddress(0,3), - DocAddress(0,4)]); + vec![ + DocAddress(0, 0), + DocAddress(0, 1), + DocAddress(0, 2), + DocAddress(0, 3), + DocAddress(0, 4) + ] + ); } { let doc = searcher.doc(DocAddress(0, 0)).unwrap(); @@ -790,13 +793,17 @@ mod tests { { let get_fast_vals = |terms: Vec| { let query = BooleanQuery::new_multiterms_query(terms); - searcher.search(&query, &FastFieldTestCollector::for_field(score_field)).unwrap() + searcher + .search(&query, &FastFieldTestCollector::for_field(score_field)) + .unwrap() }; let get_fast_vals_bytes = |terms: Vec| { let query = BooleanQuery::new_multiterms_query(terms); searcher - .search(&query, &BytesFastFieldTestCollector::for_field(bytes_score_field)) - .expect("failed to search") + .search( + &query, + &BytesFastFieldTestCollector::for_field(bytes_score_field), + ).expect("failed to search") }; assert_eq!( get_fast_vals(vec![Term::from_field_text(text_field, "a")]), @@ -810,7 +817,6 @@ mod tests { } } - #[test] fn test_index_merger_with_deletes() { let mut schema_builder = schema::Schema::builder(); @@ -829,10 +835,9 @@ mod tests { let collector = FastFieldTestCollector::for_field(score_field); let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field); let term_query = TermQuery::new(term, IndexRecordOption::Basic); - let (scores, bytes) = - searcher - .search(&term_query, &(collector, bytes_collector)) - .unwrap(); + let (scores, bytes) = searcher + .search(&term_query, &(collector, bytes_collector)) + .unwrap(); let mut score_bytes = Cursor::new(bytes); for &score in &scores { assert_eq!(score as u32, score_bytes.read_u32::().unwrap()); @@ -1169,7 +1174,9 @@ mod tests { let searcher = index.searcher(); let mut facet_collector = FacetCollector::for_field(facet_field); facet_collector.add_facet(Facet::from("/top")); - let (count, facet_counts) = searcher.search(&AllQuery, &(Count, facet_collector)).unwrap(); + let (count, facet_counts) = searcher + .search(&AllQuery, &(Count, facet_collector)) + .unwrap(); assert_eq!(count, expected_num_docs); let facets: Vec<(String, u64)> = facet_counts .get("/top") @@ -1228,13 +1235,17 @@ mod tests { index_writer.delete_term(facet_term); index_writer.commit().unwrap(); index.load_searchers().unwrap(); - test_searcher(9, &[ + test_searcher( + 9, + &[ ("/top/a", 3), ("/top/b", 3), ("/top/c", 1), ("/top/d", 2), ("/top/e", 2), - ("/top/f", 1)]); + ("/top/f", 1), + ], + ); } } @@ -1347,7 +1358,14 @@ mod tests { assert_eq!(&vals, &[17]); } - println!("{:?}", searcher.segment_readers().iter().map(|reader| reader.max_doc()).collect::>()); + println!( + "{:?}", + searcher + .segment_readers() + .iter() + .map(|reader| reader.max_doc()) + .collect::>() + ); { let segment = searcher.segment_reader(1u32); @@ -1384,7 +1402,14 @@ mod tests { { let searcher = index.searcher(); - println!("{:?}", searcher.segment_readers().iter().map(|reader| reader.max_doc()).collect::>()); + println!( + "{:?}", + searcher + 
.segment_readers() + .iter() + .map(|reader| reader.max_doc()) + .collect::>() + ); let segment = searcher.segment_reader(0u32); let ff_reader = segment.multi_fast_field_reader(int_field).unwrap(); @@ -1409,7 +1434,6 @@ mod tests { ff_reader.get_vals(6, &mut vals); assert_eq!(&vals, &[17]); - ff_reader.get_vals(7, &mut vals); assert_eq!(&vals, &[28, 27]); @@ -1418,7 +1442,6 @@ mod tests { ff_reader.get_vals(9, &mut vals); assert_eq!(&vals, &[20]); - } } } diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 283657ffe..80651a585 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -242,9 +242,7 @@ impl SegmentUpdater { // from the different drives. // // Segment 1 from disk 1, Segment 1 from disk 2, etc. - commited_segment_metas.sort_by_key(|segment_meta| { - -(segment_meta.max_doc() as i32) - }); + commited_segment_metas.sort_by_key(|segment_meta| -(segment_meta.max_doc() as i32)); save_metas( commited_segment_metas, index.schema(), diff --git a/src/lib.rs b/src/lib.rs index dee843a5e..312c477e5 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -153,8 +153,6 @@ extern crate tempdir; extern crate tempfile; extern crate uuid; - - #[cfg(test)] #[macro_use] extern crate matches; @@ -297,16 +295,16 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId); #[cfg(test)] mod tests { - use DocAddress; use collector::tests::TestCollector; use core::SegmentReader; use docset::DocSet; use query::BooleanQuery; use rand::distributions::Bernoulli; use rand::distributions::Uniform; - use rand::{Rng, SeedableRng}; use rand::rngs::StdRng; + use rand::{Rng, SeedableRng}; use schema::*; + use DocAddress; use Index; use IndexWriter; use Postings; @@ -794,24 +792,23 @@ mod tests { ); assert_eq!( get_doc_ids(vec![Term::from_field_text(text_field, "b")]), - vec![DocAddress(0,0), DocAddress(0,1), DocAddress(0,2)] + vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)] ); assert_eq!( get_doc_ids(vec![Term::from_field_text(text_field, "c")]), - vec![DocAddress(0,1), DocAddress(0,2)] + vec![DocAddress(0, 1), DocAddress(0, 2)] ); assert_eq!( get_doc_ids(vec![Term::from_field_text(text_field, "d")]), - vec![DocAddress(0,2)] + vec![DocAddress(0, 2)] ); assert_eq!( get_doc_ids(vec![ Term::from_field_text(text_field, "b"), Term::from_field_text(text_field, "a"), ]), - vec![DocAddress(0,0), DocAddress(0,1), DocAddress(0,2)] + vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)] ); - } } diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs index 6458461ac..4da8c1c25 100644 --- a/src/postings/compression/mod.rs +++ b/src/postings/compression/mod.rs @@ -271,12 +271,9 @@ mod bench { use test::Bencher; fn generate_array_with_seed(n: usize, ratio: f64, seed_val: u8) -> Vec { - let seed: &[u8; 16] = &[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,seed_val]; + let seed: &[u8; 16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, seed_val]; let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed); - (0u32..) 
- .filter(|_| rng.gen_bool(ratio)) - .take(n) - .collect() + (0u32..).filter(|_| rng.gen_bool(ratio)).take(n).collect() } pub fn generate_array(n: usize, ratio: f64) -> Vec { diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 70484668a..d84870517 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -54,8 +54,8 @@ pub mod tests { use indexer::operation::AddOperation; use indexer::SegmentWriter; use query::Scorer; - use rand::{Rng, SeedableRng}; use rand::rngs::StdRng; + use rand::{Rng, SeedableRng}; use schema::Field; use schema::IndexRecordOption; use schema::{Document, Schema, Term, INT_INDEXED, STRING, TEXT}; diff --git a/src/postings/stacker/expull.rs b/src/postings/stacker/expull.rs index 02cf858d8..9c345a087 100644 --- a/src/postings/stacker/expull.rs +++ b/src/postings/stacker/expull.rs @@ -174,8 +174,8 @@ mod tests { #[cfg(all(test, feature = "unstable"))] mod bench { - use super::ExpUnrolledLinkedList; use super::super::MemoryArena; + use super::ExpUnrolledLinkedList; use test::Bencher; const NUM_STACK: usize = 10_000; diff --git a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs index 3c1cf070e..e0963d605 100644 --- a/src/query/automaton_weight.rs +++ b/src/query/automaton_weight.rs @@ -19,7 +19,7 @@ where impl AutomatonWeight where - A: Automaton + Send + Sync + 'static + A: Automaton + Send + Sync + 'static, { /// Create a new AutomatonWeight pub fn new(field: Field, automaton: A) -> AutomatonWeight { @@ -33,7 +33,8 @@ where } impl Weight for AutomatonWeight -where A: Automaton + Send + Sync + 'static +where + A: Automaton + Send + Sync + 'static, { fn scorer(&self, reader: &SegmentReader) -> Result> { let max_doc = reader.max_doc(); diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index b3f47ed25..544e615fc 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -19,8 +19,8 @@ mod tests { use query::Scorer; use query::TermQuery; use schema::*; - use Index; use DocId; + use DocId; + use Index; fn aux_test_helper() -> (Index, Field) { @@ -132,7 +132,8 @@ mod tests { let matching_docs = |boolean_query: &Query| { let searcher = index.searcher(); let test_docs = searcher.search(boolean_query, &TestCollector).unwrap(); - test_docs.docs() + test_docs + .docs() .iter() .cloned() .map(|doc| doc.1) diff --git a/src/query/fuzzy_query.rs b/src/query/fuzzy_query.rs index 1d62ab9c0..6539929bf 100644 --- a/src/query/fuzzy_query.rs +++ b/src/query/fuzzy_query.rs @@ -147,7 +147,9 @@ mod test { let term = Term::from_field_text(country_field, "japon"); let fuzzy_query = FuzzyTermQuery::new(term, 1, true); - let top_docs = searcher.search(&fuzzy_query, &TopDocs::with_limit(2)).unwrap(); + let top_docs = searcher + .search(&fuzzy_query, &TopDocs::with_limit(2)) + .unwrap(); assert_eq!(top_docs.len(), 1, "Expected only 1 document"); let (score, _) = top_docs[0]; assert_nearly_equals(1f32, score); diff --git a/src/query/mod.rs b/src/query/mod.rs index 394aa4e2c..cf8b6dcbd 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -56,11 +56,11 @@ pub use self::weight::Weight; #[cfg(test)] mod tests { - use Index; - use schema::{Schema, TEXT}; use query::QueryParser; - use Term; + use schema::{Schema, TEXT}; use std::collections::BTreeSet; + use Index; + use Term; #[test] fn test_query_terms() { @@ -73,33 +73,48 @@ mod tests { let term_b = Term::from_field_text(text_field, "b"); { let mut terms_set: BTreeSet = BTreeSet::new(); - query_parser.parse_query("a").unwrap().query_terms(&mut
+ query_parser
+ .parse_query("a")
+ .unwrap()
+ .query_terms(&mut terms_set);
let terms: Vec<&Term> = terms_set.iter().collect();
assert_eq!(vec![&term_a], terms);
}
{
let mut terms_set: BTreeSet<Term> = BTreeSet::new();
- query_parser.parse_query("a b").unwrap().query_terms(&mut terms_set);
+ query_parser
+ .parse_query("a b")
+ .unwrap()
+ .query_terms(&mut terms_set);
let terms: Vec<&Term> = terms_set.iter().collect();
assert_eq!(vec![&term_a, &term_b], terms);
}
{
let mut terms_set: BTreeSet<Term> = BTreeSet::new();
- query_parser.parse_query("\"a b\"").unwrap().query_terms(&mut terms_set);
+ query_parser
+ .parse_query("\"a b\"")
+ .unwrap()
+ .query_terms(&mut terms_set);
let terms: Vec<&Term> = terms_set.iter().collect();
assert_eq!(vec![&term_a, &term_b], terms);
}
{
let mut terms_set: BTreeSet<Term> = BTreeSet::new();
- query_parser.parse_query("a a a a a").unwrap().query_terms(&mut terms_set);
+ query_parser
+ .parse_query("a a a a a")
+ .unwrap()
+ .query_terms(&mut terms_set);
let terms: Vec<&Term> = terms_set.iter().collect();
assert_eq!(vec![&term_a], terms);
}
{
let mut terms_set: BTreeSet<Term> = BTreeSet::new();
- query_parser.parse_query("a -b").unwrap().query_terms(&mut terms_set);
+ query_parser
+ .parse_query("a -b")
+ .unwrap()
+ .query_terms(&mut terms_set);
let terms: Vec<&Term> = terms_set.iter().collect();
assert_eq!(vec![&term_a, &term_b], terms);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs
index d374e5371..690ccdd39 100644
--- a/src/query/phrase_query/mod.rs
+++ b/src/query/phrase_query/mod.rs
@@ -15,8 +15,8 @@ mod tests {
use error::TantivyError;
use schema::{Schema, Term, TEXT};
use tests::assert_nearly_equals;
- use DocId;
use DocAddress;
+ use DocId;
fn create_index(texts: &[&'static str]) -> Index {
let mut schema_builder = Schema::builder();
@@ -57,7 +57,8 @@ mod tests {
let test_fruits = searcher
.search(&phrase_query, &TestCollector)
.expect("search should succeed");
- test_fruits.docs()
+ test_fruits
+ .docs()
.iter()
.map(|docaddr| docaddr.1)
.collect::<Vec<DocId>>()
@@ -125,7 +126,8 @@ mod tests {
searcher
.search(&phrase_query, &TestCollector)
.expect("search should succeed")
- .scores().to_vec()
+ .scores()
+ .to_vec()
};
let scores = test_query(vec!["a", "b"]);
assert_nearly_equals(scores[0], 0.40618482);
@@ -160,8 +162,8 @@ mod tests {
.docs()
.to_vec()
};
- assert_eq!(test_query(vec!["a", "b"]), vec![DocAddress(0,1)]);
- assert_eq!(test_query(vec!["b", "a"]), vec![DocAddress(0,2)]);
+ assert_eq!(test_query(vec!["a", "b"]), vec![DocAddress(0, 1)]);
+ assert_eq!(test_query(vec!["b", "a"]), vec![DocAddress(0, 2)]);
}
#[test] // motivated by #234
diff --git a/src/query/range_query.rs b/src/query/range_query.rs
index e5ebe896d..bf5b37967 100644
--- a/src/query/range_query.rs
+++ b/src/query/range_query.rs
@@ -209,12 +209,16 @@ impl RangeQuery {
/// Lower bound of range
pub fn left_bound(&self) -> Bound<Term> {
- map_bound(&self.left_bound, &|bytes| Term::from_field_bytes(self.field, bytes))
+ map_bound(&self.left_bound, &|bytes| {
+ Term::from_field_bytes(self.field, bytes)
+ })
}
/// Upper bound of range
pub fn right_bound(&self) -> Bound<Term> {
- map_bound(&self.right_bound, &|bytes| Term::from_field_bytes(self.field, bytes))
+ map_bound(&self.right_bound, &|bytes| {
+ Term::from_field_bytes(self.field, bytes)
+ })
}
}
@@ -353,9 +357,8 @@ mod tests {
}
index.load_searchers().unwrap();
let searcher = index.searcher();
- let count_multiples = |range_query: RangeQuery| {
- searcher.search(&range_query, &Count).unwrap()
- };
+ let count_multiples =
+ |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
assert_eq!(count_multiples(RangeQuery::new_i64(int_field, 10..11)), 9);
assert_eq!(
diff --git a/src/query/regex_query.rs b/src/query/regex_query.rs
index 412ea56c8..caa8f080a 100644
--- a/src/query/regex_query.rs
+++ b/src/query/regex_query.rs
@@ -113,13 +113,16 @@ mod test {
{
let regex_query = RegexQuery::new("jap[ao]n".to_string(), country_field);
let scored_docs = searcher
- .search(&regex_query, &TopDocs::with_limit(2)).unwrap();
+ .search(&regex_query, &TopDocs::with_limit(2))
+ .unwrap();
assert_eq!(scored_docs.len(), 1, "Expected only 1 document");
let (score, _) = scored_docs[0];
assert_nearly_equals(1f32, score);
}
let regex_query = RegexQuery::new("jap[A-Z]n".to_string(), country_field);
- let top_docs = searcher.search(&regex_query, &TopDocs::with_limit(2)).unwrap();
+ let top_docs = searcher
+ .search(&regex_query, &TopDocs::with_limit(2))
+ .unwrap();
assert!(top_docs.is_empty(), "Expected ZERO document");
}
}
diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs
index e0a200eb2..04bd97ecd 100644
--- a/src/query/term_query/mod.rs
+++ b/src/query/term_query/mod.rs
@@ -1,4 +1,5 @@
- mod term_query;
+
+mod term_query;
mod term_scorer;
mod term_weight;
@@ -9,13 +10,13 @@ pub use self::term_weight::TermWeight;
#[cfg(test)]
mod tests {
+ use collector::TopDocs;
use docset::DocSet;
use query::{Query, QueryParser, Scorer, TermQuery};
use schema::{IndexRecordOption, Schema, STRING, TEXT};
use tests::assert_nearly_equals;
use Index;
use Term;
- use collector::TopDocs;
#[test]
pub fn test_term_query_no_freq() {
@@ -70,7 +71,9 @@ mod tests {
{
let term = Term::from_field_text(left_field, "left2");
let term_query = TermQuery::new(term, IndexRecordOption::WithFreqs);
- let topdocs = searcher.search(&term_query,&TopDocs::with_limit(2)).unwrap();
+ let topdocs = searcher
+ .search(&term_query, &TopDocs::with_limit(2))
+ .unwrap();
assert_eq!(topdocs.len(), 1);
let (score, _) = topdocs[0];
assert_nearly_equals(0.77802235, score);
@@ -78,7 +81,9 @@ mod tests {
{
let term = Term::from_field_text(left_field, "left1");
let term_query = TermQuery::new(term, IndexRecordOption::WithFreqs);
- let top_docs = searcher.search(&term_query, &TopDocs::with_limit(2)).unwrap();
+ let top_docs = searcher
+ .search(&term_query, &TopDocs::with_limit(2))
+ .unwrap();
assert_eq!(top_docs.len(), 2);
let (score1, _) = top_docs[0];
assert_nearly_equals(0.27101856, score1);
diff --git a/src/schema/schema.rs b/src/schema/schema.rs
index 12112e82f..14fdb1763 100644
--- a/src/schema/schema.rs
+++ b/src/schema/schema.rs
@@ -121,7 +121,6 @@ impl SchemaBuilder {
}
}
-
struct InnerSchema {
fields: Vec<FieldEntry>,
fields_map: HashMap<String, Field>, // transient
@@ -135,7 +134,6 @@ impl PartialEq for InnerSchema {
}
impl Eq for InnerSchema {}
-
/// Tantivy has a very strict schema.
/// You need to specify in advance, whether a field is indexed or not,
/// stored or not, and RAM-based or not.
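Note: the `count_multiples` closure that rustfmt collapses onto one line above comes from the `range_query` test suite. For readers skimming this patch, here is a minimal, self-contained sketch of how that pattern is used end to end, written against the API surface visible in this patch (`Schema::builder`, `writer_with_num_threads`, `load_searchers`, `RangeQuery::new_i64`, the `Count` collector). The field name `value` and the indexed values are illustrative, not taken from the tests.

    #[macro_use]
    extern crate tantivy;

    use tantivy::collector::Count;
    use tantivy::query::RangeQuery;
    use tantivy::schema::{Schema, INT_INDEXED};
    use tantivy::Index;

    fn main() -> tantivy::Result<()> {
        // Illustrative schema: a single indexed i64 field.
        let mut schema_builder = Schema::builder();
        let value = schema_builder.add_i64_field("value", INT_INDEXED);
        let index = Index::create_in_ram(schema_builder.build());

        // Index the integers 0..40.
        {
            let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
            for i in 0..40i64 {
                index_writer.add_document(doc!(value => i));
            }
            index_writer.commit()?;
        }
        index.load_searchers()?;
        let searcher = index.searcher();

        // Same shape as the closure in the range_query tests above.
        let count_multiples =
            |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
        // Ten documents (10..=19) fall inside this half-open range.
        assert_eq!(count_multiples(RangeQuery::new_i64(value, 10..20)), 10);
        Ok(())
    }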
diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs
index 4bcfc39eb..c94b62656 100644
--- a/src/snippet/mod.rs
+++ b/src/snippet/mod.rs
@@ -255,8 +255,7 @@ impl SnippetGenerator {
} else {
None
}
- })
- .collect();
+ }).collect();
let tokenizer = searcher.index().tokenizer_for_field(field)?;
Ok(SnippetGenerator {
terms_text,
@@ -328,8 +327,6 @@ to the project are from community members.[15]
Rust won first place for "most
loved programming language" in the Stack Overflow Developer
Survey in 2016, 2017, and 2018."#;
-
-
#[test]
fn test_snippet() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
@@ -345,13 +342,18 @@ Survey in 2016, 2017, and 2018."#;
assert_eq!(first.stop_offset, 89);
}
let snippet = select_best_fragment_combination(&fragments[..], &TEST_TEXT);
- assert_eq!(snippet.fragments, "Rust is a systems programming language sponsored by \
- Mozilla which\ndescribes it as a \"safe");
- assert_eq!(snippet.to_html(), "<b>Rust</b> is a systems programming <b>language</b> \
- sponsored by Mozilla which\ndescribes it as a &quot;safe")
+ assert_eq!(
+ snippet.fragments,
+ "Rust is a systems programming language sponsored by \
+ Mozilla which\ndescribes it as a \"safe"
+ );
+ assert_eq!(
+ snippet.to_html(),
+ "<b>Rust</b> is a systems programming <b>language</b> \
+ sponsored by Mozilla which\ndescribes it as a &quot;safe"
+ )
}
-
#[test]
fn test_snippet_scored_fragment() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
@@ -385,10 +387,8 @@ Survey in 2016, 2017, and 2018."#;
let snippet = select_best_fragment_combination(&fragments[..], &TEST_TEXT);
assert_eq!(snippet.to_html(), "programming <b>language</b>")
}
-
}
-
#[test]
fn test_snippet_in_second_fragment() {
let boxed_tokenizer = box_tokenizer(SimpleTokenizer);
@@ -495,7 +495,6 @@ Survey in 2016, 2017, and 2018."#;
assert_eq!(snippet.to_html(), "");
}
-
#[test]
fn test_snippet_generator_term_score() {
let mut schema_builder = Schema::builder();
@@ -521,17 +520,26 @@ Survey in 2016, 2017, and 2018."#;
{
let query = query_parser.parse_query("a").unwrap();
let snippet_generator = SnippetGenerator::new(&searcher, &*query, text_field).unwrap();
- assert_eq!(&btreemap!("a".to_string() => 0.25f32), snippet_generator.terms_text());
+ assert_eq!(
+ &btreemap!("a".to_string() => 0.25f32),
+ snippet_generator.terms_text()
+ );
}
{
let query = query_parser.parse_query("a b").unwrap();
let snippet_generator = SnippetGenerator::new(&searcher, &*query, text_field).unwrap();
- assert_eq!(&btreemap!("a".to_string() => 0.25f32, "b".to_string() => 0.5), snippet_generator.terms_text());
+ assert_eq!(
+ &btreemap!("a".to_string() => 0.25f32, "b".to_string() => 0.5),
+ snippet_generator.terms_text()
+ );
}
{
let query = query_parser.parse_query("a b c").unwrap();
let snippet_generator = SnippetGenerator::new(&searcher, &*query, text_field).unwrap();
- assert_eq!(&btreemap!("a".to_string() => 0.25f32, "b".to_string() => 0.5), snippet_generator.terms_text());
+ assert_eq!(
+ &btreemap!("a".to_string() => 0.25f32, "b".to_string() => 0.5),
+ snippet_generator.terms_text()
+ );
}
}
diff --git a/src/space_usage/mod.rs b/src/space_usage/mod.rs
index cf1bef206..35f175c6c 100644
--- a/src/space_usage/mod.rs
+++ b/src/space_usage/mod.rs
@@ -117,8 +117,8 @@ impl SegmentSpaceUsage {
/// Clones the underlying data.
/// Use the components directly if this is somehow in performance critical code.
pub fn component(&self, component: SegmentComponent) -> ComponentSpaceUsage {
- use SegmentComponent::*;
use self::ComponentSpaceUsage::*;
+ use SegmentComponent::*;
match component {
POSTINGS => PerField(self.postings().clone()),
POSITIONS => PerField(self.positions().clone()),
@@ -221,7 +221,7 @@ impl StoreSpaceUsage {
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PerFieldSpaceUsage {
fields: HashMap<Field, FieldUsage>,
- total: ByteCount
+ total: ByteCount,
}
impl PerFieldSpaceUsage {
@@ -265,7 +265,7 @@ impl FieldUsage {
}
pub(crate) fn add_field_idx(&mut self, idx: usize, size: ByteCount) {
- if self.sub_num_bytes.len() < idx + 1{
+ if self.sub_num_bytes.len() < idx + 1 {
self.sub_num_bytes.resize(idx + 1, None);
}
assert!(self.sub_num_bytes[idx].is_none());
@@ -292,12 +292,12 @@ impl FieldUsage {
#[cfg(test)]
mod test {
use core::Index;
- use schema::Schema;
- use schema::{FAST, INT_INDEXED, TEXT};
use schema::Field;
+ use schema::Schema;
+ use schema::STORED;
+ use schema::{FAST, INT_INDEXED, TEXT};
use space_usage::ByteCount;
use space_usage::PerFieldSpaceUsage;
- use schema::STORED;
use Term;
#[test]
@@ -311,12 +311,20 @@ mod test {
assert_eq!(0, searcher_space_usage.total());
}
- fn expect_single_field(field_space: &PerFieldSpaceUsage, field: &Field, min_size: ByteCount, max_size: ByteCount) {
+ fn expect_single_field(
+ field_space: &PerFieldSpaceUsage,
+ field: &Field,
+ min_size: ByteCount,
+ max_size: ByteCount,
+ ) {
assert!(field_space.total() >= min_size);
assert!(field_space.total() <= max_size);
assert_eq!(
vec![(field, field_space.total())],
- field_space.fields().map(|(x,y)| (x, y.total())).collect::<Vec<_>>()
+ field_space
+ .fields()
+ .map(|(x, y)| (x, y.total()))
+ .collect::<Vec<_>>()
);
}
@@ -354,7 +362,7 @@ mod test {
expect_single_field(segment.fast_fields(), &name, 1, 512);
expect_single_field(segment.fieldnorms(), &name, 1, 512);
// TODO: understand why the following fails
-// assert_eq!(0, segment.store().total());
+ // assert_eq!(0, segment.store().total());
assert_eq!(0, segment.deletes());
}
@@ -369,7 +377,9 @@ mod test {
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(name => "hi"));
index_writer.add_document(doc!(name => "this is a test"));
- index_writer.add_document(doc!(name => "some more documents with some word overlap with the other test"));
+ index_writer.add_document(
+ doc!(name => "some more documents with some word overlap with the other test"),
+ );
index_writer.add_document(doc!(name => "hello hi goodbye"));
index_writer.commit().unwrap();
}
@@ -392,7 +402,7 @@ mod test {
assert_eq!(0, segment.fast_fields().total());
expect_single_field(segment.fieldnorms(), &name, 1, 512);
// TODO: understand why the following fails
-// assert_eq!(0, segment.store().total());
+ // assert_eq!(0, segment.store().total());
assert_eq!(0, segment.deletes());
}
@@ -407,7 +417,9 @@ mod test {
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
index_writer.add_document(doc!(name => "hi"));
index_writer.add_document(doc!(name => "this is a test"));
- index_writer.add_document(doc!(name => "some more documents with some word overlap with the other test"));
+ index_writer.add_document(
+ doc!(name => "some more documents with some word overlap with the other test"),
+ );
index_writer.add_document(doc!(name => "hello hi goodbye"));
index_writer.commit().unwrap();
}
@@ -478,7 +490,7 @@ mod test {
assert_eq!(0, segment.fast_fields().total());
expect_single_field(segment.fieldnorms(), &name, 1, 512);
// TODO: understand why the following fails
-// assert_eq!(0, segment.store().total());
+ // assert_eq!(0, segment.store().total());
assert!(segment.deletes() > 0);
}
-}
\ No newline at end of file
+}
diff --git a/src/store/mod.rs b/src/store/mod.rs
index 45c966b83..5f01fb277 100644
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -56,8 +56,8 @@ pub mod tests {
use directory::{Directory, RAMDirectory, WritePtr};
use schema::Document;
use schema::FieldValue;
- use schema::TextOptions;
use schema::Schema;
+ use schema::TextOptions;
use std::path::Path;
pub fn write_lorem_ipsum_store(writer: WritePtr, num_docs: usize) -> Schema {
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 8f9b5f621..d5a804145 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -157,13 +157,11 @@ pub use self::tokenizer::BoxedTokenizer;
pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer};
pub use self::tokenizer_manager::TokenizerManager;
-
#[cfg(test)]
pub mod tests {
use super::Token;
use super::TokenizerManager;
-
/// This is a function that can be used in tests and doc tests
/// to assert a token's correctness.
pub fn assert_token(token: &Token, position: usize, text: &str, from: usize, to: usize) {
diff --git a/src/tokenizer/ngram_tokenizer.rs b/src/tokenizer/ngram_tokenizer.rs
index ebd1ece98..a0c53b15d 100644
--- a/src/tokenizer/ngram_tokenizer.rs
+++ b/src/tokenizer/ngram_tokenizer.rs
@@ -108,7 +108,7 @@ impl NgramTokenizer {
/// Create a `NGramTokenizer` which generates tokens for all inner ngrams.
///
/// This is as opposed to only prefix ngrams .
- pub fn all_ngrams(min_gram: usize, max_gram:usize) -> NgramTokenizer {
+ pub fn all_ngrams(min_gram: usize, max_gram: usize) -> NgramTokenizer {
Self::new(min_gram, max_gram, false)
}
@@ -137,9 +137,10 @@ impl<'a> Tokenizer<'a> for NgramTokenizer {
fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
NgramTokenStream {
ngram_charidx_iterator: StutteringIterator::new(
- CodepointFrontiers::for_str(text),
+ CodepointFrontiers::for_str(text),
self.min_gram,
- self.max_gram),
+ self.max_gram,
+ ),
prefix_only: self.prefix_only,
text,
token: Token::default(),
@@ -172,7 +173,6 @@ impl<'a> TokenStream for NgramTokenStream<'a> {
}
}
-
/// This iterator takes an underlying Iterator
/// and emits all of the pairs `(a,b)` such that
/// a and b are items emitted by the iterator at
@@ -190,11 +190,13 @@ struct StutteringIterator<T> {
memory: Vec<usize>,
cursor: usize,
- gram_len: usize
+ gram_len: usize,
}
impl<T> StutteringIterator<T>
- where T: Iterator<Item = usize> {
+where
+ T: Iterator<Item = usize>,
+{
pub fn new(mut underlying: T, min_gram: usize, max_gram: usize) -> StutteringIterator<T> {
assert!(min_gram > 0);
let memory: Vec<usize> = (&mut underlying).take(max_gram + 1).collect();
@@ -222,7 +224,9 @@ impl<T> StutteringIterator<T>
}
impl<T> Iterator for StutteringIterator<T>
- where T: Iterator<Item = usize> {
+where
+ T: Iterator<Item = usize>,
+{
type Item = (usize, usize);
fn next(&mut self) -> Option<(usize, usize)> {
@@ -230,7 +234,7 @@ impl<T> Iterator for StutteringIterator<T>
// we have exhausted all options
// starting at `self.memory[self.cursor]`.
//
- // Time to advance.
+ // Time to advance.
self.gram_len = self.min_gram;
if let Some(next_val) = self.underlying.next() {
self.memory[self.cursor] = next_val;
@@ -252,22 +256,20 @@ impl<T> Iterator for StutteringIterator<T>
}
}
-
-
/// Emits all of the offsets where a codepoint starts
/// or a codepoint ends.
///
/// By convention, we emit [0] for the empty string.
struct CodepointFrontiers<'a> {
s: &'a str,
- next_el: Option<usize>
+ next_el: Option<usize>,
}
impl<'a> CodepointFrontiers<'a> {
fn for_str(s: &'a str) -> Self {
CodepointFrontiers {
s,
- next_el: Some(0)
+ next_el: Some(0),
}
}
}
@@ -276,26 +278,20 @@ impl<'a> Iterator for CodepointFrontiers<'a> {
type Item = usize;
fn next(&mut self) -> Option<usize> {
- self.next_el
- .map(|offset| {
- if self.s.is_empty() {
- self.next_el = None;
- } else {
- let first_codepoint_width = utf8_codepoint_width(self.s.as_bytes()[0]);
- self.s = &self.s[first_codepoint_width..];
- self.next_el = Some(offset + first_codepoint_width);
- }
- offset
- })
+ self.next_el.map(|offset| {
+ if self.s.is_empty() {
+ self.next_el = None;
+ } else {
+ let first_codepoint_width = utf8_codepoint_width(self.s.as_bytes()[0]);
+ self.s = &self.s[first_codepoint_width..];
+ self.next_el = Some(offset + first_codepoint_width);
+ }
+ offset
+ })
}
}
-const CODEPOINT_UTF8_WIDTH: [u8; 16] = [
- 1, 1, 1, 1,
- 1, 1, 1, 1,
- 2, 2, 2, 2,
- 2, 2, 3, 4,
-];
+const CODEPOINT_UTF8_WIDTH: [u8; 16] = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4];
// Number of bytes to encode a codepoint in UTF-8 given
// the first byte.
@@ -309,13 +305,13 @@ fn utf8_codepoint_width(b: u8) -> usize {
#[cfg(test)]
mod tests {
- use tokenizer::tokenizer::{TokenStream, Tokenizer};
- use super::NgramTokenizer;
- use tokenizer::Token;
- use tokenizer::tests::assert_token;
- use super::CodepointFrontiers;
- use super::StutteringIterator;
use super::utf8_codepoint_width;
+ use super::CodepointFrontiers;
+ use super::NgramTokenizer;
+ use super::StutteringIterator;
+ use tokenizer::tests::assert_token;
+ use tokenizer::tokenizer::{TokenStream, Tokenizer};
+ use tokenizer::Token;
fn test_helper<T: TokenStream>(mut tokenizer: T) -> Vec<Token> {
let mut tokens: Vec<Token> = vec![];
tokenizer.process(&mut |token: &Token| tokens.push(token.clone()));
tokens
}
-
#[test]
fn test_utf8_codepoint_width() {
// 0xxx
@@ -344,17 +339,16 @@ mod tests {
}
}
-
#[test]
fn test_codepoint_frontiers() {
assert_eq!(CodepointFrontiers::for_str("").collect::<Vec<usize>>(), vec![0]);
assert_eq!(
CodepointFrontiers::for_str("abcd").collect::<Vec<usize>>(),
- vec![0,1,2,3,4]
+ vec![0, 1, 2, 3, 4]
);
assert_eq!(
- CodepointFrontiers::for_str("aあ").collect::<Vec<usize>>(),
- vec![0,1,4]
+ CodepointFrontiers::for_str("aあ").collect::<Vec<usize>>(),
+ vec![0, 1, 4]
);
}
@@ -425,7 +419,6 @@ mod tests {
assert!(tokens.is_empty());
}
-
#[test]
#[should_panic(expected = "min_gram must be greater than 0")]
fn test_ngram_min_max_interval_empty() {
@@ -438,7 +431,6 @@ mod tests {
NgramTokenizer::all_ngrams(2, 1);
}
-
#[test]
fn test_stutterring_iterator_empty() {
let rg: Vec<usize> = vec![0];
@@ -470,4 +462,4 @@ mod tests {
assert_eq!(it.next(), None);
}
-}
\ No newline at end of file
+}
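Note: to make the ngram semantics above concrete, here is a short usage sketch for `NgramTokenizer::all_ngrams` using the `Tokenizer` and `TokenStream` traits exactly as they appear in this patch. The `tantivy::tokenizer` re-export path and the input string are assumptions for illustration. Because `CodepointFrontiers` only emits offsets on UTF-8 codepoint boundaries (see `CODEPOINT_UTF8_WIDTH`), the byte offsets below never split a codepoint, so the same loop is also safe on multi-byte input such as "aあ".

    extern crate tantivy;

    use tantivy::tokenizer::{NgramTokenizer, TokenStream, Tokenizer};

    fn main() {
        // All inner ngrams (prefix_only == false) of length 2 through 3.
        let tokenizer = NgramTokenizer::all_ngrams(2, 3);
        let mut stream = tokenizer.token_stream("rust");
        while stream.advance() {
            let token = stream.token();
            // Prints "ru" [0..2], "rus" [0..3], "us" [1..3], "ust" [1..4], "st" [2..4].
            println!("{:?} [{}..{}]", token.text, token.offset_from, token.offset_to);
        }
    }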