From 37e4280c0a62943f70b3cfbf83c72fd10e494973 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sat, 15 Sep 2018 07:44:22 +0900 Subject: [PATCH] Cargo Format (#420) --- examples/basic_search.rs | 3 - examples/custom_tokenizer.rs | 2 - examples/deleting_updating_documents.rs | 9 +- examples/faceted_search.rs | 66 +++++----- examples/iterating_docs_and_positions.rs | 20 +-- examples/snippet.rs | 2 +- examples/stop_words.rs | 110 ++++++++-------- src/collector/facet_collector.rs | 25 ++-- src/collector/top_collector.rs | 6 +- src/collector/top_field_collector.rs | 18 +-- src/collector/top_score_collector.rs | 4 +- src/common/composite_file.rs | 3 +- src/common/vint.rs | 16 +-- src/core/index.rs | 72 +++++------ src/core/inverted_index_reader.rs | 10 +- src/core/pool.rs | 6 +- src/core/searcher.rs | 17 ++- src/core/segment_reader.rs | 19 ++- src/directory/directory.rs | 12 +- src/directory/managed_directory.rs | 15 ++- src/directory/mmap_directory.rs | 6 +- src/directory/ram_directory.rs | 10 +- src/directory/read_only_source.rs | 1 - src/error.rs | 16 ++- src/fastfield/delete.rs | 3 +- src/fastfield/facet_reader.rs | 3 +- src/fastfield/multivalued/writer.rs | 4 +- src/functional_test.rs | 2 +- src/indexer/delete_queue.rs | 12 +- src/indexer/directory_lock.rs | 51 +++----- src/indexer/index_writer.rs | 13 +- src/indexer/merge_policy.rs | 12 +- src/indexer/merger.rs | 3 +- src/indexer/segment_register.rs | 3 +- src/lib.rs | 5 +- src/positions/mod.rs | 28 ++-- src/positions/reader.rs | 61 +++++---- src/positions/serializer.rs | 11 +- src/postings/compression/mod.rs | 27 ++-- src/postings/compression/vint.rs | 12 +- src/postings/mod.rs | 17 ++- src/postings/postings_writer.rs | 3 +- src/postings/recorder.rs | 3 +- src/postings/segment_postings.rs | 156 +++++++++++++---------- src/postings/serializer.rs | 48 ++++--- src/postings/skip.rs | 27 ++-- src/query/boolean_query/boolean_query.rs | 6 +- src/query/empty_query.rs | 10 +- src/query/mod.rs | 4 +- src/query/occur.rs | 2 +- src/query/phrase_query/phrase_query.rs | 2 +- src/query/phrase_query/phrase_scorer.rs | 3 +- src/query/phrase_query/phrase_weight.rs | 1 - src/query/query.rs | 2 +- src/query/query_parser/query_grammar.rs | 42 +++--- src/query/query_parser/query_parser.rs | 121 +++++++++--------- src/query/query_parser/user_input_ast.rs | 24 ++-- src/query/range_query.rs | 5 +- src/query/scorer.rs | 1 - src/query/term_query/term_query.rs | 2 +- src/schema/schema.rs | 5 +- src/snippet/mod.rs | 82 ++++++------ src/store/mod.rs | 8 +- src/store/skiplist/skiplist_builder.rs | 3 +- src/termdict/term_info_store.rs | 1 - src/termdict/termdict.rs | 3 +- src/tokenizer/lower_caser.rs | 29 +++-- src/tokenizer/mod.rs | 2 +- src/tokenizer/raw_tokenizer.rs | 2 +- src/tokenizer/token_stream_chain.rs | 13 +- src/tokenizer/tokenizer.rs | 2 +- 71 files changed, 697 insertions(+), 650 deletions(-) diff --git a/examples/basic_search.rs b/examples/basic_search.rs index 35867b2f0..1aba7bf3f 100644 --- a/examples/basic_search.rs +++ b/examples/basic_search.rs @@ -10,7 +10,6 @@ // - search for the best document matchings "sea whale" // - retrieve the best document original content. 
- extern crate tempdir; // --- @@ -235,9 +234,7 @@ fn main() -> tantivy::Result<()> { println!("{}", schema.to_json(&retrieved_doc)); } - Ok(()) } - use tempdir::TempDir; diff --git a/examples/custom_tokenizer.rs b/examples/custom_tokenizer.rs index e44b82c57..7c5299e00 100644 --- a/examples/custom_tokenizer.rs +++ b/examples/custom_tokenizer.rs @@ -3,7 +3,6 @@ // In this example, we'll see how to define a tokenizer pipeline // by aligning a bunch of `TokenFilter`. - #[macro_use] extern crate tantivy; use tantivy::collector::TopCollector; @@ -12,7 +11,6 @@ use tantivy::schema::*; use tantivy::tokenizer::NgramTokenizer; use tantivy::Index; - fn main() -> tantivy::Result<()> { // # Defining the schema // diff --git a/examples/deleting_updating_documents.rs b/examples/deleting_updating_documents.rs index 9ddb38a59..de0603392 100644 --- a/examples/deleting_updating_documents.rs +++ b/examples/deleting_updating_documents.rs @@ -11,10 +11,9 @@ #[macro_use] extern crate tantivy; use tantivy::collector::TopCollector; +use tantivy::query::TermQuery; use tantivy::schema::*; use tantivy::Index; -use tantivy::query::TermQuery; - // A simple helper function to fetch a single document // given its id from our index. @@ -31,7 +30,7 @@ fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result tantivy::Result tantivy::Result<()> { - // # Defining the schema // // Check out the *basic_search* example if this makes @@ -126,7 +124,6 @@ fn main() -> tantivy::Result<()> { isbn => "978-9176370711", )); - // You are guaranteed that your clients will only observe your index in // the state it was in after a commit. // In this example, your search engine will at no point be missing the *Frankenstein* document. @@ -143,4 +140,4 @@ fn main() -> tantivy::Result<()> { ); Ok(()) -} \ No newline at end of file +} diff --git a/examples/faceted_search.rs b/examples/faceted_search.rs index 76d167778..24fd536e8 100644 --- a/examples/faceted_search.rs +++ b/examples/faceted_search.rs @@ -22,60 +22,60 @@ use tantivy::schema::*; use tantivy::Index; fn main() -> tantivy::Result<()> { - // Let's create a temporary directory for the - // sake of this example - let index_path = TempDir::new("tantivy_facet_example_dir")?; - let mut schema_builder = SchemaBuilder::default(); + // Let's create a temporary directory for the + // sake of this example + let index_path = TempDir::new("tantivy_facet_example_dir")?; + let mut schema_builder = SchemaBuilder::default(); - schema_builder.add_text_field("name", TEXT | STORED); + schema_builder.add_text_field("name", TEXT | STORED); - // this is our faceted field - schema_builder.add_facet_field("tags"); + // this is our faceted field + schema_builder.add_facet_field("tags"); - let schema = schema_builder.build(); + let schema = schema_builder.build(); - let index = Index::create_in_dir(&index_path, schema.clone())?; + let index = Index::create_in_dir(&index_path, schema.clone())?; - let mut index_writer = index.writer(50_000_000)?; + let mut index_writer = index.writer(50_000_000)?; - let name = schema.get_field("name").unwrap(); - let tags = schema.get_field("tags").unwrap(); + let name = schema.get_field("name").unwrap(); + let tags = schema.get_field("tags").unwrap(); - // For convenience, tantivy also comes with a macro to - // reduce the boilerplate above. - index_writer.add_document(doc!( + // For convenience, tantivy also comes with a macro to + // reduce the boilerplate above. 
+ index_writer.add_document(doc!( name => "the ditch", tags => Facet::from("/pools/north") )); - index_writer.add_document(doc!( + index_writer.add_document(doc!( name => "little stacey", tags => Facet::from("/pools/south") )); - index_writer.commit()?; + index_writer.commit()?; - index.load_searchers()?; + index.load_searchers()?; - let searcher = index.searcher(); + let searcher = index.searcher(); - let mut facet_collector = FacetCollector::for_field(tags); - facet_collector.add_facet("/pools"); + let mut facet_collector = FacetCollector::for_field(tags); + facet_collector.add_facet("/pools"); - searcher.search(&AllQuery, &mut facet_collector).unwrap(); + searcher.search(&AllQuery, &mut facet_collector).unwrap(); - let counts = facet_collector.harvest(); - // This lists all of the facet counts - let facets: Vec<(&Facet, u64)> = counts.get("/pools").collect(); - assert_eq!( - facets, - vec![ - (&Facet::from("/pools/north"), 1), - (&Facet::from("/pools/south"), 1) - ] - ); + let counts = facet_collector.harvest(); + // This lists all of the facet counts + let facets: Vec<(&Facet, u64)> = counts.get("/pools").collect(); + assert_eq!( + facets, + vec![ + (&Facet::from("/pools/north"), 1), + (&Facet::from("/pools/south"), 1), + ] + ); - Ok(()) + Ok(()) } use tempdir::TempDir; diff --git a/examples/iterating_docs_and_positions.rs b/examples/iterating_docs_and_positions.rs index 9d3937617..0434f58c8 100644 --- a/examples/iterating_docs_and_positions.rs +++ b/examples/iterating_docs_and_positions.rs @@ -7,18 +7,15 @@ // the list of documents containing a term, getting // its term frequency, and accessing its positions. - // --- // Importing tantivy... #[macro_use] extern crate tantivy; use tantivy::schema::*; use tantivy::Index; -use tantivy::{DocSet, DocId, Postings}; +use tantivy::{DocId, DocSet, Postings}; fn main() -> tantivy::Result<()> { - - // We first create a schema for the sake of the // example. Check the `basic_search` example for more information. let mut schema_builder = SchemaBuilder::default(); @@ -47,7 +44,6 @@ fn main() -> tantivy::Result<()> { // there is actually only one segment here, but let's iterate through the list // anyway) for segment_reader in searcher.segment_readers() { - // A segment contains different data structure. // Inverted index stands for the combination of // - the term dictionary @@ -58,19 +54,18 @@ fn main() -> tantivy::Result<()> { // Let's go through all docs containing the term `title:the` and access their position let term_the = Term::from_field_text(title, "the"); - // This segment posting object is like a cursor over the documents matching the term. // The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies // and positions. // // If you don't need all this information, you may get better performance by decompressing less // information. - if let Some(mut segment_postings) = inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions) { - + if let Some(mut segment_postings) = + inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions) + { // this buffer will be used to request for positions let mut positions: Vec = Vec::with_capacity(100); while segment_postings.advance() { - // the number of time the term appears in the document. let doc_id: DocId = segment_postings.doc(); //< do not try to access this before calling advance once. @@ -98,7 +93,6 @@ fn main() -> tantivy::Result<()> { } } - // A `Term` is a text token associated with a field. 
// Let's go through all docs containing the term `title:the` and access their position let term_the = Term::from_field_text(title, "the"); @@ -111,7 +105,6 @@ fn main() -> tantivy::Result<()> { // Also, for some VERY specific high performance use case like an OLAP analysis of logs, // you can get better performance by accessing directly the blocks of doc ids. for segment_reader in searcher.segment_readers() { - // A segment contains different data structure. // Inverted index stands for the combination of // - the term dictionary @@ -124,7 +117,9 @@ fn main() -> tantivy::Result<()> { // // If you don't need all this information, you may get better performance by decompressing less // information. - if let Some(mut block_segment_postings) = inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic) { + if let Some(mut block_segment_postings) = + inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic) + { while block_segment_postings.advance() { // Once again these docs MAY contains deleted documents as well. let docs = block_segment_postings.docs(); @@ -136,4 +131,3 @@ fn main() -> tantivy::Result<()> { Ok(()) } - diff --git a/examples/snippet.rs b/examples/snippet.rs index 814edd62d..a39ba2016 100644 --- a/examples/snippet.rs +++ b/examples/snippet.rs @@ -66,6 +66,6 @@ fn main() -> tantivy::Result<()> { println!("title: {}", doc.get_first(title).unwrap().text().unwrap()); println!("snippet: {}", snippet.to_html()); } - + Ok(()) } diff --git a/examples/stop_words.rs b/examples/stop_words.rs index b131d876c..8945f8614 100644 --- a/examples/stop_words.rs +++ b/examples/stop_words.rs @@ -22,59 +22,59 @@ use tantivy::tokenizer::*; use tantivy::Index; fn main() -> tantivy::Result<()> { - // this example assumes you understand the content in `basic_search` - let mut schema_builder = SchemaBuilder::default(); + // this example assumes you understand the content in `basic_search` + let mut schema_builder = SchemaBuilder::default(); - // This configures your custom options for how tantivy will - // store and process your content in the index; The key - // to note is that we are setting the tokenizer to `stoppy` - // which will be defined and registered below. - let text_field_indexing = TextFieldIndexing::default() - .set_tokenizer("stoppy") - .set_index_option(IndexRecordOption::WithFreqsAndPositions); - let text_options = TextOptions::default() - .set_indexing_options(text_field_indexing) - .set_stored(); + // This configures your custom options for how tantivy will + // store and process your content in the index; The key + // to note is that we are setting the tokenizer to `stoppy` + // which will be defined and registered below. + let text_field_indexing = TextFieldIndexing::default() + .set_tokenizer("stoppy") + .set_index_option(IndexRecordOption::WithFreqsAndPositions); + let text_options = TextOptions::default() + .set_indexing_options(text_field_indexing) + .set_stored(); - // Our first field is title. - schema_builder.add_text_field("title", text_options); + // Our first field is title. + schema_builder.add_text_field("title", text_options); - // Our second field is body. - let text_field_indexing = TextFieldIndexing::default() - .set_tokenizer("stoppy") - .set_index_option(IndexRecordOption::WithFreqsAndPositions); - let text_options = TextOptions::default() - .set_indexing_options(text_field_indexing) - .set_stored(); - schema_builder.add_text_field("body", text_options); + // Our second field is body. 
+ let text_field_indexing = TextFieldIndexing::default() + .set_tokenizer("stoppy") + .set_index_option(IndexRecordOption::WithFreqsAndPositions); + let text_options = TextOptions::default() + .set_indexing_options(text_field_indexing) + .set_stored(); + schema_builder.add_text_field("body", text_options); - let schema = schema_builder.build(); + let schema = schema_builder.build(); - let index = Index::create_in_ram(schema.clone()); + let index = Index::create_in_ram(schema.clone()); - // This tokenizer lowers all of the text (to help with stop word matching) - // then removes all instances of `the` and `and` from the corpus - let tokenizer = SimpleTokenizer - .filter(LowerCaser) - .filter(StopWordFilter::remove(vec![ - "the".to_string(), - "and".to_string(), - ])); + // This tokenizer lowers all of the text (to help with stop word matching) + // then removes all instances of `the` and `and` from the corpus + let tokenizer = SimpleTokenizer + .filter(LowerCaser) + .filter(StopWordFilter::remove(vec![ + "the".to_string(), + "and".to_string(), + ])); - index.tokenizers().register("stoppy", tokenizer); + index.tokenizers().register("stoppy", tokenizer); - let mut index_writer = index.writer(50_000_000)?; + let mut index_writer = index.writer(50_000_000)?; - let title = schema.get_field("title").unwrap(); - let body = schema.get_field("body").unwrap(); + let title = schema.get_field("title").unwrap(); + let body = schema.get_field("body").unwrap(); - index_writer.add_document(doc!( + index_writer.add_document(doc!( title => "The Old Man and the Sea", body => "He was an old man who fished alone in a skiff in the Gulf Stream and \ he had gone eighty-four days now without taking a fish." )); - index_writer.add_document(doc!( + index_writer.add_document(doc!( title => "Of Mice and Men", body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \ bank and runs deep and green. The water is warm too, for it has slipped twinkling \ @@ -86,7 +86,7 @@ fn main() -> tantivy::Result<()> { limbs and branches that arch over the pool" )); - index_writer.add_document(doc!( + index_writer.add_document(doc!( title => "Frankenstein", body => "You will rejoice to hear that no disaster has accompanied the commencement of an \ enterprise which you have regarded with such evil forebodings. I arrived here \ @@ -94,28 +94,28 @@ fn main() -> tantivy::Result<()> { increasing confidence in the success of my undertaking." )); - index_writer.commit()?; + index_writer.commit()?; - index.load_searchers()?; + index.load_searchers()?; - let searcher = index.searcher(); + let searcher = index.searcher(); - let query_parser = QueryParser::for_index(&index, vec![title, body]); + let query_parser = QueryParser::for_index(&index, vec![title, body]); - // stop words are applied on the query as well. - // The following will be equivalent to `title:frankenstein` - let query = query_parser.parse_query("title:\"the Frankenstein\"")?; + // stop words are applied on the query as well. 
+ // The following will be equivalent to `title:frankenstein` + let query = query_parser.parse_query("title:\"the Frankenstein\"")?; - let mut top_collector = TopCollector::with_limit(10); + let mut top_collector = TopCollector::with_limit(10); - searcher.search(&*query, &mut top_collector)?; + searcher.search(&*query, &mut top_collector)?; - let doc_addresses = top_collector.docs(); + let doc_addresses = top_collector.docs(); - for doc_address in doc_addresses { - let retrieved_doc = searcher.doc(&doc_address)?; - println!("{}", schema.to_json(&retrieved_doc)); - } + for doc_address in doc_addresses { + let retrieved_doc = searcher.doc(&doc_address)?; + println!("{}", schema.to_json(&retrieved_doc)); + } - Ok(()) -} \ No newline at end of file + Ok(()) +} diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 6c0bb647d..8e1c95876 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -342,16 +342,19 @@ impl FacetCollector { pub fn harvest(mut self) -> FacetCounts { self.finalize_segment(); - let collapsed_facet_ords: Vec<&[u64]> = self.segment_counters + let collapsed_facet_ords: Vec<&[u64]> = self + .segment_counters .iter() .map(|segment_counter| &segment_counter.facet_ords[..]) .collect(); - let collapsed_facet_counts: Vec<&[u64]> = self.segment_counters + let collapsed_facet_counts: Vec<&[u64]> = self + .segment_counters .iter() .map(|segment_counter| &segment_counter.facet_counts[..]) .collect(); - let facet_streams = self.segment_counters + let facet_streams = self + .segment_counters .iter() .map(|seg_counts| seg_counts.facet_reader.facet_dict().range().into_stream()) .collect::>(); @@ -402,7 +405,8 @@ impl Collector for FacetCollector { fn collect(&mut self, doc: DocId, _: Score) { let facet_reader: &mut FacetReader = unsafe { - &mut *self.ff_reader + &mut *self + .ff_reader .as_ref() .expect("collect() was called before set_segment. This should never happen.") .get() @@ -476,9 +480,8 @@ impl FacetCounts { heap.push(Hit { count, facet }); } - let mut lowest_count: u64 = heap.peek().map(|hit| hit.count) - .unwrap_or(u64::MIN); //< the `unwrap_or` case may be triggered but the value - // is never used in that case. + let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN); //< the `unwrap_or` case may be triggered but the value + // is never used in that case. for (facet, count) in it { if count > lowest_count { @@ -619,7 +622,13 @@ mod tests { let doc = doc!(facet_field => facet); iter::repeat(doc).take(count) }) - .map(|mut doc| { doc.add_facet(facet_field, &format!("/facet/{}", thread_rng().sample(&uniform) )); doc}) + .map(|mut doc| { + doc.add_facet( + facet_field, + &format!("/facet/{}", thread_rng().sample(&uniform)), + ); + doc + }) .collect(); thread_rng().shuffle(&mut docs[..]); diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 64d2eee7f..6cb61e8b2 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -1,8 +1,8 @@ +use std::cmp::Ordering; +use std::collections::BinaryHeap; use DocAddress; use DocId; use SegmentLocalId; -use std::cmp::Ordering; -use std::collections::BinaryHeap; /// Contains a feature (field, score, etc.) of a document along with the document address. 
/// @@ -139,9 +139,9 @@ impl TopCollector { #[cfg(test)] mod tests { + use super::*; use DocId; use Score; - use super::*; #[test] fn test_top_collector_not_at_capacity() { diff --git a/src/collector/top_field_collector.rs b/src/collector/top_field_collector.rs index ec2361b3f..3fb95d21a 100644 --- a/src/collector/top_field_collector.rs +++ b/src/collector/top_field_collector.rs @@ -1,13 +1,13 @@ +use super::Collector; use collector::top_collector::TopCollector; -use DocAddress; -use DocId; use fastfield::FastFieldReader; use fastfield::FastValue; +use schema::Field; +use DocAddress; +use DocId; use Result; use Score; use SegmentReader; -use super::Collector; -use schema::Field; /// The Top Field Collector keeps track of the K documents /// sorted by a fast field in the index @@ -142,16 +142,16 @@ impl Collector for TopFieldCollector { #[cfg(test)] mod tests { - use Index; - use IndexWriter; - use TantivyError; + use super::*; use query::Query; use query::QueryParser; - use schema::{FAST, SchemaBuilder, TEXT}; use schema::Field; use schema::IntOptions; use schema::Schema; - use super::*; + use schema::{SchemaBuilder, FAST, TEXT}; + use Index; + use IndexWriter; + use TantivyError; const TITLE: &str = "title"; const SIZE: &str = "size"; diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 4a8ace86b..68bf114f6 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -1,3 +1,4 @@ +use super::Collector; use collector::top_collector::TopCollector; use DocAddress; use DocId; @@ -5,7 +6,6 @@ use Result; use Score; use SegmentLocalId; use SegmentReader; -use super::Collector; /// The Top Score Collector keeps track of the K documents /// sorted by their score. @@ -131,10 +131,10 @@ impl Collector for TopScoreCollector { #[cfg(test)] mod tests { + use super::*; use collector::Collector; use DocId; use Score; - use super::*; #[test] fn test_top_collector_not_at_capacity() { diff --git a/src/common/composite_file.rs b/src/common/composite_file.rs index 2f3f71a47..e7d657b65 100644 --- a/src/common/composite_file.rs +++ b/src/common/composite_file.rs @@ -72,7 +72,8 @@ impl CompositeWrite { let footer_offset = self.write.written_bytes(); VInt(self.offsets.len() as u64).serialize(&mut self.write)?; - let mut offset_fields: Vec<_> = self.offsets + let mut offset_fields: Vec<_> = self + .offsets .iter() .map(|(file_addr, offset)| (*offset, *file_addr)) .collect(); diff --git a/src/common/vint.rs b/src/common/vint.rs index 308aff1ca..7b782a946 100644 --- a/src/common/vint.rs +++ b/src/common/vint.rs @@ -10,8 +10,6 @@ pub struct VInt(pub u64); const STOP_BIT: u8 = 128; impl VInt { - - pub fn val(&self) -> u64 { self.0 } @@ -20,14 +18,13 @@ impl VInt { VInt::deserialize(reader).map(|vint| vint.0) } - pub fn serialize_into_vec(&self, output: &mut Vec){ + pub fn serialize_into_vec(&self, output: &mut Vec) { let mut buffer = [0u8; 10]; let num_bytes = self.serialize_into(&mut buffer); output.extend(&buffer[0..num_bytes]); } fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize { - let mut remaining = self.0; for (i, b) in buffer.iter_mut().enumerate() { let next_byte: u8 = (remaining % 128u64) as u8; @@ -74,7 +71,6 @@ impl BinarySerializable for VInt { } } - #[cfg(test)] mod tests { @@ -89,10 +85,10 @@ mod tests { } assert!(num_bytes > 0); if num_bytes < 10 { - assert!(1u64 << (7*num_bytes) > val); + assert!(1u64 << (7 * num_bytes) > val); } if num_bytes > 1 { - assert!(1u64 << (7*(num_bytes-1)) <= val); + assert!(1u64 << (7 * 
(num_bytes - 1)) <= val); } let serdeser_val = VInt::deserialize(&mut &v[..]).unwrap(); assert_eq!(val, serdeser_val.0); @@ -105,11 +101,11 @@ mod tests { aux_test_vint(5); aux_test_vint(u64::max_value()); for i in 1..9 { - let power_of_128 = 1u64 << (7*i); + let power_of_128 = 1u64 << (7 * i); aux_test_vint(power_of_128 - 1u64); - aux_test_vint(power_of_128 ); + aux_test_vint(power_of_128); aux_test_vint(power_of_128 + 1u64); } aux_test_vint(10); } -} \ No newline at end of file +} diff --git a/src/core/index.rs b/src/core/index.rs index a7bef841d..3eafb90cc 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -1,36 +1,36 @@ -use core::SegmentId; -use error::TantivyError; -use schema::Schema; -use serde_json; -use std::borrow::BorrowMut; -use std::fmt; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use Result; -use indexer::LockType; use super::pool::LeasedItem; use super::pool::Pool; use super::segment::create_segment; use super::segment::Segment; use core::searcher::Searcher; use core::IndexMeta; +use core::SegmentId; use core::SegmentMeta; use core::SegmentReader; use core::META_FILEPATH; +use directory::ManagedDirectory; #[cfg(feature = "mmap")] use directory::MmapDirectory; use directory::{Directory, RAMDirectory}; -use directory::{ManagedDirectory}; +use error::TantivyError; use indexer::index_writer::open_index_writer; use indexer::index_writer::HEAP_SIZE_MIN; use indexer::segment_updater::save_new_metas; +use indexer::LockType; use num_cpus; +use schema::Field; +use schema::FieldType; +use schema::Schema; +use serde_json; +use std::borrow::BorrowMut; +use std::fmt; use std::path::Path; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use tokenizer::BoxedTokenizer; use tokenizer::TokenizerManager; use IndexWriter; -use schema::FieldType; -use schema::Field; -use tokenizer::BoxedTokenizer; +use Result; fn load_metas(directory: &Directory) -> Result { let meta_data = directory.atomic_read(&META_FILEPATH)?; @@ -115,31 +115,24 @@ impl Index { &self.tokenizers } - /// Helper to access the tokenizer associated to a specific field. 
pub fn tokenizer_for_field(&self, field: Field) -> Result> { let field_entry = self.schema.get_field_entry(field); let field_type = field_entry.field_type(); let tokenizer_manager: &TokenizerManager = self.tokenizers(); - let tokenizer_name_opt: Option> = - match field_type { - FieldType::Str(text_options) => { - text_options - .get_indexing_options() - .map(|text_indexing_options| text_indexing_options.tokenizer().to_string()) - .and_then(|tokenizer_name| tokenizer_manager.get(&tokenizer_name)) - }, - _ => { - None - } - }; + let tokenizer_name_opt: Option> = match field_type { + FieldType::Str(text_options) => text_options + .get_indexing_options() + .map(|text_indexing_options| text_indexing_options.tokenizer().to_string()) + .and_then(|tokenizer_name| tokenizer_manager.get(&tokenizer_name)), + _ => None, + }; match tokenizer_name_opt { - Some(tokenizer) => { - Ok(tokenizer) - } - None => { - Err(TantivyError:: SchemaError(format!("{:?} is not a text field.", field_entry.name()))) - } + Some(tokenizer) => Ok(tokenizer), + None => Err(TantivyError::SchemaError(format!( + "{:?} is not a text field.", + field_entry.name() + ))), } } @@ -186,7 +179,6 @@ impl Index { num_threads: usize, overall_heap_size_in_bytes: usize, ) -> Result { - let directory_lock = LockType::IndexWriterLock.acquire_lock(&self.directory)?; let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads; open_index_writer( @@ -225,7 +217,8 @@ impl Index { /// Returns the list of segments that are searchable pub fn searchable_segments(&self) -> Result> { - Ok(self.searchable_segment_metas()? + Ok(self + .searchable_segment_metas()? .into_iter() .map(|segment_meta| self.segment(segment_meta)) .collect()) @@ -260,7 +253,8 @@ impl Index { /// Returns the list of segment ids that are searchable. pub fn searchable_segment_ids(&self) -> Result> { - Ok(self.searchable_segment_metas()? + Ok(self + .searchable_segment_metas()? .iter() .map(|segment_meta| segment_meta.id()) .collect()) @@ -332,11 +326,10 @@ impl Clone for Index { } } - #[cfg(test)] mod tests { + use schema::{SchemaBuilder, INT_INDEXED, TEXT}; use Index; - use schema::{SchemaBuilder, TEXT, INT_INDEXED}; #[test] fn test_indexer_for_field() { @@ -352,5 +345,4 @@ mod tests { ); } - -} \ No newline at end of file +} diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index b919e09b0..bb71be1ae 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -1,13 +1,13 @@ use common::BinarySerializable; use directory::ReadOnlySource; +use owned_read::OwnedRead; +use positions::PositionReader; use postings::TermInfo; use postings::{BlockSegmentPostings, SegmentPostings}; use schema::FieldType; use schema::IndexRecordOption; use schema::Term; use termdict::TermDictionary; -use owned_read::OwnedRead; -use positions::PositionReader; /// The inverted index reader is in charge of accessing /// the inverted index associated to a specific field. @@ -100,7 +100,6 @@ impl InvertedIndexReader { block_postings.reset(term_info.doc_freq, postings_reader); } - /// Returns a block postings given a `Term`. /// This method is for an advanced usage only. /// @@ -111,7 +110,7 @@ impl InvertedIndexReader { option: IndexRecordOption, ) -> Option { self.get_term_info(term) - .map(move|term_info| self.read_block_postings_from_terminfo(&term_info, option)) + .map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option)) } /// Returns a block postings given a `term_info`. 
@@ -147,7 +146,8 @@ impl InvertedIndexReader { if option.has_positions() { let position_reader = self.positions_source.clone(); let skip_reader = self.positions_idx_source.clone(); - let position_reader = PositionReader::new(position_reader, skip_reader, term_info.positions_idx); + let position_reader = + PositionReader::new(position_reader, skip_reader, term_info.positions_idx); Some(position_reader) } else { None diff --git a/src/core/pool.rs b/src/core/pool.rs index 609848317..d8564e46d 100644 --- a/src/core/pool.rs +++ b/src/core/pool.rs @@ -87,7 +87,8 @@ impl Deref for LeasedItem { type Target = T; fn deref(&self) -> &T { - &self.gen_item + &self + .gen_item .as_ref() .expect("Unwrapping a leased item should never fail") .item // unwrap is safe here @@ -96,7 +97,8 @@ impl Deref for LeasedItem { impl DerefMut for LeasedItem { fn deref_mut(&mut self) -> &mut T { - &mut self.gen_item + &mut self + .gen_item .as_mut() .expect("Unwrapping a mut leased item should never fail") .item // unwrap is safe here diff --git a/src/core/searcher.rs b/src/core/searcher.rs index f17df042f..cbe549062 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -9,8 +9,8 @@ use std::fmt; use std::sync::Arc; use termdict::TermMerger; use DocAddress; -use Result; use Index; +use Result; /// Holds a list of `SegmentReader`s ready for search. /// @@ -25,7 +25,11 @@ pub struct Searcher { impl Searcher { /// Creates a new `Searcher` - pub(crate) fn new(schema: Schema, index: Index, segment_readers: Vec) -> Searcher { + pub(crate) fn new( + schema: Schema, + index: Index, + segment_readers: Vec, + ) -> Searcher { Searcher { schema, index, @@ -87,7 +91,8 @@ impl Searcher { /// Return the field searcher associated to a `Field`. pub fn field(&self, field: Field) -> FieldSearcher { - let inv_index_readers = self.segment_readers + let inv_index_readers = self + .segment_readers .iter() .map(|segment_reader| segment_reader.inverted_index(field)) .collect::>(); @@ -107,7 +112,8 @@ impl FieldSearcher { /// Returns a Stream over all of the sorted unique terms of /// for the given field. pub fn terms(&self) -> TermMerger { - let term_streamers: Vec<_> = self.inv_index_readers + let term_streamers: Vec<_> = self + .inv_index_readers .iter() .map(|inverted_index| inverted_index.terms().stream()) .collect(); @@ -117,7 +123,8 @@ impl FieldSearcher { impl fmt::Debug for Searcher { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let segment_ids = self.segment_readers + let segment_ids = self + .segment_readers .iter() .map(|segment_reader| segment_reader.segment_id()) .collect::>(); diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index 56a3a7b9e..dff6cca48 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -157,11 +157,13 @@ impl SegmentReader { &FieldType::Bytes => {} _ => return Err(FastFieldNotAvailableError::new(field_entry)), } - let idx_reader = self.fast_fields_composite + let idx_reader = self + .fast_fields_composite .open_read_with_idx(field, 0) .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) .map(FastFieldReader::open)?; - let values = self.fast_fields_composite + let values = self + .fast_fields_composite .open_read_with_idx(field, 1) .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?; Ok(BytesFastFieldReader::open(idx_reader, values)) @@ -285,7 +287,8 @@ impl SegmentReader { /// term dictionary associated to a specific field, /// and opening the posting list associated to any term. 
pub fn inverted_index(&self, field: Field) -> Arc { - if let Some(inv_idx_reader) = self.inv_idx_reader_cache + if let Some(inv_idx_reader) = self + .inv_idx_reader_cache .read() .expect("Lock poisoned. This should never happen") .get(&field) @@ -314,15 +317,18 @@ impl SegmentReader { let postings_source = postings_source_opt.unwrap(); - let termdict_source = self.termdict_composite + let termdict_source = self + .termdict_composite .open_read(field) .expect("Failed to open field term dictionary in composite file. Is the field indexed"); - let positions_source = self.positions_composite + let positions_source = self + .positions_composite .open_read(field) .expect("Index corrupted. Failed to open field positions in composite file."); - let positions_idx_source = self.positions_idx_composite + let positions_idx_source = self + .positions_idx_composite .open_read(field) .expect("Index corrupted. Failed to open field positions in composite file."); @@ -435,7 +441,6 @@ mod test { use schema::{SchemaBuilder, Term, STORED, TEXT}; use DocId; - #[test] fn test_alive_docs_iterator() { let mut schema_builder = SchemaBuilder::new(); diff --git a/src/directory/directory.rs b/src/directory/directory.rs index 596cdc492..0f99be74b 100644 --- a/src/directory/directory.rs +++ b/src/directory/directory.rs @@ -77,15 +77,15 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static { /// DirectoryClone pub trait DirectoryClone { - /// Clones the directory and boxes the clone - fn box_clone(&self) -> Box; + /// Clones the directory and boxes the clone + fn box_clone(&self) -> Box; } impl DirectoryClone for T where - T: 'static + Directory + Clone, + T: 'static + Directory + Clone, { - fn box_clone(&self) -> Box { - Box::new(self.clone()) - } + fn box_clone(&self) -> Box { + Box::new(self.clone()) + } } diff --git a/src/directory/managed_directory.rs b/src/directory/managed_directory.rs index e5510d113..7a9a8bd15 100644 --- a/src/directory/managed_directory.rs +++ b/src/directory/managed_directory.rs @@ -2,6 +2,7 @@ use core::MANAGED_FILEPATH; use directory::error::{DeleteError, IOError, OpenReadError, OpenWriteError}; use directory::{ReadOnlySource, WritePtr}; use error::TantivyError; +use indexer::LockType; use serde_json; use std::collections::HashSet; use std::io; @@ -12,9 +13,6 @@ use std::sync::RwLockWriteGuard; use std::sync::{Arc, RwLock}; use Directory; use Result; -use indexer::LockType; - - /// Returns true iff the file is "managed". /// Non-managed file are not subject to garbage collection. @@ -108,7 +106,8 @@ impl ManagedDirectory { // // releasing the lock as .delete() will use it too. { - let meta_informations_rlock = self.meta_informations + let meta_informations_rlock = self + .meta_informations .read() .expect("Managed directory rlock poisoned in garbage collect."); @@ -157,7 +156,8 @@ impl ManagedDirectory { if !deleted_files.is_empty() { // update the list of managed files by removing // the file that were removed. - let mut meta_informations_wlock = self.meta_informations + let mut meta_informations_wlock = self + .meta_informations .write() .expect("Managed directory wlock poisoned (2)."); { @@ -186,9 +186,10 @@ impl ManagedDirectory { fn register_file_as_managed(&mut self, filepath: &Path) -> io::Result<()> { // Files starting by "." (e.g. lock files) are not managed. 
if !is_managed(filepath) { - return Ok(()); + return Ok(()); } - let mut meta_wlock = self.meta_informations + let mut meta_wlock = self + .meta_informations .write() .expect("Managed file lock poisoned"); let has_changed = meta_wlock.managed_paths.insert(filepath.to_owned()); diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index 05fa18793..619e0fd19 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -32,7 +32,8 @@ fn open_mmap(full_path: &Path) -> result::Result, OpenReadE } })?; - let meta_data = file.metadata() + let meta_data = file + .metadata() .map_err(|e| IOError::with_path(full_path.to_owned(), e))?; if meta_data.len() == 0 { // if the file size is 0, it will not be possible @@ -309,7 +310,8 @@ impl Directory for MmapDirectory { // when the last reference is gone. mmap_cache.cache.remove(&full_path); match fs::remove_file(&full_path) { - Ok(_) => self.sync_directory() + Ok(_) => self + .sync_directory() .map_err(|e| IOError::with_path(path.to_owned(), e).into()), Err(e) => { if e.kind() == io::ErrorKind::NotFound { diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 1b40970b4..2f1733e0f 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -170,7 +170,8 @@ impl Directory for RAMDirectory { let path_buf = PathBuf::from(path); let vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone()); - let exists = self.fs + let exists = self + .fs .write(path_buf.clone(), &Vec::new()) .map_err(|err| IOError::with_path(path.to_owned(), err))?; // force the creation of the file to mimic the MMap directory. @@ -195,9 +196,10 @@ impl Directory for RAMDirectory { } fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> { - fail_point!("RAMDirectory::atomic_write", |msg| { - Err(io::Error::new(io::ErrorKind::Other, msg.unwrap_or("Undefined".to_string()))) - }); + fail_point!("RAMDirectory::atomic_write", |msg| Err(io::Error::new( + io::ErrorKind::Other, + msg.unwrap_or("Undefined".to_string()) + ))); let path_buf = PathBuf::from(path); let mut vec_writer = VecWriter::new(path_buf.clone(), self.fs.clone()); self.fs.write(path_buf, &Vec::new())?; diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs index d2e9358d4..6ed2049e5 100644 --- a/src/directory/read_only_source.rs +++ b/src/directory/read_only_source.rs @@ -5,7 +5,6 @@ use fst::raw::MmapReadOnly; use stable_deref_trait::{CloneStableDeref, StableDeref}; use std::ops::Deref; - /// Read object that represents files in tantivy. /// /// These read objects are only in charge to deliver diff --git a/src/error.rs b/src/error.rs index ddde26789..8509d28cd 100644 --- a/src/error.rs +++ b/src/error.rs @@ -4,9 +4,9 @@ use std::io; use directory::error::{IOError, OpenDirectoryError, OpenReadError, OpenWriteError}; use fastfield::FastFieldNotAvailableError; +use indexer::LockType; use query; use schema; -use indexer::LockType; use serde_json; use std::path::PathBuf; use std::sync::PoisonError; @@ -21,7 +21,10 @@ pub enum TantivyError { #[fail(display = "file already exists: '{:?}'", _0)] FileAlreadyExists(PathBuf), /// Failed to acquire file lock - #[fail(display = "Failed to acquire Lockfile: {:?}. Possible causes: another IndexWriter instance or panic during previous lock drop.", _0)] + #[fail( + display = "Failed to acquire Lockfile: {:?}. Possible causes: another IndexWriter instance or panic during previous lock drop.", + _0 + )] LockFailure(LockType), /// IO Error. 
#[fail(display = "an IO error occurred: '{}'", _0)] @@ -95,14 +98,13 @@ impl From for TantivyError { } } - impl From for TantivyError { fn from(error: OpenWriteError) -> TantivyError { match error { - OpenWriteError::FileAlreadyExists(filepath) => - TantivyError::FileAlreadyExists(filepath), - OpenWriteError::IOError(io_error) => - TantivyError::IOError(io_error), + OpenWriteError::FileAlreadyExists(filepath) => { + TantivyError::FileAlreadyExists(filepath) + } + OpenWriteError::IOError(io_error) => TantivyError::IOError(io_error), }.into() } } diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index 3f8a0eb5b..15ed658ce 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -41,7 +41,8 @@ pub struct DeleteBitSet { impl DeleteBitSet { /// Opens a delete bitset given its data source. pub fn open(data: ReadOnlySource) -> DeleteBitSet { - let num_deleted: usize = data.as_slice() + let num_deleted: usize = data + .as_slice() .iter() .map(|b| b.count_ones() as usize) .sum(); diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index 182b17989..92a917089 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -56,7 +56,8 @@ impl FacetReader { /// Given a term ordinal returns the term associated to it. pub fn facet_from_ord(&self, facet_ord: TermOrdinal, output: &mut Facet) { - let found_term = self.term_dict + let found_term = self + .term_dict .ord_to_term(facet_ord as u64, output.inner_buffer_mut()); assert!(found_term, "Term ordinal {} no found.", facet_ord); } diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 9177ddcd9..e5fd45203 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -132,7 +132,8 @@ impl MultiValueIntFastFieldWriter { ); let mut doc_vals: Vec = Vec::with_capacity(100); - for (start, stop) in self.doc_index + for (start, stop) in self + .doc_index .windows(2) .map(|interval| (interval[0], interval[1])) .chain(Some(last_interval).into_iter()) @@ -148,7 +149,6 @@ impl MultiValueIntFastFieldWriter { value_serializer.add_val(val)?; } } - } None => { let val_min_max = self.vals.iter().cloned().minmax(); diff --git a/src/functional_test.rs b/src/functional_test.rs index af7b1883a..9905f1d6e 100644 --- a/src/functional_test.rs +++ b/src/functional_test.rs @@ -1,8 +1,8 @@ use rand::thread_rng; use std::collections::HashSet; -use rand::Rng; use rand::distributions::Range; +use rand::Rng; use schema::*; use Index; use Searcher; diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index 4c2597fbb..f921b7523 100644 --- a/src/indexer/delete_queue.rs +++ b/src/indexer/delete_queue.rs @@ -52,7 +52,8 @@ impl DeleteQueue { // // Past delete operations are not accessible. pub fn cursor(&self) -> DeleteCursor { - let last_block = self.inner + let last_block = self + .inner .read() .expect("Read lock poisoned when opening delete queue cursor") .last_block @@ -92,7 +93,8 @@ impl DeleteQueue { // be some unflushed operations. 
// fn flush(&self) -> Option> { - let mut self_wlock = self.inner + let mut self_wlock = self + .inner .write() .expect("Failed to acquire write lock on delete queue writer"); @@ -132,7 +134,8 @@ impl From for NextBlock { impl NextBlock { fn next_block(&self) -> Option> { { - let next_read_lock = self.0 + let next_read_lock = self + .0 .read() .expect("Failed to acquire write lock in delete queue"); if let InnerNextBlock::Closed(ref block) = *next_read_lock { @@ -141,7 +144,8 @@ impl NextBlock { } let next_block; { - let mut next_write_lock = self.0 + let mut next_write_lock = self + .0 .write() .expect("Failed to acquire write lock in delete queue"); match *next_write_lock { diff --git a/src/indexer/directory_lock.rs b/src/indexer/directory_lock.rs index 4dbaa9ed4..9555234bb 100644 --- a/src/indexer/directory_lock.rs +++ b/src/indexer/directory_lock.rs @@ -1,10 +1,10 @@ use directory::error::OpenWriteError; -use Directory; -use TantivyError; +use std::io::Write; use std::path::{Path, PathBuf}; use std::thread; use std::time::Duration; -use std::io::Write; +use Directory; +use TantivyError; #[derive(Debug, Clone, Copy)] pub enum LockType { @@ -29,10 +29,9 @@ pub enum LockType { /// is very simplistic. We retry after `100ms` until we effectively /// acquire the lock. /// This lock should not have much contention in normal usage. - MetaLock + MetaLock, } - /// Retry the logic of acquiring locks is pretty simple. /// We just retry `n` times after a given `duratio`, both /// depending on the type of lock. @@ -49,7 +48,7 @@ impl RetryPolicy { } } - fn wait_and_retry(&mut self,) -> bool { + fn wait_and_retry(&mut self) -> bool { if self.num_retries == 0 { false } else { @@ -58,35 +57,26 @@ impl RetryPolicy { thread::sleep(wait_duration); true } - } } impl LockType { - fn retry_policy(&self) -> RetryPolicy { match *self { - LockType::IndexWriterLock => - RetryPolicy::no_retry(), - LockType::MetaLock => - RetryPolicy { - num_retries: 100, - wait_in_ms: 100, - } + LockType::IndexWriterLock => RetryPolicy::no_retry(), + LockType::MetaLock => RetryPolicy { + num_retries: 100, + wait_in_ms: 100, + }, } } fn try_acquire_lock(&self, directory: &mut Directory) -> Result { let path = self.filename(); - let mut write = directory - .open_write(path) - .map_err(|e| - match e { - OpenWriteError::FileAlreadyExists(_) => - TantivyError::LockFailure(*self), - OpenWriteError::IOError(io_error) => - TantivyError::IOError(io_error), - })?; + let mut write = directory.open_write(path).map_err(|e| match e { + OpenWriteError::FileAlreadyExists(_) => TantivyError::LockFailure(*self), + OpenWriteError::IOError(io_error) => TantivyError::IOError(io_error), + })?; write.flush()?; Ok(DirectoryLock { directory: directory.box_clone(), @@ -94,7 +84,6 @@ impl LockType { }) } - /// Acquire a lock in the given directory. pub fn acquire_lock(&self, directory: &Directory) -> Result { let mut box_directory = directory.box_clone(); @@ -110,25 +99,19 @@ impl LockType { return Err(TantivyError::LockFailure(filepath.to_owned())); } } - Err(_) => { - } + Err(_) => {} } } } fn filename(&self) -> &Path { match *self { - LockType::MetaLock => { - Path::new(".tantivy-meta.lock") - } - LockType::IndexWriterLock => { - Path::new(".tantivy-indexer.lock") - } + LockType::MetaLock => Path::new(".tantivy-meta.lock"), + LockType::IndexWriterLock => Path::new(".tantivy-indexer.lock"), } } } - /// The `DirectoryLock` is an object that represents a file lock. 
/// See [`LockType`](struct.LockType.html) /// diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 3e11c4ce5..2b791ecec 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -347,7 +347,8 @@ impl IndexWriter { } drop(self.workers_join_handle); - let result = self.segment_updater + let result = self + .segment_updater .wait_merging_thread() .map_err(|_| TantivyError::ErrorInThread("Failed to join merging thread.".into())); @@ -494,7 +495,8 @@ impl IndexWriter { let document_receiver = self.document_receiver.clone(); // take the directory lock to create a new index_writer. - let directory_lock = self._directory_lock + let directory_lock = self + ._directory_lock .take() .expect("The IndexWriter does not have any lock. This is a bug, please report."); @@ -678,7 +680,7 @@ mod tests { let err_msg = err.to_string(); assert!(err_msg.contains("Lockfile")); assert!(err_msg.contains("Possible causes:")) - }, + } _ => panic!("Expected LockfileAlreadyExists error"), } } @@ -864,8 +866,7 @@ mod tests { assert_eq!(initial_table_size(1_000_000_000), 19); } - - #[cfg(not(feature="no_fail"))] + #[cfg(not(feature = "no_fail"))] #[test] fn test_write_commit_fails() { use fail; @@ -874,7 +875,7 @@ mod tests { let index = Index::create_in_ram(schema_builder.build()); let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); - for _ in 0..100 { + for _ in 0..100 { index_writer.add_document(doc!(text_field => "a")); } index_writer.commit().unwrap(); diff --git a/src/indexer/merge_policy.rs b/src/indexer/merge_policy.rs index 8df588ca3..407cb94bb 100644 --- a/src/indexer/merge_policy.rs +++ b/src/indexer/merge_policy.rs @@ -21,17 +21,17 @@ pub trait MergePolicy: MergePolicyClone + marker::Send + marker::Sync + Debug { /// MergePolicyClone pub trait MergePolicyClone { - /// Returns a boxed clone of the MergePolicy. - fn box_clone(&self) -> Box; + /// Returns a boxed clone of the MergePolicy. + fn box_clone(&self) -> Box; } impl MergePolicyClone for T where - T: 'static + MergePolicy + Clone, + T: 'static + MergePolicy + Clone, { - fn box_clone(&self) -> Box { - Box::new(self.clone()) - } + fn box_clone(&self) -> Box { + Box::new(self.clone()) + } } /// Never merge segments. 
diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 5d2e17c51..87158a947 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -440,7 +440,8 @@ impl IndexMerger { ) -> Result> { let mut positions_buffer: Vec = Vec::with_capacity(1_000); let mut delta_computer = DeltaComputer::new(); - let field_readers = self.readers + let field_readers = self + .readers .iter() .map(|reader| reader.inverted_index(indexed_field)) .collect::>(); diff --git a/src/indexer/segment_register.rs b/src/indexer/segment_register.rs index c455d3091..c0c883e15 100644 --- a/src/indexer/segment_register.rs +++ b/src/indexer/segment_register.rs @@ -51,7 +51,8 @@ impl SegmentRegister { } pub fn segment_metas(&self) -> Vec { - let mut segment_ids: Vec = self.segment_states + let mut segment_ids: Vec = self + .segment_states .values() .map(|segment_entry| segment_entry.meta().clone()) .collect(); diff --git a/src/lib.rs b/src/lib.rs index 8f4bc726e..9607062e1 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,6 +143,7 @@ extern crate fst; extern crate fst_regex; extern crate futures; extern crate futures_cpupool; +extern crate htmlescape; extern crate itertools; extern crate levenshtein_automata; extern crate num_cpus; @@ -154,7 +155,6 @@ extern crate stable_deref_trait; extern crate tempdir; extern crate tempfile; extern crate uuid; -extern crate htmlescape; #[cfg(test)] #[macro_use] @@ -183,7 +183,7 @@ mod macros; pub use error::TantivyError; -#[deprecated(since="0.7.0", note="please use `tantivy::TantivyError` instead")] +#[deprecated(since = "0.7.0", note = "please use `tantivy::TantivyError` instead")] pub use error::TantivyError as Error; extern crate census; @@ -951,4 +951,3 @@ mod tests { } } } - diff --git a/src/positions/mod.rs b/src/positions/mod.rs index f867358f5..ab0375355 100644 --- a/src/positions/mod.rs +++ b/src/positions/mod.rs @@ -1,4 +1,3 @@ - /// Positions are stored in three parts and over two files. // /// The `SegmentComponent::POSITIONS` file contains all of the bitpacked positions delta, @@ -24,13 +23,12 @@ /// The long skip structure makes it possible to skip rapidly to the a checkpoint close to this /// value, and then skip normally. /// - mod reader; mod serializer; pub use self::reader::PositionReader; pub use self::serializer::PositionSerializer; -use bitpacking::{BitPacker4x, BitPacker}; +use bitpacking::{BitPacker, BitPacker4x}; const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN; const LONG_SKIP_IN_BLOCKS: usize = 1_024; @@ -43,10 +41,10 @@ lazy_static! 
{ #[cfg(test)] pub mod tests { - use std::iter; - use super::{PositionSerializer, PositionReader}; + use super::{PositionReader, PositionSerializer}; use directory::ReadOnlySource; use positions::COMPRESSION_BLOCK_SIZE; + use std::iter; fn create_stream_buffer(vals: &[u32]) -> (ReadOnlySource, ReadOnlySource) { let mut skip_buffer = vec![]; @@ -59,7 +57,10 @@ pub mod tests { } serializer.close().unwrap(); } - (ReadOnlySource::from(stream_buffer), ReadOnlySource::from(skip_buffer)) + ( + ReadOnlySource::from(stream_buffer), + ReadOnlySource::from(skip_buffer), + ) } #[test] @@ -103,7 +104,7 @@ pub mod tests { assert_eq!(skip.len(), 12); assert_eq!(stream.len(), 1168); - let mut position_reader = PositionReader::new(stream,skip, 0u64); + let mut position_reader = PositionReader::new(stream, skip, 0u64); let mut buf = [0u32; 7]; let mut c = 0; for _ in 0..100 { @@ -125,7 +126,7 @@ pub mod tests { let (stream, skip) = create_stream_buffer(&v[..]); assert_eq!(skip.len(), 15_749); assert_eq!(stream.len(), 1_000_000); - let mut position_reader = PositionReader::new(stream,skip, 128 * 1024); + let mut position_reader = PositionReader::new(stream, skip, 128 * 1024); let mut buf = [0u32; 1]; position_reader.read(&mut buf); assert_eq!(buf[0], CONST_VAL); @@ -137,12 +138,17 @@ pub mod tests { let (stream, skip) = create_stream_buffer(&v[..]); assert_eq!(skip.len(), 15_749); assert_eq!(stream.len(), 4_987_872); - for &offset in &[10, 128 * 1024, 128 * 1024 - 1, 128 * 1024 + 7, 128 * 10 * 1024 + 10] { - let mut position_reader = PositionReader::new(stream.clone(),skip.clone(), offset); + for &offset in &[ + 10, + 128 * 1024, + 128 * 1024 - 1, + 128 * 1024 + 7, + 128 * 10 * 1024 + 10, + ] { + let mut position_reader = PositionReader::new(stream.clone(), skip.clone(), offset); let mut buf = [0u32; 1]; position_reader.read(&mut buf); assert_eq!(buf[0], offset as u32); } } } - diff --git a/src/positions/reader.rs b/src/positions/reader.rs index 8b5dd70bb..dec653830 100644 --- a/src/positions/reader.rs +++ b/src/positions/reader.rs @@ -1,12 +1,12 @@ -use bitpacking::{BitPacker4x, BitPacker}; -use owned_read::OwnedRead; -use common::{BinarySerializable, FixedSize}; -use postings::compression::compressed_block_size; -use directory::ReadOnlySource; -use positions::COMPRESSION_BLOCK_SIZE; -use positions::LONG_SKIP_IN_BLOCKS; -use positions::LONG_SKIP_INTERVAL; use super::BIT_PACKER; +use bitpacking::{BitPacker, BitPacker4x}; +use common::{BinarySerializable, FixedSize}; +use directory::ReadOnlySource; +use owned_read::OwnedRead; +use positions::COMPRESSION_BLOCK_SIZE; +use positions::LONG_SKIP_INTERVAL; +use positions::LONG_SKIP_IN_BLOCKS; +use postings::compression::compressed_block_size; pub struct PositionReader { skip_read: OwnedRead, @@ -18,7 +18,6 @@ pub struct PositionReader { // of the block of the next int to read. } - // `ahead` represents the offset of the block currently loaded // compared to the cursor of the actual stream. 
// @@ -32,7 +31,8 @@ fn read_impl( buffer: &mut [u32; 128], mut inner_offset: usize, num_bits: &[u8], - output: &mut [u32]) -> usize { + output: &mut [u32], +) -> usize { let mut output_start = 0; let mut output_len = output.len(); let mut ahead = 0; @@ -47,8 +47,7 @@ fn read_impl( output_start += available_len; inner_offset = 0; let num_bits = num_bits[ahead]; - BitPacker4x::new() - .decompress(position, &mut buffer[..], num_bits); + BitPacker4x::new().decompress(position, &mut buffer[..], num_bits); let block_len = compressed_block_size(num_bits); position = &position[block_len..]; ahead += 1; @@ -56,11 +55,12 @@ fn read_impl( } } - impl PositionReader { - pub fn new(position_source: ReadOnlySource, - skip_source: ReadOnlySource, - offset: u64) -> PositionReader { + pub fn new( + position_source: ReadOnlySource, + skip_source: ReadOnlySource, + offset: u64, + ) -> PositionReader { let skip_len = skip_source.len(); let (body, footer) = skip_source.split(skip_len - u32::SIZE_IN_BYTES); let num_long_skips = u32::deserialize(&mut footer.as_slice()).expect("Index corrupted"); @@ -70,7 +70,8 @@ impl PositionReader { let small_skip = (offset - (long_skip_id as u64) * (LONG_SKIP_INTERVAL as u64)) as usize; let offset_num_bytes: u64 = { if long_skip_id > 0 { - let mut long_skip_blocks: &[u8] = &long_skips.as_slice()[(long_skip_id - 1) * 8..][..8]; + let mut long_skip_blocks: &[u8] = + &long_skips.as_slice()[(long_skip_id - 1) * 8..][..8]; u64::deserialize(&mut long_skip_blocks).expect("Index corrupted") * 16 } else { 0 @@ -79,13 +80,13 @@ impl PositionReader { let mut position_read = OwnedRead::new(position_source); position_read.advance(offset_num_bytes as usize); let mut skip_read = OwnedRead::new(skip_body); - skip_read.advance(long_skip_id * LONG_SKIP_IN_BLOCKS); + skip_read.advance(long_skip_id * LONG_SKIP_IN_BLOCKS); let mut position_reader = PositionReader { skip_read, position_read, inner_offset: 0, buffer: Box::new([0u32; 128]), - ahead: None + ahead: None, }; position_reader.skip(small_skip); position_reader @@ -108,7 +109,8 @@ impl PositionReader { self.buffer.as_mut(), self.inner_offset, &skip_data[1..], - output)); + output, + )); } /// Skip the next `skip_len` integer. @@ -118,23 +120,20 @@ impl PositionReader { /// /// May panic if the end of the stream is reached. 
pub fn skip(&mut self, skip_len: usize) { - let skip_len_plus_inner_offset = skip_len + self.inner_offset; let num_blocks_to_advance = skip_len_plus_inner_offset / COMPRESSION_BLOCK_SIZE; self.inner_offset = skip_len_plus_inner_offset % COMPRESSION_BLOCK_SIZE; - self.ahead = self.ahead - .and_then(|num_blocks| { - if num_blocks >= num_blocks_to_advance { - Some(num_blocks_to_advance - num_blocks_to_advance) - } else { - None - } - }); + self.ahead = self.ahead.and_then(|num_blocks| { + if num_blocks >= num_blocks_to_advance { + Some(num_blocks_to_advance - num_blocks_to_advance) + } else { + None + } + }); - let skip_len = self.skip_read - .as_ref()[..num_blocks_to_advance] + let skip_len = self.skip_read.as_ref()[..num_blocks_to_advance] .iter() .cloned() .map(|num_bit| num_bit as usize) diff --git a/src/positions/serializer.rs b/src/positions/serializer.rs index 598c26363..2a3dc09c1 100644 --- a/src/positions/serializer.rs +++ b/src/positions/serializer.rs @@ -1,8 +1,8 @@ -use std::io; -use bitpacking::BitPacker; -use positions::{COMPRESSION_BLOCK_SIZE, LONG_SKIP_INTERVAL}; -use common::BinarySerializable; use super::BIT_PACKER; +use bitpacking::BitPacker; +use common::BinarySerializable; +use positions::{COMPRESSION_BLOCK_SIZE, LONG_SKIP_INTERVAL}; +use std::io; pub struct PositionSerializer { write_stream: W, @@ -23,7 +23,7 @@ impl PositionSerializer { buffer: vec![0u8; 128 * 4], num_ints: 0u64, long_skips: Vec::new(), - cumulated_num_bits: 0u64 + cumulated_num_bits: 0u64, } } @@ -31,7 +31,6 @@ impl PositionSerializer { self.num_ints } - fn remaining_block_len(&self) -> usize { COMPRESSION_BLOCK_SIZE - self.block.len() } diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs index 6b05010c6..810cf28e7 100644 --- a/src/postings/compression/mod.rs +++ b/src/postings/compression/mod.rs @@ -28,14 +28,16 @@ impl BlockEncoder { pub fn compress_block_sorted(&mut self, block: &[u32], offset: u32) -> (u8, &[u8]) { let num_bits = self.bitpacker.num_bits_sorted(offset, block); - let written_size = self.bitpacker - .compress_sorted(offset, block, &mut self.output[..], num_bits); + let written_size = + self.bitpacker + .compress_sorted(offset, block, &mut self.output[..], num_bits); (num_bits, &self.output[..written_size]) } pub fn compress_block_unsorted(&mut self, block: &[u32]) -> (u8, &[u8]) { let num_bits = self.bitpacker.num_bits(block); - let written_size = self.bitpacker + let written_size = self + .bitpacker .compress(block, &mut self.output[..], num_bits); (num_bits, &self.output[..written_size]) } @@ -62,19 +64,21 @@ impl BlockDecoder { } } - pub fn uncompress_block_sorted(&mut self, compressed_data: &[u8], offset: u32, num_bits: u8) -> usize { + pub fn uncompress_block_sorted( + &mut self, + compressed_data: &[u8], + offset: u32, + num_bits: u8, + ) -> usize { self.output_len = COMPRESSION_BLOCK_SIZE; - self.bitpacker.decompress_sorted( - offset, - &compressed_data, - &mut self.output, - num_bits, - ) + self.bitpacker + .decompress_sorted(offset, &compressed_data, &mut self.output, num_bits) } pub fn uncompress_block_unsorted(&mut self, compressed_data: &[u8], num_bits: u8) -> usize { self.output_len = COMPRESSION_BLOCK_SIZE; - self.bitpacker.decompress(&compressed_data, &mut self.output, num_bits) + self.bitpacker + .decompress(&compressed_data, &mut self.output, num_bits) } #[inline] @@ -88,7 +92,6 @@ impl BlockDecoder { } } - pub trait VIntEncoder { /// Compresses an array of `u32` integers, /// using [delta-encoding](https://en.wikipedia.org/wiki/Delta_ 
encoding) diff --git a/src/postings/compression/vint.rs b/src/postings/compression/vint.rs index 515510c54..88a0df5a5 100644 --- a/src/postings/compression/vint.rs +++ b/src/postings/compression/vint.rs @@ -1,9 +1,5 @@ #[inline(always)] -pub fn compress_sorted<'a>( - input: &[u32], - output: &'a mut [u8], - mut offset: u32, -) -> &'a [u8] { +pub fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], mut offset: u32) -> &'a [u8] { let mut byte_written = 0; for &v in input { let mut to_encode: u32 = v - offset; @@ -46,11 +42,7 @@ pub(crate) fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a } #[inline(always)] -pub fn uncompress_sorted<'a>( - compressed_data: &'a [u8], - output: &mut [u32], - offset: u32, -) -> usize { +pub fn uncompress_sorted<'a>(compressed_data: &'a [u8], output: &mut [u32], offset: u32) -> usize { let mut read_byte = 0; let mut result = offset; let num_els = output.len(); diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 85852ed22..89b4a3c62 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -2,6 +2,7 @@ Postings module (also called inverted index) */ +pub(crate) mod compression; /// Postings module /// /// Postings, also called inverted lists, is the key datastructure @@ -11,18 +12,17 @@ mod postings_writer; mod recorder; mod segment_postings; mod serializer; -pub(crate) mod compression; +mod skip; mod stacker; mod term_info; -mod skip; pub(crate) use self::postings_writer::MultiFieldPostingsWriter; pub use self::serializer::{FieldSerializer, InvertedIndexSerializer}; +use self::compression::COMPRESSION_BLOCK_SIZE; pub use self::postings::Postings; -pub use self::term_info::TermInfo; pub(crate) use self::skip::SkipReader; -use self::compression::{COMPRESSION_BLOCK_SIZE}; +pub use self::term_info::TermInfo; pub use self::segment_postings::{BlockSegmentPostings, SegmentPostings}; @@ -71,8 +71,7 @@ pub mod tests { let mut segment = index.new_segment(); let mut posting_serializer = InvertedIndexSerializer::open(&mut segment).unwrap(); { - let mut field_serializer = posting_serializer - .new_field(text_field, 120 * 4).unwrap(); + let mut field_serializer = posting_serializer.new_field(text_field, 120 * 4).unwrap(); field_serializer.new_term("abc".as_bytes()).unwrap(); for doc_id in 0u32..120u32 { let delta_positions = vec![1, 2, 3, 2]; @@ -512,13 +511,13 @@ pub mod tests { let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); for _ in 0..posting_list_size { let mut doc = Document::default(); - if rng.gen_bool(1f64/ 15f64) { + if rng.gen_bool(1f64 / 15f64) { doc.add_text(text_field, "a"); } - if rng.gen_bool(1f64/ 10f64) { + if rng.gen_bool(1f64 / 10f64) { doc.add_text(text_field, "b"); } - if rng.gen_bool(1f64/ 5f64) { + if rng.gen_bool(1f64 / 5f64) { doc.add_text(text_field, "c"); } doc.add_text(text_field, "d"); diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index d90b322d9..bb3ca10a4 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -94,7 +94,8 @@ impl MultiFieldPostingsWriter { &self, serializer: &mut InvertedIndexSerializer, ) -> Result>> { - let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> = self.term_index + let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> = self + .term_index .iter() .map(|(term_bytes, addr, bucket_id)| (term_bytes, addr, bucket_id as UnorderedTermId)) .collect(); diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs index e787ba5e9..c355a78ba 100644 --- a/src/postings/recorder.rs +++ 
b/src/postings/recorder.rs @@ -107,7 +107,8 @@ impl Recorder for TermFrequencyRecorder { fn serialize(&self, serializer: &mut FieldSerializer, heap: &MemoryArena) -> io::Result<()> { // the last document has not been closed... // its term freq is self.current_tf. - let mut doc_iter = self.stack + let mut doc_iter = self + .stack .iter(heap) .chain(Some(self.current_tf).into_iter()); diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 74fcbc199..8986c036b 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -1,20 +1,20 @@ -use postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE}; -use DocId; use common::BitSet; use common::HasLen; -use postings::compression::compressed_block_size; +use common::{BinarySerializable, VInt}; use docset::{DocSet, SkipResult}; use fst::Streamer; +use owned_read::OwnedRead; +use positions::PositionReader; +use postings::compression::compressed_block_size; +use postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE}; use postings::serializer::PostingsSerializer; use postings::FreqReadingOption; use postings::Postings; -use owned_read::OwnedRead; -use common::{VInt, BinarySerializable}; -use postings::USE_SKIP_INFO_LIMIT; use postings::SkipReader; +use postings::USE_SKIP_INFO_LIMIT; use schema::IndexRecordOption; -use positions::PositionReader; use std::cmp::Ordering; +use DocId; const EMPTY_ARR: [u8; 0] = []; @@ -98,7 +98,7 @@ impl SegmentPostings { docs.len() as u32, OwnedRead::new(buffer), IndexRecordOption::Basic, - IndexRecordOption::Basic + IndexRecordOption::Basic, ); SegmentPostings::from_block_postings(block_segment_postings, None) } @@ -151,7 +151,11 @@ fn exponential_search(target: u32, arr: &[u32]) -> (usize, usize) { /// The target is assumed smaller or equal to the last element. fn search_within_block(block_docs: &[u32], target: u32) -> usize { let (start, end) = exponential_search(target, block_docs); - start.wrapping_add(block_docs[start..end].binary_search(&target).unwrap_or_else(|e| e)) + start.wrapping_add( + block_docs[start..end] + .binary_search(&target) + .unwrap_or_else(|e| e), + ) } impl DocSet for SegmentPostings { @@ -179,21 +183,20 @@ impl DocSet for SegmentPostings { // check if we need to go to the next block let need_positions = self.position_computer.is_some(); let mut sum_freqs_skipped: u32 = 0; - if !self.block_cursor - .docs() - .last() - .map(|doc| *doc >= target) - .unwrap_or(false) // there should always be at least a document in the block - // since advance returned. + if !self + .block_cursor + .docs() + .last() + .map(|doc| *doc >= target) + .unwrap_or(false) + // there should always be at least a document in the block + // since advance returned. { // we are not in the right block. // // First compute all of the freqs skipped from the current block. if need_positions { - sum_freqs_skipped = self.block_cursor - .freqs()[self.cur..] 
- .iter() - .sum(); + sum_freqs_skipped = self.block_cursor.freqs()[self.cur..].iter().sum(); match self.block_cursor.skip_to(target) { BlockSegmentPostingsSkipResult::Success(block_skip_freqs) => { sum_freqs_skipped += block_skip_freqs; @@ -215,9 +218,13 @@ impl DocSet for SegmentPostings { let block_docs = self.block_cursor.docs(); debug_assert!(target >= self.doc()); - let new_cur = self.cur.wrapping_add(search_within_block(&block_docs[self.cur..], target)); + let new_cur = self + .cur + .wrapping_add(search_within_block(&block_docs[self.cur..], target)); if need_positions { - sum_freqs_skipped += self.block_cursor.freqs()[self.cur..new_cur].iter().sum::(); + sum_freqs_skipped += self.block_cursor.freqs()[self.cur..new_cur] + .iter() + .sum::(); self.position_computer .as_mut() .unwrap() @@ -330,7 +337,10 @@ pub struct BlockSegmentPostings { skip_reader: SkipReader, } -fn split_into_skips_and_postings(doc_freq: u32, mut data: OwnedRead) -> (Option, OwnedRead) { +fn split_into_skips_and_postings( + doc_freq: u32, + mut data: OwnedRead, +) -> (Option, OwnedRead) { if doc_freq >= USE_SKIP_INFO_LIMIT { let skip_len = VInt::deserialize(&mut data).expect("Data corrupted").0 as usize; let mut postings_data = data.clone(); @@ -345,7 +355,7 @@ fn split_into_skips_and_postings(doc_freq: u32, mut data: OwnedRead) -> (Option< #[derive(Debug, Eq, PartialEq)] pub enum BlockSegmentPostingsSkipResult { Terminated, - Success(u32) //< number of term freqs to skip + Success(u32), //< number of term freqs to skip } impl BlockSegmentPostings { @@ -353,7 +363,7 @@ impl BlockSegmentPostings { doc_freq: u32, data: OwnedRead, record_option: IndexRecordOption, - requested_option: IndexRecordOption + requested_option: IndexRecordOption, ) -> BlockSegmentPostings { let freq_reading_option = match (record_option, requested_option) { (IndexRecordOption::Basic, _) => FreqReadingOption::NoFreq, @@ -362,11 +372,10 @@ impl BlockSegmentPostings { }; let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, data); - let skip_reader = - match skip_data_opt { - Some(skip_data) => SkipReader::new(skip_data, record_option), - None => SkipReader::new(OwnedRead::new(&EMPTY_ARR[..]), record_option) - }; + let skip_reader = match skip_data_opt { + Some(skip_data) => SkipReader::new(skip_data, record_option), + None => SkipReader::new(OwnedRead::new(&EMPTY_ARR[..]), record_option), + }; let doc_freq = doc_freq as usize; let num_vint_docs = doc_freq % COMPRESSION_BLOCK_SIZE; BlockSegmentPostings { @@ -450,7 +459,6 @@ impl BlockSegmentPostings { self.doc_decoder.output_len } - /// position on a block that may contains `doc_id`. /// Always advance the current block. /// @@ -461,9 +469,7 @@ impl BlockSegmentPostings { /// Returns false iff all of the document remaining are smaller than /// `doc_id`. In that case, all of these document are consumed. /// - pub fn skip_to(&mut self, - target_doc: DocId) -> BlockSegmentPostingsSkipResult { - + pub fn skip_to(&mut self, target_doc: DocId) -> BlockSegmentPostingsSkipResult { let mut skip_freqs = 0u32; while self.skip_reader.advance() { if self.skip_reader.doc() >= target_doc { @@ -472,11 +478,11 @@ impl BlockSegmentPostings { // // We found our block! 
let num_bits = self.skip_reader.doc_num_bits(); - let num_consumed_bytes = self.doc_decoder - .uncompress_block_sorted( - self.remaining_data.as_ref(), - self.doc_offset, - num_bits); + let num_consumed_bytes = self.doc_decoder.uncompress_block_sorted( + self.remaining_data.as_ref(), + self.doc_offset, + num_bits, + ); self.remaining_data.advance(num_consumed_bytes); let tf_num_bits = self.skip_reader.tf_num_bits(); match self.freq_reading_option { @@ -486,9 +492,9 @@ impl BlockSegmentPostings { self.remaining_data.advance(num_bytes_to_skip); } FreqReadingOption::ReadFreq => { - let num_consumed_bytes = self.freq_decoder - .uncompress_block_unsorted(self.remaining_data.as_ref(), - tf_num_bits); + let num_consumed_bytes = self + .freq_decoder + .uncompress_block_unsorted(self.remaining_data.as_ref(), tf_num_bits); self.remaining_data.advance(num_consumed_bytes); } } @@ -518,7 +524,8 @@ impl BlockSegmentPostings { } } self.num_vint_docs = 0; - return self.docs() + return self + .docs() .last() .map(|last_doc| { if *last_doc >= target_doc { @@ -538,11 +545,11 @@ impl BlockSegmentPostings { pub fn advance(&mut self) -> bool { if self.skip_reader.advance() { let num_bits = self.skip_reader.doc_num_bits(); - let num_consumed_bytes = self.doc_decoder - .uncompress_block_sorted( - self.remaining_data.as_ref(), - self.doc_offset, - num_bits); + let num_consumed_bytes = self.doc_decoder.uncompress_block_sorted( + self.remaining_data.as_ref(), + self.doc_offset, + num_bits, + ); self.remaining_data.advance(num_consumed_bytes); let tf_num_bits = self.skip_reader.tf_num_bits(); match self.freq_reading_option { @@ -552,9 +559,9 @@ impl BlockSegmentPostings { self.remaining_data.advance(num_bytes_to_skip); } FreqReadingOption::ReadFreq => { - let num_consumed_bytes = self.freq_decoder - .uncompress_block_unsorted(self.remaining_data.as_ref(), - tf_num_bits); + let num_consumed_bytes = self + .freq_decoder + .uncompress_block_unsorted(self.remaining_data.as_ref(), tf_num_bits); self.remaining_data.advance(num_consumed_bytes); } } @@ -594,7 +601,6 @@ impl BlockSegmentPostings { doc_offset: 0, doc_freq: 0, - remaining_data: OwnedRead::new(vec![]), skip_reader: SkipReader::new(OwnedRead::new(vec![]), IndexRecordOption::Basic), } @@ -616,7 +622,9 @@ impl<'b> Streamer<'b> for BlockSegmentPostings { #[cfg(test)] mod tests { + use super::search_within_block; use super::BlockSegmentPostings; + use super::BlockSegmentPostingsSkipResult; use super::SegmentPostings; use common::HasLen; use core::Index; @@ -626,9 +634,7 @@ mod tests { use schema::SchemaBuilder; use schema::Term; use schema::INT_INDEXED; - use super::BlockSegmentPostingsSkipResult; use DocId; - use super::search_within_block; #[test] fn test_empty_segment_postings() { @@ -645,7 +651,6 @@ mod tests { assert_eq!(postings.doc_freq(), 0); } - fn search_within_block_trivial_but_slow(block: &[u32], target: u32) -> usize { block .iter() @@ -653,11 +658,15 @@ mod tests { .enumerate() .filter(|&(_, ref val)| *val >= target) .next() - .unwrap().0 + .unwrap() + .0 } fn util_test_search_within_block(block: &[u32], target: u32) { - assert_eq!(search_within_block(block, target), search_within_block_trivial_but_slow(block, target)); + assert_eq!( + search_within_block(block, target), + search_within_block_trivial_but_slow(block, target) + ); } fn util_test_search_within_block_all(block: &[u32]) { @@ -677,7 +686,7 @@ mod tests { #[test] fn test_search_within_block() { for len in 1u32..128u32 { - let v: Vec = (0..len).map(|i| i*2).collect(); + let v: Vec = 
(0..len).map(|i| i * 2).collect(); util_test_search_within_block_all(&v[..]); } } @@ -726,14 +735,22 @@ mod tests { fn test_block_segment_postings_skip() { for i in 0..4 { let mut block_postings = build_block_postings(vec![3]); - assert_eq!(block_postings.skip_to(i), BlockSegmentPostingsSkipResult::Success(0u32)); - assert_eq!(block_postings.skip_to(i), BlockSegmentPostingsSkipResult::Terminated); + assert_eq!( + block_postings.skip_to(i), + BlockSegmentPostingsSkipResult::Success(0u32) + ); + assert_eq!( + block_postings.skip_to(i), + BlockSegmentPostingsSkipResult::Terminated + ); } let mut block_postings = build_block_postings(vec![3]); - assert_eq!(block_postings.skip_to(4u32), BlockSegmentPostingsSkipResult::Terminated); + assert_eq!( + block_postings.skip_to(4u32), + BlockSegmentPostingsSkipResult::Terminated + ); } - #[test] fn test_block_segment_postings_skip2() { let mut docs = vec![0]; @@ -741,14 +758,23 @@ mod tests { docs.push((i * i / 100) + i); } let mut block_postings = build_block_postings(docs.clone()); - for i in vec![0, 424, 10000] { - assert_eq!(block_postings.skip_to(i), BlockSegmentPostingsSkipResult::Success(0u32)); + for i in vec![0, 424, 10000] { + assert_eq!( + block_postings.skip_to(i), + BlockSegmentPostingsSkipResult::Success(0u32) + ); let docs = block_postings.docs(); assert!(docs[0] <= i); assert!(docs.last().cloned().unwrap_or(0u32) >= i); } - assert_eq!(block_postings.skip_to(100_000), BlockSegmentPostingsSkipResult::Terminated); - assert_eq!(block_postings.skip_to(101_000), BlockSegmentPostingsSkipResult::Terminated); + assert_eq!( + block_postings.skip_to(100_000), + BlockSegmentPostingsSkipResult::Terminated + ); + assert_eq!( + block_postings.skip_to(101_000), + BlockSegmentPostingsSkipResult::Terminated + ); } #[test] diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 521d467d2..f7661933a 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -1,18 +1,18 @@ use super::TermInfo; -use common::{VInt, BinarySerializable}; +use common::{BinarySerializable, VInt}; use common::{CompositeWrite, CountingWriter}; -use postings::compression::{VIntEncoder, BlockEncoder, COMPRESSION_BLOCK_SIZE}; use core::Segment; use directory::WritePtr; +use positions::PositionSerializer; +use postings::compression::{BlockEncoder, VIntEncoder, COMPRESSION_BLOCK_SIZE}; +use postings::skip::SkipSerializer; +use postings::USE_SKIP_INFO_LIMIT; use schema::Schema; use schema::{Field, FieldEntry, FieldType}; use std::io::{self, Write}; use termdict::{TermDictionaryBuilder, TermOrdinal}; use DocId; use Result; -use postings::USE_SKIP_INFO_LIMIT; -use postings::skip::SkipSerializer; -use positions::PositionSerializer; /// `PostingsSerializer` is in charge of serializing /// postings on disk, in the @@ -104,7 +104,7 @@ impl InvertedIndexSerializer { term_dictionary_write, postings_write, positions_write, - positionsidx_write + positionsidx_write, ) } @@ -135,7 +135,7 @@ impl<'a> FieldSerializer<'a> { term_dictionary_write: &'a mut CountingWriter, postings_write: &'a mut CountingWriter, positions_write: &'a mut CountingWriter, - positionsidx_write: &'a mut CountingWriter + positionsidx_write: &'a mut CountingWriter, ) -> io::Result> { let (term_freq_enabled, position_enabled): (bool, bool) = match field_type { FieldType::Str(ref text_options) => { @@ -153,7 +153,8 @@ impl<'a> FieldSerializer<'a> { }; let term_dictionary_builder = TermDictionaryBuilder::new(term_dictionary_write, field_type)?; - let postings_serializer = 
PostingsSerializer::new(postings_write, term_freq_enabled, position_enabled); + let postings_serializer = + PostingsSerializer::new(postings_write, term_freq_enabled, position_enabled); let positions_serializer_opt = if position_enabled { Some(PositionSerializer::new(positions_write, positionsidx_write)) } else { @@ -171,14 +172,15 @@ impl<'a> FieldSerializer<'a> { } fn current_term_info(&self) -> TermInfo { - let positions_idx = self.positions_serializer_opt + let positions_idx = self + .positions_serializer_opt .as_ref() .map(|positions_serializer| positions_serializer.positions_idx()) .unwrap_or(0u64); TermInfo { doc_freq: 0, postings_offset: self.postings_serializer.addr(), - positions_idx + positions_idx, } } @@ -253,7 +255,7 @@ impl<'a> FieldSerializer<'a> { struct Block { doc_ids: [DocId; COMPRESSION_BLOCK_SIZE], term_freqs: [u32; COMPRESSION_BLOCK_SIZE], - len: usize + len: usize, } impl Block { @@ -261,7 +263,7 @@ impl Block { Block { doc_ids: [0u32; COMPRESSION_BLOCK_SIZE], term_freqs: [0u32; COMPRESSION_BLOCK_SIZE], - len: 0 + len: 0, } } @@ -312,9 +314,12 @@ pub struct PostingsSerializer { termfreq_sum_enabled: bool, } - impl PostingsSerializer { - pub fn new(write: W, termfreq_enabled: bool, termfreq_sum_enabled: bool) -> PostingsSerializer { + pub fn new( + write: W, + termfreq_enabled: bool, + termfreq_sum_enabled: bool, + ) -> PostingsSerializer { PostingsSerializer { output_write: CountingWriter::wrap(write), @@ -337,14 +342,16 @@ impl PostingsSerializer { .block_encoder .compress_block_sorted(&self.block.doc_ids(), self.last_doc_id_encoded); self.last_doc_id_encoded = self.block.last_doc(); - self.skip_write.write_doc(self.last_doc_id_encoded, num_bits); + self.skip_write + .write_doc(self.last_doc_id_encoded, num_bits); // last el block 0, offset block 1, self.postings_write.extend(block_encoded); } if self.termfreq_enabled { // encode the term_freqs - let (num_bits, block_encoded): (u8, &[u8]) = - self.block_encoder.compress_block_unsorted(&self.block.term_freqs()); + let (num_bits, block_encoded): (u8, &[u8]) = self + .block_encoder + .compress_block_unsorted(&self.block.term_freqs()); self.postings_write.extend(block_encoded); self.skip_write.write_term_freq(num_bits); if self.termfreq_sum_enabled { @@ -375,13 +382,15 @@ impl PostingsSerializer { // In that case, the remaining part is encoded // using variable int encoding. { - let block_encoded = self.block_encoder + let block_encoded = self + .block_encoder .compress_vint_sorted(&self.block.doc_ids(), self.last_doc_id_encoded); self.postings_write.write_all(block_encoded)?; } // ... 
Idem for term frequencies if self.termfreq_enabled { - let block_encoded = self.block_encoder + let block_encoded = self + .block_encoder .compress_vint_unsorted(self.block.term_freqs()); self.postings_write.write_all(block_encoded)?; } @@ -392,7 +401,6 @@ impl PostingsSerializer { VInt(skip_data.len() as u64).serialize(&mut self.output_write)?; self.output_write.write_all(skip_data)?; self.output_write.write_all(&self.postings_write[..])?; - } else { self.output_write.write_all(&self.postings_write[..])?; } diff --git a/src/postings/skip.rs b/src/postings/skip.rs index e2d59e2c6..ab2dcb6c2 100644 --- a/src/postings/skip.rs +++ b/src/postings/skip.rs @@ -1,8 +1,8 @@ -use DocId; use common::BinarySerializable; use owned_read::OwnedRead; use postings::compression::COMPRESSION_BLOCK_SIZE; use schema::IndexRecordOption; +use DocId; pub struct SkipSerializer { buffer: Vec, @@ -18,8 +18,11 @@ impl SkipSerializer { } pub fn write_doc(&mut self, last_doc: DocId, doc_num_bits: u8) { - assert!(last_doc > self.prev_doc, "write_doc(...) called with non-increasing doc ids. \ - Did you forget to call clear maybe?"); + assert!( + last_doc > self.prev_doc, + "write_doc(...) called with non-increasing doc ids. \ + Did you forget to call clear maybe?" + ); let delta_doc = last_doc - self.prev_doc; self.prev_doc = last_doc; delta_doc.serialize(&mut self.buffer).unwrap(); @@ -30,9 +33,10 @@ impl SkipSerializer { self.buffer.push(tf_num_bits); } - pub fn write_total_term_freq(&mut self, tf_sum: u32) { - tf_sum.serialize(&mut self.buffer).expect("Should never fail"); + tf_sum + .serialize(&mut self.buffer) + .expect("Should never fail"); } pub fn data(&self) -> &[u8] { @@ -103,33 +107,32 @@ impl SkipReader { } else { let doc_delta = u32::deserialize(&mut self.owned_read).expect("Skip data corrupted"); self.doc += doc_delta as DocId; - self.doc_num_bits = self.owned_read.get(0); + self.doc_num_bits = self.owned_read.get(0); match self.skip_info { IndexRecordOption::Basic => { self.owned_read.advance(1); } - IndexRecordOption::WithFreqs=> { + IndexRecordOption::WithFreqs => { self.tf_num_bits = self.owned_read.get(1); self.owned_read.advance(2); } IndexRecordOption::WithFreqsAndPositions => { self.tf_num_bits = self.owned_read.get(1); self.owned_read.advance(2); - self.tf_sum = u32::deserialize(&mut self.owned_read) - .expect("Failed reading tf_sum"); + self.tf_sum = + u32::deserialize(&mut self.owned_read).expect("Failed reading tf_sum"); } } true } - } } #[cfg(test)] mod tests { - use super::{SkipReader, SkipSerializer}; use super::IndexRecordOption; + use super::{SkipReader, SkipSerializer}; use owned_read::OwnedRead; #[test] @@ -171,4 +174,4 @@ mod tests { assert_eq!(skip_reader.doc_num_bits(), 5u8); assert!(!skip_reader.advance()); } -} \ No newline at end of file +} diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index b92a203eb..0a01d9977 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -5,8 +5,8 @@ use query::TermQuery; use query::Weight; use schema::IndexRecordOption; use schema::Term; -use Result; use std::collections::BTreeSet; +use Result; use Searcher; /// The boolean query combines a set of queries @@ -41,9 +41,9 @@ impl From)>> for BooleanQuery { } impl Query for BooleanQuery { - fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> Result> { - let sub_weights = self.subqueries + let sub_weights = self + .subqueries .iter() .map(|&(ref occur, ref subquery)| { Ok((*occur, 
subquery.weight(searcher, scoring_enabled)?)) diff --git a/src/query/empty_query.rs b/src/query/empty_query.rs index 06c15c3f3..6e64dca57 100644 --- a/src/query/empty_query.rs +++ b/src/query/empty_query.rs @@ -1,11 +1,11 @@ use super::Scorer; -use DocSet; -use Score; -use DocId; use query::Query; -use Result; -use Searcher; use query::Weight; +use DocId; +use DocSet; +use Result; +use Score; +use Searcher; use SegmentReader; /// `EmptyQuery` is a dummy `Query` in which no document matches. diff --git a/src/query/mod.rs b/src/query/mod.rs index 73a77174b..78b9cd56b 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -3,11 +3,11 @@ Query */ mod all_query; -mod empty_query; mod automaton_weight; mod bitset; mod bm25; mod boolean_query; +mod empty_query; mod exclude; mod fuzzy_query; mod intersection; @@ -34,10 +34,10 @@ pub use self::union::Union; pub use self::vec_docset::VecDocSet; pub use self::all_query::{AllQuery, AllScorer, AllWeight}; -pub use self::empty_query::{EmptyQuery, EmptyWeight, EmptyScorer}; pub use self::automaton_weight::AutomatonWeight; pub use self::bitset::BitSetDocSet; pub use self::boolean_query::BooleanQuery; +pub use self::empty_query::{EmptyQuery, EmptyScorer, EmptyWeight}; pub use self::exclude::Exclude; pub use self::fuzzy_query::FuzzyTermQuery; pub use self::intersection::intersect_scorers; diff --git a/src/query/occur.rs b/src/query/occur.rs index 1a9396de0..e91e3a2b6 100644 --- a/src/query/occur.rs +++ b/src/query/occur.rs @@ -46,4 +46,4 @@ pub fn compose_occur(left: Occur, right: Occur) -> Occur { } } } -} \ No newline at end of file +} diff --git a/src/query/phrase_query/phrase_query.rs b/src/query/phrase_query/phrase_query.rs index d103461c1..959b17b0e 100644 --- a/src/query/phrase_query/phrase_query.rs +++ b/src/query/phrase_query/phrase_query.rs @@ -5,8 +5,8 @@ use query::bm25::BM25Weight; use query::Query; use query::Weight; use schema::{Field, Term}; -use Result; use std::collections::BTreeSet; +use Result; /// `PhraseQuery` matches a specific sequence of words. 
/// diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs index 6e04291c6..85f075d3a 100644 --- a/src/query/phrase_query/phrase_scorer.rs +++ b/src/query/phrase_query/phrase_scorer.rs @@ -124,7 +124,8 @@ impl PhraseScorer { fieldnorm_reader: FieldNormReader, score_needed: bool, ) -> PhraseScorer { - let max_offset = term_postings.iter() + let max_offset = term_postings + .iter() .map(|&(offset, _)| offset) .max() .unwrap_or(0); diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs index 69ab4e184..de8eeb0d2 100644 --- a/src/query/phrase_query/phrase_weight.rs +++ b/src/query/phrase_query/phrase_weight.rs @@ -30,7 +30,6 @@ impl PhraseWeight { } impl Weight for PhraseWeight { - fn scorer(&self, reader: &SegmentReader) -> Result> { let similarity_weight = self.similarity_weight.clone(); let field = self.phrase_terms[0].1.field(); diff --git a/src/query/query.rs b/src/query/query.rs index 6abbf35e0..ca7de8ca6 100644 --- a/src/query/query.rs +++ b/src/query/query.rs @@ -2,10 +2,10 @@ use super::Weight; use collector::Collector; use core::searcher::Searcher; use downcast; +use std::collections::BTreeSet; use std::fmt; use Result; use SegmentLocalId; -use std::collections::BTreeSet; use Term; /// The `Query` trait defines a set of documents and a scoring method diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 557e38e24..6df6f9144 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -1,8 +1,8 @@ use super::user_input_ast::*; use combine::char::*; -use combine::*; -use combine::stream::StreamErrorFor; use combine::error::StreamError; +use combine::stream::StreamErrorFor; +use combine::*; use query::occur::Occur; use query::query_parser::user_input_ast::UserInputBound; @@ -123,7 +123,8 @@ parser! { } enum BinaryOperand { - Or, And + Or, + And, } parser! { @@ -138,19 +139,16 @@ parser! 
{ } } - enum Element { SingleEl(UserInputAST), - NormalDisjunctive(Vec>) + NormalDisjunctive(Vec>), } impl Element { pub fn into_dnf(self) -> Vec> { match self { - Element::NormalDisjunctive(conjunctions) => - conjunctions, - Element::SingleEl(el) => - vec!(vec!(el)), + Element::NormalDisjunctive(conjunctions) => conjunctions, + Element::SingleEl(el) => vec![vec![el]], } } } @@ -227,10 +225,12 @@ mod test { assert!(parse_to_ast().parse(query).is_err()); } - #[test] fn test_parse_query_to_ast_not_op() { - assert_eq!(format!("{:?}", parse_to_ast().parse("NOT")), "Err(UnexpectedParse)"); + assert_eq!( + format!("{:?}", parse_to_ast().parse("NOT")), + "Err(UnexpectedParse)" + ); test_parse_query_to_ast_helper("NOTa", "\"NOTa\""); test_parse_query_to_ast_helper("NOT a", "-(\"a\")"); } @@ -241,10 +241,22 @@ mod test { test_parse_query_to_ast_helper("a OR b", "(?(\"a\") ?(\"b\"))"); test_parse_query_to_ast_helper("a OR b AND c", "(?(\"a\") ?((+(\"b\") +(\"c\"))))"); test_parse_query_to_ast_helper("a AND b AND c", "(+(\"a\") +(\"b\") +(\"c\"))"); - assert_eq!(format!("{:?}", parse_to_ast().parse("a OR b aaa")), "Err(UnexpectedParse)"); - assert_eq!(format!("{:?}", parse_to_ast().parse("a AND b aaa")), "Err(UnexpectedParse)"); - assert_eq!(format!("{:?}", parse_to_ast().parse("aaa a OR b ")), "Err(UnexpectedParse)"); - assert_eq!(format!("{:?}", parse_to_ast().parse("aaa ccc a OR b ")), "Err(UnexpectedParse)"); + assert_eq!( + format!("{:?}", parse_to_ast().parse("a OR b aaa")), + "Err(UnexpectedParse)" + ); + assert_eq!( + format!("{:?}", parse_to_ast().parse("a AND b aaa")), + "Err(UnexpectedParse)" + ); + assert_eq!( + format!("{:?}", parse_to_ast().parse("aaa a OR b ")), + "Err(UnexpectedParse)" + ); + assert_eq!( + format!("{:?}", parse_to_ast().parse("aaa ccc a OR b ")), + "Err(UnexpectedParse)" + ); } #[test] diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 93deb48c1..a8aec4f56 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -1,11 +1,14 @@ use super::logical_ast::*; use super::query_grammar::parse_to_ast; use super::user_input_ast::*; +use combine::Parser; use core::Index; +use query::occur::compose_occur; +use query::query_parser::logical_ast::LogicalAST; use query::AllQuery; use query::BooleanQuery; +use query::EmptyQuery; use query::Occur; -use query::occur::compose_occur; use query::PhraseQuery; use query::Query; use query::RangeQuery; @@ -18,10 +21,6 @@ use std::num::ParseIntError; use std::ops::Bound; use std::str::FromStr; use tokenizer::TokenizerManager; -use combine::Parser; -use query::EmptyQuery; -use query::query_parser::logical_ast::LogicalAST; - /// Possible error that may happen when parsing a query. #[derive(Debug, PartialEq, Eq)] @@ -59,23 +58,24 @@ impl From for QueryParserError { } } - /// Recursively remove empty clause from the AST /// /// Returns `None` iff the `logical_ast` ended up being empty. 
fn trim_ast(logical_ast: LogicalAST) -> Option { match logical_ast { LogicalAST::Clause(children) => { - let trimmed_children = children.into_iter() - .flat_map(|(occur, child)| - trim_ast(child).map(|trimmed_child| (occur, trimmed_child)) ) + let trimmed_children = children + .into_iter() + .flat_map(|(occur, child)| { + trim_ast(child).map(|trimmed_child| (occur, trimmed_child)) + }) .collect::>(); if trimmed_children.is_empty() { None } else { Some(LogicalAST::Clause(trimmed_children)) } - }, + } _ => Some(logical_ast), } } @@ -188,8 +188,9 @@ impl QueryParser { /// Parse the user query into an AST. fn parse_query_to_logical_ast(&self, query: &str) -> Result { - let (user_input_ast, _remaining) = - parse_to_ast().parse(query).map_err(|_| QueryParserError::SyntaxError)?; + let (user_input_ast, _remaining) = parse_to_ast() + .parse(query) + .map_err(|_| QueryParserError::SyntaxError)?; self.compute_logical_ast(user_input_ast) } @@ -291,12 +292,9 @@ impl QueryParser { ) -> Result, QueryParserError> { let terms = self.compute_terms_for_string(field, phrase)?; match &terms[..] { - [] => - Ok(None), - [(_, term)] => - Ok(Some(LogicalLiteral::Term(term.clone()))), - _ => - Ok(Some(LogicalLiteral::Phrase(terms.clone()))), + [] => Ok(None), + [(_, term)] => Ok(Some(LogicalLiteral::Term(term.clone()))), + _ => Ok(Some(LogicalLiteral::Phrase(terms.clone()))), } } @@ -308,7 +306,11 @@ impl QueryParser { } } - fn resolve_bound(&self, field: Field, bound: &UserInputBound) -> Result, QueryParserError> { + fn resolve_bound( + &self, + field: Field, + bound: &UserInputBound, + ) -> Result, QueryParserError> { if bound.term_str() == "*" { return Ok(Bound::Unbounded); } @@ -355,18 +357,21 @@ impl QueryParser { Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries))) } UserInputAST::Unary(left_occur, subquery) => { - let (right_occur, logical_sub_queries) = self.compute_logical_ast_with_occur(*subquery)?; + let (right_occur, logical_sub_queries) = + self.compute_logical_ast_with_occur(*subquery)?; Ok((compose_occur(left_occur, right_occur), logical_sub_queries)) } - UserInputAST::Leaf(leaf) => { + UserInputAST::Leaf(leaf) => { let result_ast = self.compute_logical_ast_from_leaf(*leaf)?; Ok((Occur::Should, result_ast)) } } } - - fn compute_logical_ast_from_leaf(&self, leaf: UserInputLeaf) -> Result { + fn compute_logical_ast_from_leaf( + &self, + leaf: UserInputLeaf, + ) -> Result { match leaf { UserInputLeaf::Literal(literal) => { let term_phrases: Vec<(Field, String)> = match literal.field_name { @@ -391,21 +396,19 @@ impl QueryParser { asts.push(LogicalAST::Leaf(Box::new(ast))); } } - let result_ast: LogicalAST = - if asts.len() == 1 { - asts.into_iter().next().unwrap() - } else { - LogicalAST::Clause( - asts.into_iter() - .map(|ast| (Occur::Should, ast)) - .collect()) - }; + let result_ast: LogicalAST = if asts.len() == 1 { + asts.into_iter().next().unwrap() + } else { + LogicalAST::Clause(asts.into_iter().map(|ast| (Occur::Should, ast)).collect()) + }; Ok(result_ast) } - UserInputLeaf::All => { - Ok(LogicalAST::Leaf(Box::new(LogicalLiteral::All))) - } - UserInputLeaf::Range { field, lower, upper } => { + UserInputLeaf::All => Ok(LogicalAST::Leaf(Box::new(LogicalLiteral::All))), + UserInputLeaf::Range { + field, + lower, + upper, + } => { let fields = self.resolved_fields(&field)?; let mut clauses = fields .iter() @@ -433,14 +436,15 @@ impl QueryParser { Ok(result_ast) } } - } } fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box { match logical_literal { LogicalLiteral::Term(term) 
=> Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)), - LogicalLiteral::Phrase(term_with_offsets) => Box::new(PhraseQuery::new_with_offset(term_with_offsets)), + LogicalLiteral::Phrase(term_with_offsets) => { + Box::new(PhraseQuery::new_with_offset(term_with_offsets)) + } LogicalLiteral::Range { field, value_type, @@ -458,11 +462,16 @@ fn convert_to_query(logical_ast: LogicalAST) -> Box { .into_iter() .map(|(occur, subquery)| (occur, convert_to_query(subquery))) .collect::>(); - assert!(!occur_subqueries.is_empty(), "Should not be empty after trimming"); + assert!( + !occur_subqueries.is_empty(), + "Should not be empty after trimming" + ); Box::new(BooleanQuery::from(occur_subqueries)) - }, - Some(LogicalAST::Leaf(trimmed_logical_literal)) => convert_literal_to_query(*trimmed_logical_literal), - None => Box::new(EmptyQuery) + } + Some(LogicalAST::Leaf(trimmed_logical_literal)) => { + convert_literal_to_query(*trimmed_logical_literal) + } + None => Box::new(EmptyQuery), } } @@ -475,7 +484,7 @@ mod test { use schema::Field; use schema::{IndexRecordOption, TextFieldIndexing, TextOptions}; use schema::{SchemaBuilder, Term, INT_INDEXED, STORED, STRING, TEXT}; - use tokenizer::{Tokenizer, SimpleTokenizer, LowerCaser, StopWordFilter, TokenizerManager}; + use tokenizer::{LowerCaser, SimpleTokenizer, StopWordFilter, Tokenizer, TokenizerManager}; use Index; fn make_query_parser() -> QueryParser { @@ -498,9 +507,11 @@ mod test { let schema = schema_builder.build(); let default_fields = vec![title, text]; let tokenizer_manager = TokenizerManager::default(); - tokenizer_manager.register("en_with_stop_words", SimpleTokenizer - .filter(LowerCaser) - .filter(StopWordFilter::remove(vec!["the".to_string()])) + tokenizer_manager.register( + "en_with_stop_words", + SimpleTokenizer + .filter(LowerCaser) + .filter(StopWordFilter::remove(vec!["the".to_string()])), ); QueryParser::new(schema, default_fields, tokenizer_manager) } @@ -571,16 +582,8 @@ mod test { #[test] pub fn test_parse_query_empty() { - test_parse_query_to_logical_ast_helper( - "", - "", - false, - ); - test_parse_query_to_logical_ast_helper( - " ", - "", - false, - ); + test_parse_query_to_logical_ast_helper("", "", false); + test_parse_query_to_logical_ast_helper(" ", "", false); let query_parser = make_query_parser(); let query_result = query_parser.parse_query(""); let query = query_result.unwrap(); @@ -693,11 +696,7 @@ mod test { "(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO Unbounded)", false, ); - test_parse_query_to_logical_ast_helper( - "*", - "*", - false, - ); + test_parse_query_to_logical_ast_helper("*", "*", false); } #[test] diff --git a/src/query/query_parser/user_input_ast.rs b/src/query/query_parser/user_input_ast.rs index 37adb94be..40cdd2424 100644 --- a/src/query/query_parser/user_input_ast.rs +++ b/src/query/query_parser/user_input_ast.rs @@ -16,9 +16,7 @@ pub enum UserInputLeaf { impl Debug for UserInputLeaf { fn fmt(&self, formatter: &mut Formatter) -> Result<(), fmt::Error> { match self { - UserInputLeaf::Literal(literal) => { - literal.fmt(formatter) - } + UserInputLeaf::Literal(literal) => literal.fmt(formatter), UserInputLeaf::Range { ref field, ref lower, @@ -82,13 +80,12 @@ impl UserInputBound { pub enum UserInputAST { Clause(Vec), Unary(Occur, Box), -// Not(Box), -// Should(Box), -// Must(Box), + // Not(Box), + // Should(Box), + // Must(Box), Leaf(Box), } - impl UserInputAST { pub fn unary(self, occur: Occur) -> UserInputAST { UserInputAST::Unary(occur, Box::new(self)) @@ -100,12 +97,10 @@ impl 
UserInputAST { if asts.len() == 1 { asts.into_iter().next().unwrap() //< safe } else { - UserInputAST::Clause(asts - .into_iter() - .map(|ast: UserInputAST| - ast.unary(occur) - ) - .collect::>() + UserInputAST::Clause( + asts.into_iter() + .map(|ast: UserInputAST| ast.unary(occur)) + .collect::>(), ) } } @@ -117,11 +112,8 @@ impl UserInputAST { pub fn or(asts: Vec) -> UserInputAST { UserInputAST::compose(Occur::Should, asts) } - } - - /* impl UserInputAST { diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 6ec9c587f..fd739652c 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -274,7 +274,6 @@ impl RangeWeight { } impl Weight for RangeWeight { - fn scorer(&self, reader: &SegmentReader) -> Result> { let max_doc = reader.max_doc(); let mut doc_bitset = BitSet::with_max_value(max_doc); @@ -370,9 +369,7 @@ mod tests { let searcher = index.searcher(); let count_multiples = |range_query: RangeQuery| { let mut count_collector = CountCollector::default(); - range_query - .search(&searcher, &mut count_collector) - .unwrap(); + range_query.search(&searcher, &mut count_collector).unwrap(); count_collector.count() }; diff --git a/src/query/scorer.rs b/src/query/scorer.rs index 186e75a22..2c2f0cd62 100644 --- a/src/query/scorer.rs +++ b/src/query/scorer.rs @@ -50,7 +50,6 @@ impl Scorer for Box { } } - /// Wraps a `DocSet` and simply returns a constant `Scorer`. /// The `ConstScorer` is useful if you have a `DocSet` where /// you needed a scorer. diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs index d6cd72288..267ca9ba7 100644 --- a/src/query/term_query/term_query.rs +++ b/src/query/term_query/term_query.rs @@ -3,10 +3,10 @@ use query::bm25::BM25Weight; use query::Query; use query::Weight; use schema::IndexRecordOption; +use std::collections::BTreeSet; use Result; use Searcher; use Term; -use std::collections::BTreeSet; /// A Term query matches all of the documents /// containing a specific term. 
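Editorial note on the query-parser hunks above: after reformatting, `trim_ast` spans several indentation levels, so its intent can be hard to follow inside the diff. The sketch below restates the idea with a stand-in `Ast` type (illustrative only, not tantivy's `LogicalAST` or its API): a clause whose children all trim to nothing is removed, and `None` signals that the whole tree was empty.

    // Illustrative stand-in for the "recursively remove empty clauses" pattern.
    enum Ast {
        Clause(Vec<Ast>),
        Leaf(String),
    }

    /// Returns `None` iff the tree contains no leaf at all.
    fn trim(ast: Ast) -> Option<Ast> {
        match ast {
            Ast::Clause(children) => {
                // Trim each child; children that become empty are dropped.
                let trimmed: Vec<Ast> = children.into_iter().filter_map(trim).collect();
                if trimmed.is_empty() {
                    None
                } else {
                    Some(Ast::Clause(trimmed))
                }
            }
            leaf => Some(leaf),
        }
    }

    fn main() {
        // An outer clause whose only child is an empty clause trims away entirely.
        assert!(trim(Ast::Clause(vec![Ast::Clause(vec![])])).is_none());
        // A clause that still holds a leaf survives trimming.
        let ast = Ast::Clause(vec![Ast::Clause(vec![]), Ast::Leaf("term".into())]);
        assert!(trim(ast).is_some());
    }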
diff --git a/src/schema/schema.rs b/src/schema/schema.rs index d000ab9e2..0855200f4 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -444,7 +444,10 @@ mod tests { ) .unwrap(); assert_eq!(doc.get_first(title_field).unwrap().text(), Some("my title")); - assert_eq!(doc.get_first(author_field).unwrap().text(), Some("fulmicoton")); + assert_eq!( + doc.get_first(author_field).unwrap().text(), + Some("fulmicoton") + ); assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4); assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10); } diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index 8f4ec43b1..3957417e6 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -1,14 +1,14 @@ use htmlescape::encode_minimal; -use std::collections::BTreeMap; -use tokenizer::{Token, TokenStream}; -use Result; use query::Query; -use Searcher; use schema::Field; +use std::cmp::Ordering; +use std::collections::BTreeMap; use std::collections::BTreeSet; use tokenizer::BoxedTokenizer; +use tokenizer::{Token, TokenStream}; use Document; -use std::cmp::Ordering; +use Result; +use Searcher; const DEFAULT_MAX_NUM_CHARS: usize = 150; @@ -75,11 +75,10 @@ const HIGHLIGHTEN_PREFIX: &str = ""; const HIGHLIGHTEN_POSTFIX: &str = ""; impl Snippet { - pub fn empty() -> Snippet { Snippet { fragments: String::new(), - highlighted: Vec::new() + highlighted: Vec::new(), } } @@ -157,16 +156,17 @@ fn select_best_fragment_combination<'a>( fragments: Vec, text: &'a str, ) -> Snippet { - let best_fragment_opt = fragments - .iter() - .max_by(|left, right| { - let cmp_score = left.score.partial_cmp(&right.score).unwrap_or(Ordering::Equal); - if cmp_score == Ordering::Equal { - (right.start_offset, right.stop_offset).cmp(&(left.start_offset, left.stop_offset)) - } else { - cmp_score - } - }); + let best_fragment_opt = fragments.iter().max_by(|left, right| { + let cmp_score = left + .score + .partial_cmp(&right.score) + .unwrap_or(Ordering::Equal); + if cmp_score == Ordering::Equal { + (right.start_offset, right.stop_offset).cmp(&(left.start_offset, left.stop_offset)) + } else { + cmp_score + } + }); if let Some(fragment) = best_fragment_opt { let fragment_text = &text[fragment.start_offset..fragment.stop_offset]; let highlighted = fragment @@ -177,7 +177,8 @@ fn select_best_fragment_combination<'a>( item.start - fragment.start_offset, item.stop - fragment.start_offset, ) - }).collect(); + }) + .collect(); Snippet { fragments: fragment_text.to_string(), highlighted: highlighted, @@ -239,17 +240,16 @@ pub struct SnippetGenerator { terms_text: BTreeMap, tokenizer: Box, field: Field, - max_num_chars: usize + max_num_chars: usize, } impl SnippetGenerator { /// Creates a new snippet generator - pub fn new(searcher: &Searcher, - query: &Query, - field: Field) -> Result { + pub fn new(searcher: &Searcher, query: &Query, field: Field) -> Result { let mut terms = BTreeSet::new(); query.query_terms(&mut terms); - let terms_text: BTreeMap = terms.into_iter() + let terms_text: BTreeMap = terms + .into_iter() .filter(|term| term.field() == field) .map(|term| (term.text().to_string(), 1f32)) .collect(); @@ -258,7 +258,7 @@ impl SnippetGenerator { terms_text, tokenizer, field, - max_num_chars: DEFAULT_MAX_NUM_CHARS + max_num_chars: DEFAULT_MAX_NUM_CHARS, }) } @@ -272,7 +272,8 @@ impl SnippetGenerator { /// This method extract the text associated to the `SnippetGenerator`'s field /// and computes a snippet. 
pub fn snippet_from_doc(&self, doc: &Document) -> Snippet { - let text: String = doc.get_all(self.field) + let text: String = doc + .get_all(self.field) .into_iter() .flat_map(|val| val.text()) .collect::>() @@ -282,10 +283,12 @@ impl SnippetGenerator { /// Generates a snippet for the given text. pub fn snippet(&self, text: &str) -> Snippet { - let fragment_candidates = search_fragments(&*self.tokenizer, - &text, - &self.terms_text, - self.max_num_chars); + let fragment_candidates = search_fragments( + &*self.tokenizer, + &text, + &self.terms_text, + self.max_num_chars, + ); select_best_fragment_combination(fragment_candidates, &text) } } @@ -293,16 +296,16 @@ impl SnippetGenerator { #[cfg(test)] mod tests { use super::{search_fragments, select_best_fragment_combination}; + use query::QueryParser; + use schema::{IndexRecordOption, SchemaBuilder, TextFieldIndexing, TextOptions}; use std::collections::BTreeMap; use std::iter::Iterator; use tokenizer::{box_tokenizer, SimpleTokenizer}; use Index; - use schema::{SchemaBuilder, IndexRecordOption, TextOptions, TextFieldIndexing}; use SnippetGenerator; - use query::QueryParser; - - const TEST_TEXT: &'static str = r#"Rust is a systems programming language sponsored by Mozilla which + const TEST_TEXT: &'static str = + r#"Rust is a systems programming language sponsored by Mozilla which describes it as a "safe, concurrent, practical language", supporting functional and imperative-procedural paradigms. Rust is syntactically similar to C++[according to whom?], but its designers intend it to provide better memory safety while still maintaining @@ -431,7 +434,7 @@ Survey in 2016, 2017, and 2018."#; let text = "a b c d"; - let terms = BTreeMap::new(); + let terms = BTreeMap::new(); let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3); assert_eq!(fragments.len(), 0); @@ -442,12 +445,12 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet_generator() { - let mut schema_builder = SchemaBuilder::default (); - let text_options = TextOptions::default() - .set_indexing_options(TextFieldIndexing::default() + let mut schema_builder = SchemaBuilder::default(); + let text_options = TextOptions::default().set_indexing_options( + TextFieldIndexing::default() .set_tokenizer("en_stem") - .set_index_option(IndexRecordOption::Basic) - ); + .set_index_option(IndexRecordOption::Basic), + ); let text_field = schema_builder.add_text_field("text", text_options); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -474,6 +477,5 @@ Survey in 2016, 2017, and 2018."#; let snippet = snippet_generator.snippet(TEST_TEXT); assert_eq!(snippet.to_html(), "Rust is syntactically similar to C++[according to whom?],\nbut its designers intend it to"); } - } } diff --git a/src/store/mod.rs b/src/store/mod.rs index 7bce9085d..57930e8d8 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -109,7 +109,13 @@ pub mod tests { let store = StoreReader::from_source(store_source); for i in 0..1_000 { assert_eq!( - *store.get(i).unwrap().get_first(field_title).unwrap().text().unwrap(), + *store + .get(i) + .unwrap() + .get_first(field_title) + .unwrap() + .text() + .unwrap(), format!("Doc {}", i) ); } diff --git a/src/store/skiplist/skiplist_builder.rs b/src/store/skiplist/skiplist_builder.rs index 14ccd6dda..61f04bf34 100644 --- a/src/store/skiplist/skiplist_builder.rs +++ b/src/store/skiplist/skiplist_builder.rs @@ -72,7 +72,8 @@ impl SkipListBuilder { let mut skip_pointer = self.data_layer.insert(key, dest)?; loop { skip_pointer = match 
skip_pointer { - Some((skip_doc_id, skip_offset)) => self.get_skip_layer(layer_id) + Some((skip_doc_id, skip_offset)) => self + .get_skip_layer(layer_id) .insert(skip_doc_id, &skip_offset)?, None => { return Ok(()); diff --git a/src/termdict/term_info_store.rs b/src/termdict/term_info_store.rs index 70ded6090..98947feb5 100644 --- a/src/termdict/term_info_store.rs +++ b/src/termdict/term_info_store.rs @@ -59,7 +59,6 @@ impl TermInfoBlockMeta { } fn deserialize_term_info(&self, data: &[u8], inner_offset: usize) -> TermInfo { - let num_bits = self.num_bits() as usize; let mut cursor = num_bits * inner_offset; diff --git a/src/termdict/termdict.rs b/src/termdict/termdict.rs index f633211ef..03738e694 100644 --- a/src/termdict/termdict.rs +++ b/src/termdict/termdict.rs @@ -164,7 +164,8 @@ impl TermDictionary { let fst = self.fst_index.as_fst(); let mut node = fst.root(); while ord != 0 || !node.is_final() { - if let Some(transition) = node.transitions() + if let Some(transition) = node + .transitions() .take_while(|transition| transition.out.value() <= ord) .last() { diff --git a/src/tokenizer/lower_caser.rs b/src/tokenizer/lower_caser.rs index 578678a4a..38fa782fc 100644 --- a/src/tokenizer/lower_caser.rs +++ b/src/tokenizer/lower_caser.rs @@ -31,7 +31,6 @@ fn to_lowercase_unicode(text: &mut String, output: &mut String) { } } - impl TokenStream for LowerCaserTokenStream where TailTokenStream: TokenStream, @@ -50,7 +49,7 @@ where // fast track for ascii. self.token_mut().text.make_ascii_lowercase(); } else { - to_lowercase_unicode(&mut self.tail.token_mut().text, &mut self.buffer); + to_lowercase_unicode(&mut self.tail.token_mut().text, &mut self.buffer); mem::swap(&mut self.tail.token_mut().text, &mut self.buffer); } @@ -68,41 +67,43 @@ where fn wrap(tail: TailTokenStream) -> LowerCaserTokenStream { LowerCaserTokenStream { tail, - buffer: String::with_capacity(100) + buffer: String::with_capacity(100), } } } #[cfg(test)] mod tests { - use tokenizer::Tokenizer; use tokenizer::LowerCaser; - use tokenizer::TokenStream; use tokenizer::SimpleTokenizer; + use tokenizer::TokenStream; + use tokenizer::Tokenizer; #[test] fn test_to_lower_case() { - assert_eq!(lowercase_helper("Русский текст"), - vec!["русский".to_string(), "текст".to_string()]); + assert_eq!( + lowercase_helper("Русский текст"), + vec!["русский".to_string(), "текст".to_string()] + ); } fn lowercase_helper(text: &str) -> Vec { let mut tokens = vec![]; - let mut token_stream = SimpleTokenizer - .filter(LowerCaser) - .token_stream(text); + let mut token_stream = SimpleTokenizer.filter(LowerCaser).token_stream(text); while token_stream.advance() { let token_text = token_stream.token().text.clone(); tokens.push(token_text); } tokens - } - + } #[test] fn test_lowercaser() { assert_eq!(lowercase_helper("Tree"), vec!["tree".to_string()]); - assert_eq!(lowercase_helper("Русский"), vec!["русский".to_string()]); + assert_eq!( + lowercase_helper("Русский"), + vec!["русский".to_string()] + ); } -} \ No newline at end of file +} diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 9d94441ab..dd8eb18dd 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -151,8 +151,8 @@ pub use self::simple_tokenizer::SimpleTokenizer; pub use self::stemmer::Stemmer; pub use self::stop_word_filter::StopWordFilter; pub(crate) use self::token_stream_chain::TokenStreamChain; -pub use self::tokenizer::BoxedTokenizer; pub(crate) use self::tokenizer::box_tokenizer; +pub use self::tokenizer::BoxedTokenizer; pub use self::tokenizer::{Token, TokenFilter, 
TokenStream, Tokenizer}; pub use self::tokenizer_manager::TokenizerManager; diff --git a/src/tokenizer/raw_tokenizer.rs b/src/tokenizer/raw_tokenizer.rs index 12a5af82c..338109b88 100644 --- a/src/tokenizer/raw_tokenizer.rs +++ b/src/tokenizer/raw_tokenizer.rs @@ -18,7 +18,7 @@ impl<'a> Tokenizer<'a> for RawTokenizer { offset_to: text.len(), position: 0, text: text.to_string(), - position_length: 1 + position_length: 1, }; RawTokenStream { token, diff --git a/src/tokenizer/token_stream_chain.rs b/src/tokenizer/token_stream_chain.rs index 01d631e2b..224d7746c 100644 --- a/src/tokenizer/token_stream_chain.rs +++ b/src/tokenizer/token_stream_chain.rs @@ -71,13 +71,16 @@ where #[cfg(test)] mod tests { - use super::POSITION_GAP; + use super::super::{SimpleTokenizer, TokenStream, Tokenizer}; use super::TokenStreamChain; - use super::super::{Tokenizer, TokenStream, SimpleTokenizer}; + use super::POSITION_GAP; #[test] fn test_chain_first_emits_no_tokens() { - let token_streams = vec![SimpleTokenizer.token_stream(""), SimpleTokenizer.token_stream("hello world")]; + let token_streams = vec![ + SimpleTokenizer.token_stream(""), + SimpleTokenizer.token_stream("hello world"), + ]; let mut token_chain = TokenStreamChain::new(vec![0, 0], token_streams); assert!(token_chain.advance()); @@ -91,8 +94,8 @@ mod tests { assert_eq!(token_chain.token().offset_from, 6); assert_eq!(token_chain.token().offset_to, 11); assert_eq!(token_chain.token().position, POSITION_GAP); - + assert!(!token_chain.advance()); } -} \ No newline at end of file +} diff --git a/src/tokenizer/tokenizer.rs b/src/tokenizer/tokenizer.rs index fcdf8f21b..d73f84e93 100644 --- a/src/tokenizer/tokenizer.rs +++ b/src/tokenizer/tokenizer.rs @@ -276,7 +276,7 @@ mod test { offset_from: 2, offset_to: 3, text: "abc".to_string(), - position_length: 1 + position_length: 1, }; let t2 = t1.clone();
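Editorial note on the tokenizer hunks above: the lower-casing filter keeps an internal `String` buffer so the common ASCII case never allocates, and only falls back to char-by-char Unicode lowering otherwise. The sketch below isolates that decision with plain `String`s (illustrative only; the function names here are stand-ins for the filter's token plumbing, not tantivy's API).

    use std::mem;

    // Unicode path: lowercasing may map one char to several, hence `extend`.
    fn to_lowercase_unicode(text: &str, output: &mut String) {
        output.clear();
        for c in text.chars() {
            output.extend(c.to_lowercase());
        }
    }

    fn lowercase_in_place(text: &mut String, buffer: &mut String) {
        if text.is_ascii() {
            // Fast track for ASCII: in-place, no allocation.
            text.make_ascii_lowercase();
        } else {
            to_lowercase_unicode(text, buffer);
            mem::swap(text, buffer);
        }
    }

    fn main() {
        let mut buffer = String::with_capacity(100);
        let mut ascii = String::from("Tree");
        let mut cyrillic = String::from("Русский");
        lowercase_in_place(&mut ascii, &mut buffer);
        lowercase_in_place(&mut cyrillic, &mut buffer);
        assert_eq!(ascii, "tree");
        assert_eq!(cyrillic, "русский");
    }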