diff --git a/src/analyzer/analyzer.rs b/src/analyzer/analyzer.rs
index f2a485557..08cb0afcd 100644
--- a/src/analyzer/analyzer.rs
+++ b/src/analyzer/analyzer.rs
@@ -38,7 +38,7 @@ impl Default for Token {
 pub trait Analyzer<'a>: Sized + Clone {
     type TokenStreamImpl: TokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl;
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl;
 
     fn filter<NewFilter>(self, new_filter: NewFilter) -> ChainAnalyzer<NewFilter, Self>
         where NewFilter: TokenFilterFactory<<Self as Analyzer<'a>>::TokenStreamImpl>
@@ -51,8 +51,8 @@ pub trait Analyzer<'a>: Sized + Clone {
 }
 
 pub trait BoxedAnalyzer: Send + Sync {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a>;
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a>;
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
     fn boxed_clone(&self) -> Box<BoxedAnalyzer>;
 }
 
@@ -60,11 +60,11 @@ pub trait BoxedAnalyzer: Send + Sync {
 struct BoxableAnalyzer<A>(A) where A: for<'a> Analyzer<'a> + Send + Sync;
 
 impl<A> BoxedAnalyzer for BoxableAnalyzer<A> where A: 'static + Send + Sync + for<'a> Analyzer<'a> {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a> {
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a> {
         box self.0.token_stream(text)
     }
 
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
         assert!(texts.len() > 0);
         if texts.len() == 1 {
             box self.0.token_stream(texts[0])
@@ -72,7 +72,7 @@ impl<A> BoxedAnalyzer for BoxableAnalyzer<A> where A: 'static + Send + Sync + fo
         else {
             let mut offsets = vec!();
             let mut total_offset = 0;
-            for text in texts {
+            for &text in texts {
                 offsets.push(total_offset);
                 total_offset += text.len();
             }
@@ -154,7 +154,7 @@ impl<'a, HeadTokenFilterFactory, TailAnalyzer> Analyzer<'a>
 {
     type TokenStreamImpl = HeadTokenFilterFactory::ResultTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let tail_token_stream = self.tail.token_stream(text);
         self.head.transform(tail_token_stream)
     }
diff --git a/src/analyzer/japanese_tokenizer.rs b/src/analyzer/japanese_tokenizer.rs
index 909ccbb0c..e80ae9f5d 100644
--- a/src/analyzer/japanese_tokenizer.rs
+++ b/src/analyzer/japanese_tokenizer.rs
@@ -21,7 +21,7 @@ pub struct JapaneseTokenizerStream {
 impl<'a> Analyzer<'a> for JapaneseTokenizer {
     type TokenStreamImpl = JapaneseTokenizerStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let mut tokens = vec![];
         let mut offset_from;
         let mut offset_to = 0;
diff --git a/src/analyzer/lower_caser.rs b/src/analyzer/lower_caser.rs
index 866508782..c23e71ec3 100644
--- a/src/analyzer/lower_caser.rs
+++ b/src/analyzer/lower_caser.rs
@@ -1,5 +1,4 @@
 use super::{TokenFilterFactory, TokenStream, Token};
-use std::ascii::AsciiExt;
 
 /// Token filter that lowercase terms.
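The analyzer.rs hunks above relax `token_stream` from `&mut self` to `&self`: producing a token stream no longer requires exclusive access to the analyzer. A minimal sketch of why that matters, using hypothetical names rather than this crate's actual types:

```rust
// Minimal sketch (hypothetical `Analyzer`/`WhitespaceAnalyzer`, not the crate's API).
// With `token_stream(&self, ...)`, an analyzer held behind a shared reference --
// e.g. inside an `Arc` -- can still hand out token streams.
use std::sync::Arc;

trait Analyzer {
    fn token_stream<'a>(&self, text: &'a str) -> Vec<&'a str>;
}

struct WhitespaceAnalyzer;

impl Analyzer for WhitespaceAnalyzer {
    // `&self` is enough: tokenizing reads the input but never mutates the analyzer.
    fn token_stream<'a>(&self, text: &'a str) -> Vec<&'a str> {
        text.split_whitespace().collect()
    }
}

fn main() {
    let analyzer = Arc::new(WhitespaceAnalyzer);
    // No `mut` binding is needed, which is what the test updates below reflect.
    let tokens = analyzer.token_stream("hello happy tax payer");
    assert_eq!(tokens, vec!["hello", "happy", "tax", "payer"]);
}
```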
diff --git a/src/analyzer/mod.rs b/src/analyzer/mod.rs
index 227995b85..a312bc787 100644
--- a/src/analyzer/mod.rs
+++ b/src/analyzer/mod.rs
@@ -29,7 +29,7 @@ mod test {
     #[test]
     fn test_raw_tokenizer() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("raw").unwrap();
+        let en_analyzer = analyzer_manager.get("raw").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.term.clone()); };
@@ -44,7 +44,7 @@ mod test {
     fn test_en_analyzer() {
         let analyzer_manager = AnalyzerManager::default();
         assert!(analyzer_manager.get("en_doesnotexist").is_none());
-        let mut en_analyzer = analyzer_manager.get("en_stem").unwrap();
+        let en_analyzer = analyzer_manager.get("en_stem").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.term.clone()); };
@@ -60,7 +60,7 @@ mod test {
     #[test]
     fn test_jp_analyzer() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("ja").unwrap();
+        let en_analyzer = analyzer_manager.get("ja").unwrap();
         let mut tokens: Vec<String> = vec![];
 
         {
@@ -78,7 +78,7 @@ mod test {
     #[test]
     fn test_tokenizer_empty() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("en_stem").unwrap();
+        let en_analyzer = analyzer_manager.get("en_stem").unwrap();
         {
             let mut tokens: Vec<String> = vec![];
             {
diff --git a/src/analyzer/raw_tokenizer.rs b/src/analyzer/raw_tokenizer.rs
index 488ca5590..a5b2d3f6b 100644
--- a/src/analyzer/raw_tokenizer.rs
+++ b/src/analyzer/raw_tokenizer.rs
@@ -11,7 +11,7 @@ pub struct RawTokenStream {
 impl<'a> Analyzer<'a> for RawTokenizer {
     type TokenStreamImpl = RawTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let token = Token {
             offset_from: 0,
             offset_to: text.len(),
diff --git a/src/analyzer/simple_tokenizer.rs b/src/analyzer/simple_tokenizer.rs
index 1d4b71c22..e6cf30fb6 100644
--- a/src/analyzer/simple_tokenizer.rs
+++ b/src/analyzer/simple_tokenizer.rs
@@ -14,7 +14,7 @@ pub struct SimpleTokenStream<'a> {
 impl<'a> Analyzer<'a> for SimpleTokenizer {
     type TokenStreamImpl = SimpleTokenStream<'a>;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         SimpleTokenStream {
             text: text,
             chars: text.char_indices(),
diff --git a/src/analyzer/token_stream_chain.rs b/src/analyzer/token_stream_chain.rs
index 6f59f9ae2..89087fb02 100644
--- a/src/analyzer/token_stream_chain.rs
+++ b/src/analyzer/token_stream_chain.rs
@@ -48,16 +48,18 @@ impl<'a, TTokenStream> TokenStream for TokenStreamChain<TTokenStream>
     }
 
     fn token(&self) -> &Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &self.token
     }
 
     fn token_mut(&mut self) -> &mut Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &mut self.token
     }
 }
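The token_stream_chain.rs hunk replaces the explicit `if … { panic!(…) }` guards with `assert!`. The behaviour is unchanged, but the condition is inverted: `assert!` states the invariant that must hold (`stream_idx <= len`) and panics with the given message when it does not. A standalone illustration of the rewrite (not the crate's code; the index/length parameters are made up):

```rust
// Both guards panic with the same message in exactly the same case: idx > len.
fn guard_old(idx: usize, len: usize) {
    if idx > len {
        panic!("You called .token(), after the end of the token stream has been reached");
    }
}

fn guard_new(idx: usize, len: usize) {
    assert!(
        idx <= len,
        "You called .token(), after the end of the token stream has been reached"
    );
}

fn main() {
    // In range: neither guard panics.
    guard_old(2, 3);
    guard_new(2, 3);
    // Out of range, e.g. (4, 3): both guards would panic with the message above.
}
```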
diff --git a/src/schema/index_record_option.rs b/src/schema/index_record_option.rs
index edb57eb3a..e74f70c3a 100644
--- a/src/schema/index_record_option.rs
+++ b/src/schema/index_record_option.rs
@@ -13,12 +13,16 @@
 ///
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
 pub enum IndexRecordOption {
+    /// Records only the `DocId`s
     #[serde(rename = "basic")]
-    Basic, //< records only the `DocId`s
+    Basic,
+    /// Records the document ids as well as the term frequency.
     #[serde(rename = "freq")]
-    WithFreqs, //< records the document ids as well as the term frequency.
+    WithFreqs,
+    /// Records the document id, the term frequency and the positions of
+    /// the occurences in the document.
     #[serde(rename = "position")]
-    WithFreqsAndPositions, //< records the document id, the term frequency and the positions of the occurences in the document.
+    WithFreqsAndPositions,
 }
 
 impl IndexRecordOption {
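The index_record_option.rs hunk only turns the trailing `//<` remarks into `///` doc comments, so they now appear in the generated rustdoc; the `#[serde(rename = "…")]` attributes are untouched and keep controlling the serialized names. A small sketch of that serde behaviour, assuming the `serde` (with the derive feature) and `serde_json` crates are available and using a trimmed-down copy of the enum:

```rust
use serde::{Deserialize, Serialize};

/// Trimmed-down copy of the enum above: `///` doc comments show up in rustdoc,
/// while the old trailing `//<` comments were plain comments and did not.
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
enum IndexRecordOption {
    /// Records only the `DocId`s
    #[serde(rename = "basic")]
    Basic,
    /// Records the document ids as well as the term frequency.
    #[serde(rename = "freq")]
    WithFreqs,
}

fn main() {
    // The rename attribute, not the Rust identifier, decides the serialized form.
    assert_eq!(serde_json::to_string(&IndexRecordOption::Basic).unwrap(), "\"basic\"");
    let parsed: IndexRecordOption = serde_json::from_str("\"freq\"").unwrap();
    assert_eq!(parsed, IndexRecordOption::WithFreqs);
}
```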