diff --git a/src/tokenizer/japanese_tokenizer.rs b/src/tokenizer/japanese_tokenizer.rs
index af56eb94f..c9981b201 100644
--- a/src/tokenizer/japanese_tokenizer.rs
+++ b/src/tokenizer/japanese_tokenizer.rs
@@ -21,7 +21,7 @@ pub struct JapaneseTokenizerStream {
 impl<'a> Tokenizer<'a> for JapaneseTokenizer {
     type TokenStreamImpl = JapaneseTokenizerStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let mut tokens = vec![];
         let mut offset_from;
         let mut offset_to = 0;
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 94431797f..634bc4810 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -159,7 +159,7 @@ mod test {
     #[test]
     fn test_raw_tokenizer() {
         let tokenizer_manager = TokenizerManager::default();
-        let mut en_tokenizer = tokenizer_manager.get("raw").unwrap();
+        let en_tokenizer = tokenizer_manager.get("raw").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.text.clone()); };
@@ -174,7 +174,7 @@ mod test {
     fn test_en_tokenizer() {
         let tokenizer_manager = TokenizerManager::default();
         assert!(tokenizer_manager.get("en_doesnotexist").is_none());
-        let mut en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
+        let en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.text.clone()); };
@@ -190,7 +190,7 @@ mod test {
     #[test]
     fn test_jp_tokenizer() {
         let tokenizer_manager = TokenizerManager::default();
-        let mut en_tokenizer = tokenizer_manager.get("ja").unwrap();
+        let en_tokenizer = tokenizer_manager.get("ja").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
@@ -208,7 +208,7 @@ mod test {
     #[test]
     fn test_tokenizer_empty() {
         let tokenizer_manager = TokenizerManager::default();
-        let mut en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
+        let en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
         {
             let mut tokens: Vec<String> = vec![];
             {
diff --git a/src/tokenizer/raw_tokenizer.rs b/src/tokenizer/raw_tokenizer.rs
index 23bd6efe4..0d97103eb 100644
--- a/src/tokenizer/raw_tokenizer.rs
+++ b/src/tokenizer/raw_tokenizer.rs
@@ -13,7 +13,7 @@ pub struct RawTokenStream {
 impl<'a> Tokenizer<'a> for RawTokenizer {
     type TokenStreamImpl = RawTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let token = Token {
             offset_from: 0,
             offset_to: text.len(),
diff --git a/src/tokenizer/simple_tokenizer.rs b/src/tokenizer/simple_tokenizer.rs
index f6e223fc2..e9d93deee 100644
--- a/src/tokenizer/simple_tokenizer.rs
+++ b/src/tokenizer/simple_tokenizer.rs
@@ -16,7 +16,7 @@ pub struct SimpleTokenStream<'a> {
 impl<'a> Tokenizer<'a> for SimpleTokenizer {
     type TokenStreamImpl = SimpleTokenStream<'a>;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         SimpleTokenStream {
             text: text,
             chars: text.char_indices(),
diff --git a/src/tokenizer/token_stream_chain.rs b/src/tokenizer/token_stream_chain.rs
index 401e77bba..84d17fede 100644
--- a/src/tokenizer/token_stream_chain.rs
+++ b/src/tokenizer/token_stream_chain.rs
@@ -48,16 +48,18 @@ impl<'a, TTokenStream> TokenStream for TokenStreamChain<TTokenStream>
     }
 
     fn token(&self) -> &Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &self.token
     }
 
     fn token_mut(&mut self) -> &mut Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &mut self.token
     }
 }
diff --git a/src/tokenizer/tokenizer.rs b/src/tokenizer/tokenizer.rs
index 3b862f7ed..6297af0ca 100644
--- a/src/tokenizer/tokenizer.rs
+++ b/src/tokenizer/tokenizer.rs
@@ -48,7 +48,7 @@ pub trait Tokenizer<'a>: Sized + Clone {
     type TokenStreamImpl: TokenStream;
 
     /// Creates a token stream for a given `str`.
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl;
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl;
 
     /// Appends a token filter to the current tokenizer.
     ///
@@ -80,10 +80,9 @@ pub trait Tokenizer<'a>: Sized + Clone {
     }
 }
 
-
 pub trait BoxedTokenizer: Send + Sync {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a>;
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a>;
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
     fn boxed_clone(&self) -> Box<BoxedTokenizer>;
 }
 
@@ -91,11 +90,11 @@
 struct BoxableTokenizer<A>(A) where A: for<'a> Tokenizer<'a> + Send + Sync;
 
 impl<A> BoxedTokenizer for BoxableTokenizer<A> where A: 'static + Send + Sync + for<'a> Tokenizer<'a> {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a> {
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a> {
         box self.0.token_stream(text)
     }
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
         assert!(texts.len() > 0);
         if texts.len() == 1 {
             box self.0.token_stream(texts[0])
         }
@@ -103,7 +102,7 @@ impl<A> BoxedTokenizer for BoxableTokenizer<A> where A: 'static + Send + Sync +
         else {
             let mut offsets = vec!();
             let mut total_offset = 0;
-            for text in texts {
+            for &text in texts {
                 offsets.push(total_offset);
                 total_offset += text.len();
             }
@@ -217,7 +216,7 @@ impl<'a, HeadTokenFilterFactory, TailTokenizer> Tokenizer<'a>
 {
     type TokenStreamImpl = HeadTokenFilterFactory::ResultTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let tail_token_stream = self.tail.token_stream(text);
         self.head.transform(tail_token_stream)
     }
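
Note: the substance of this diff is relaxing the receiver of `token_stream` from `&mut self` to `&self`, so a tokenizer can be used through a shared reference (e.g. held in a registry or shared across threads) without exclusive access. The following is a minimal standalone sketch of what that enables, not the crate's actual API: the `Tokenizer` trait here is simplified and `WhitespaceTokenizer` is a hypothetical stand-in.

// Standalone sketch: a `&self` receiver lets one tokenizer instance
// produce any number of token streams without a `mut` binding.
trait Tokenizer {
    // Taking `&self` (rather than `&mut self`) means callers only need
    // a shared reference to create a stream.
    fn token_stream<'a>(&self, text: &'a str) -> Box<dyn Iterator<Item = &'a str> + 'a>;
}

#[derive(Clone)]
struct WhitespaceTokenizer;

impl Tokenizer for WhitespaceTokenizer {
    fn token_stream<'a>(&self, text: &'a str) -> Box<dyn Iterator<Item = &'a str> + 'a> {
        // Split on Unicode whitespace; each token borrows from `text`.
        Box::new(text.split_whitespace())
    }
}

fn main() {
    let tokenizer = WhitespaceTokenizer; // no `let mut` needed, unlike before the change
    let a: Vec<&str> = tokenizer.token_stream("hello happy tax payer").collect();
    let b: Vec<&str> = tokenizer.token_stream("a second text").collect();
    println!("{:?} {:?}", a, b);
}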