mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-09 10:32:55 +00:00
Analyzer::token_stream does not need to be &mut self
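The diff below drops the &mut self receiver from Analyzer::token_stream: each call constructs a fresh token stream and never mutates the analyzer itself, so a shared borrow is enough. A minimal sketch of why that works, using hypothetical stand-in types (Token, WhitespaceAnalyzer), not tantivy's real ones:

// Sketch only: simplified stand-ins for the Analyzer/TokenStream pair.
#[derive(Debug)]
struct Token {
    term: String,
    offset_from: usize,
    offset_to: usize,
}

trait Analyzer {
    // &self suffices: the returned stream owns all per-call state.
    fn token_stream(&self, text: &str) -> Vec<Token>;
}

struct WhitespaceAnalyzer;

impl Analyzer for WhitespaceAnalyzer {
    fn token_stream(&self, text: &str) -> Vec<Token> {
        text.split_whitespace()
            .map(|word| {
                // word is a subslice of text, so pointer arithmetic gives its byte offset
                let offset_from = word.as_ptr() as usize - text.as_ptr() as usize;
                Token {
                    term: word.to_string(),
                    offset_from,
                    offset_to: offset_from + word.len(),
                }
            })
            .collect()
    }
}

fn main() {
    let analyzer = WhitespaceAnalyzer; // no mut binding needed anymore
    println!("{:?}", analyzer.token_stream("hello world"));
}

With &mut self, a caller holding only a shared reference (for example an analyzer stored in a registry) could not tokenize at all; with &self it can.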
@@ -38,7 +38,7 @@ impl Default for Token {
 pub trait Analyzer<'a>: Sized + Clone {
     type TokenStreamImpl: TokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl;
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl;
 
     fn filter<NewFilter>(self, new_filter: NewFilter) -> ChainAnalyzer<NewFilter, Self>
         where NewFilter: TokenFilterFactory<<Self as Analyzer<'a>>::TokenStreamImpl>
@@ -51,8 +51,8 @@ pub trait Analyzer<'a>: Sized + Clone {
 }
 
 pub trait BoxedAnalyzer: Send + Sync {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a>;
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a>;
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
     fn boxed_clone(&self) -> Box<BoxedAnalyzer>;
 }
 
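Since BoxedAnalyzer is Send + Sync and its methods now take &self, one boxed analyzer can serve many threads without locking. A hedged sketch of that pattern with a simplified trait (Lowercaser is hypothetical, and modern dyn syntax is used here):

use std::sync::Arc;
use std::thread;

// Simplified stand-in for BoxedAnalyzer: &self + Send + Sync means an
// Arc<dyn ...> can be shared freely across threads.
trait BoxedAnalyzer: Send + Sync {
    fn token_terms(&self, text: &str) -> Vec<String>;
}

struct Lowercaser;

impl BoxedAnalyzer for Lowercaser {
    fn token_terms(&self, text: &str) -> Vec<String> {
        text.split_whitespace().map(str::to_lowercase).collect()
    }
}

fn main() {
    let analyzer: Arc<dyn BoxedAnalyzer> = Arc::new(Lowercaser);
    let handles: Vec<_> = (0..4)
        .map(|i| {
            let a = Arc::clone(&analyzer);
            thread::spawn(move || a.token_terms(&format!("Doc number {}", i)))
        })
        .collect();
    for handle in handles {
        println!("{:?}", handle.join().unwrap());
    }
}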
@@ -60,11 +60,11 @@ pub trait BoxedAnalyzer: Send + Sync {
 struct BoxableAnalyzer<A>(A) where A: for<'a> Analyzer<'a> + Send + Sync;
 
 impl<A> BoxedAnalyzer for BoxableAnalyzer<A> where A: 'static + Send + Sync + for<'a> Analyzer<'a> {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a> {
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a> {
         box self.0.token_stream(text)
     }
 
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
         assert!(texts.len() > 0);
         if texts.len() == 1 {
             box self.0.token_stream(texts[0])
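Note that box self.0.token_stream(text) relies on the unstable box_syntax nightly feature in use here at the time; on stable Rust the same allocation is spelled with Box::new. A tiny equivalent sketch:

// Stable-Rust equivalent of a nightly `box expr` allocation.
fn boxed_stream(terms: Vec<String>) -> Box<dyn Iterator<Item = String>> {
    Box::new(terms.into_iter()) // nightly: box terms.into_iter()
}

fn main() {
    let mut stream = boxed_stream(vec!["a".into(), "b".into()]);
    assert_eq!(stream.next().as_deref(), Some("a"));
}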
@@ -72,7 +72,7 @@ impl<A> BoxedAnalyzer for BoxableAnalyzer<A> where A: 'static + Send + Sync + fo
         else {
             let mut offsets = vec!();
             let mut total_offset = 0;
-            for text in texts {
+            for &text in texts {
                 offsets.push(total_offset);
                 total_offset += text.len();
             }
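The for &text in texts fix destructures the &&str that iterating a &[&str] yields, leaving text as a plain &str (auto-deref made the method calls behave the same either way; the pattern form just keeps the type tidy). The offset bookkeeping it drives can be sketched standalone like this (cumulative_offsets is a hypothetical helper, not tantivy's code):

// Each concatenated text starts where the previous ones ended, in bytes.
fn cumulative_offsets(texts: &[&str]) -> Vec<usize> {
    let mut offsets = vec![];
    let mut total_offset = 0;
    for &text in texts {
        // `&text` strips the extra reference: text is &str, not &&str
        offsets.push(total_offset);
        total_offset += text.len();
    }
    offsets
}

fn main() {
    assert_eq!(cumulative_offsets(&["ab", "cde", "f"]), vec![0, 2, 5]);
}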
@@ -154,7 +154,7 @@ impl<'a, HeadTokenFilterFactory, TailAnalyzer> Analyzer<'a>
 {
     type TokenStreamImpl = HeadTokenFilterFactory::ResultTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let tail_token_stream = self.tail.token_stream(text);
         self.head.transform(tail_token_stream)
     }
 
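This is the filter-chain analyzer: the tail analyzer tokenizes the text, and the head filter factory wraps the resulting stream. A simplified, hypothetical sketch of that delegation (real tantivy filters wrap streams lazily rather than materializing Vecs):

// Illustrative chain: tail produces tokens, head transforms them.
struct LowercaseFilter;

impl LowercaseFilter {
    fn transform(&self, tokens: Vec<String>) -> Vec<String> {
        tokens.into_iter().map(|t| t.to_lowercase()).collect()
    }
}

struct ChainAnalyzer<Tail> {
    head: LowercaseFilter,
    tail: Tail,
}

impl<Tail: Fn(&str) -> Vec<String>> ChainAnalyzer<Tail> {
    // Mirrors the diff: build the tail stream, then hand it to the head.
    fn token_stream(&self, text: &str) -> Vec<String> {
        let tail_token_stream = (self.tail)(text);
        self.head.transform(tail_token_stream)
    }
}

fn main() {
    let analyzer = ChainAnalyzer {
        head: LowercaseFilter,
        tail: |text: &str| text.split_whitespace().map(String::from).collect::<Vec<String>>(),
    };
    assert_eq!(analyzer.token_stream("Hello WORLD"), vec!["hello", "world"]);
}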
@@ -21,7 +21,7 @@ pub struct JapaneseTokenizerStream {
 impl<'a> Analyzer<'a> for JapaneseTokenizer {
     type TokenStreamImpl = JapaneseTokenizerStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let mut tokens = vec![];
         let mut offset_from;
         let mut offset_to = 0;
@@ -1,5 +1,4 @@
 use super::{TokenFilterFactory, TokenStream, Token};
-use std::ascii::AsciiExt;
 
 
 /// Token filter that lowercase terms.
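Dropping use std::ascii::AsciiExt; works because the methods the trait used to provide, such as to_ascii_lowercase, became inherent on str, char, and u8, making the import redundant (the trait itself was later deprecated). For example, this compiles on stable with no import at all:

fn main() {
    // Inherent methods: no AsciiExt import required.
    assert_eq!("TöKEN".to_ascii_lowercase(), "töken"); // non-ASCII chars pass through
    assert_eq!(b'A'.to_ascii_lowercase(), b'a');
}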
@@ -29,7 +29,7 @@ mod test {
     #[test]
     fn test_raw_tokenizer() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("raw").unwrap();
+        let en_analyzer = analyzer_manager.get("raw").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.term.clone()); };
@@ -44,7 +44,7 @@ mod test {
     fn test_en_analyzer() {
         let analyzer_manager = AnalyzerManager::default();
         assert!(analyzer_manager.get("en_doesnotexist").is_none());
-        let mut en_analyzer = analyzer_manager.get("en_stem").unwrap();
+        let en_analyzer = analyzer_manager.get("en_stem").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.term.clone()); };
@@ -60,7 +60,7 @@ mod test {
     #[test]
     fn test_jp_analyzer() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("ja").unwrap();
+        let en_analyzer = analyzer_manager.get("ja").unwrap();
 
         let mut tokens: Vec<String> = vec![];
         {
@@ -78,7 +78,7 @@ mod test {
     #[test]
     fn test_tokenizer_empty() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("en_stem").unwrap();
+        let en_analyzer = analyzer_manager.get("en_stem").unwrap();
         {
             let mut tokens: Vec<String> = vec![];
             {
@@ -11,7 +11,7 @@ pub struct RawTokenStream {
 impl<'a> Analyzer<'a> for RawTokenizer {
     type TokenStreamImpl = RawTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let token = Token {
             offset_from: 0,
             offset_to: text.len(),
@@ -14,7 +14,7 @@ pub struct SimpleTokenStream<'a> {
 impl<'a> Analyzer<'a> for SimpleTokenizer {
     type TokenStreamImpl = SimpleTokenStream<'a>;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         SimpleTokenStream {
             text: text,
             chars: text.char_indices(),
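SimpleTokenStream stores the text alongside a char_indices iterator, which yields each character with its byte offset; that is what keeps offset_from/offset_to byte-accurate even for multi-byte UTF-8 input. A standalone sketch of the technique (simple_tokens is a hypothetical helper, not tantivy's code):

// Word boundaries via char_indices: offsets are byte positions into text.
fn simple_tokens(text: &str) -> Vec<(usize, usize, &str)> {
    let mut tokens = vec![];
    let mut start: Option<usize> = None;
    for (idx, ch) in text.char_indices() {
        if ch.is_alphanumeric() {
            start.get_or_insert(idx); // remember the token's starting byte
        } else if let Some(from) = start.take() {
            tokens.push((from, idx, &text[from..idx]));
        }
    }
    if let Some(from) = start {
        tokens.push((from, text.len(), &text[from..]));
    }
    tokens
}

fn main() {
    // 'é' is two bytes, so "café" spans bytes 0..5, not 0..4.
    assert_eq!(simple_tokens("café bar"), vec![(0, 5, "café"), (6, 9, "bar")]);
}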
@@ -48,16 +48,18 @@ impl<'a, TTokenStream> TokenStream for TokenStreamChain<TTokenStream>
     }
 
     fn token(&self) -> &Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &self.token
     }
 
     fn token_mut(&mut self) -> &mut Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &mut self.token
     }
 }
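The if/panic! pairs above are rewritten as a single assert! with a message; behavior is identical (note the condition flips from the failure case stream_idx > len to the invariant stream_idx <= len), but the invariant now reads directly. The same transformation in isolation:

// Both functions panic with the same message under the same condition.
fn check_old(stream_idx: usize, len: usize) {
    if stream_idx > len {
        panic!("You called .token(), after the end of the token stream has been reached");
    }
}

fn check_new(stream_idx: usize, len: usize) {
    assert!(
        stream_idx <= len,
        "You called .token(), after the end of the token stream has been reached"
    );
}

fn main() {
    check_old(2, 3); // passes
    check_new(2, 3); // passes; both would panic identically for, say, (4, 3)
}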
@@ -13,12 +13,16 @@
 ///
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
 pub enum IndexRecordOption {
+    /// Records only the `DocId`s
     #[serde(rename = "basic")]
-    Basic, //< records only the `DocId`s
+    Basic,
+    /// Records the document ids as well as the term frequency.
     #[serde(rename = "freq")]
-    WithFreqs, //< records the document ids as well as the term frequency.
+    WithFreqs,
+    /// Records the document id, the term frequency and the positions of
+    /// the occurences in the document.
     #[serde(rename = "position")]
-    WithFreqsAndPositions, //< records the document id, the term frequency and the positions of the occurences in the document.
+    WithFreqsAndPositions,
 }
 
 impl IndexRecordOption {
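Here the trailing //< comments are promoted to /// doc comments so rustdoc picks them up; the #[serde(rename = ...)] attributes, unchanged, make the variants serialize as the strings "basic", "freq", and "position". A hedged round-trip sketch of that serde behavior (needs the serde and serde_json crates; derive syntax shown is the modern form):

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum IndexRecordOption {
    #[serde(rename = "basic")]
    Basic,
    #[serde(rename = "freq")]
    WithFreqs,
    #[serde(rename = "position")]
    WithFreqsAndPositions,
}

fn main() {
    // Unit variants of a plain enum serialize as their (renamed) names.
    let json = serde_json::to_string(&IndexRecordOption::WithFreqs).unwrap();
    assert_eq!(json, "\"freq\"");
    let back: IndexRecordOption = serde_json::from_str("\"position\"").unwrap();
    assert_eq!(back, IndexRecordOption::WithFreqsAndPositions);
}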