From 185a72b3419a8d2ee695be7552fd6b40e3245a74 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 16 Nov 2017 08:22:54 +0900 Subject: [PATCH 1/2] Closes #224. Fixes documentation about STORED in the example. (#225) --- examples/html/simple_search.html | 99 ++++++++++++++++++++++---------- examples/simple_search.rs | 18 +++--- 2 files changed, 80 insertions(+), 37 deletions(-) diff --git a/examples/html/simple_search.html b/examples/html/simple_search.html index 1aa6b63ab..178313d74 100644 --- a/examples/html/simple_search.html +++ b/examples/html/simple_search.html @@ -30,10 +30,12 @@ -
extern crate rustc_serialize;
-extern crate tantivy;
+            
extern crate tantivy;
 extern crate tempdir;
 
+#[macro_use]
+extern crate serde_json;
+
 use std::path::Path;
 use tempdir::TempDir;
 use tantivy::Index;
@@ -108,8 +110,8 @@ be indexed”.

Our first field is title. -We want full-text search for it, and we want to be able -to retrieve the document after the search.

+We want full-text search for it, and we also want +to be able to retrieve the document after the search.

TEXT | STORED is some syntactic sugar to describe that.

TEXT means the field should be tokenized and indexed, @@ -132,9 +134,12 @@ documents that were selected during the search phase.

-

Our first field is body. -We want full-text search for it, and we want to be able -to retrieve the body after the search.

+

    Our second field is body. +We want full-text search for it, but we do not +need to be able to retrieve it +for our application.
    

+

    We can make our index lighter +by omitting the STORED flag.
    

@@ -158,7 +163,7 @@ with our schema in the directory.

-
    let index = try!(Index::create(index_path, schema.clone()));
+
    let index = Index::create(index_path, schema.clone())?;
@@ -178,7 +183,7 @@ heap for the indexer can increase its throughput.

-
    let mut index_writer = try!(index.writer(50_000_000));
+
    let mut index_writer = index.writer(50_000_000)?;
@@ -214,9 +219,11 @@ one by one in a Document object.

let mut old_man_doc = Document::default(); old_man_doc.add_text(title, "The Old Man and the Sea"); - old_man_doc.add_text(body, - "He was an old man who fished alone in a skiff in the Gulf Stream and \ - he had gone eighty-four days now without taking a fish."); + old_man_doc.add_text( + body, + "He was an old man who fished alone in a skiff in the Gulf Stream and \ + he had gone eighty-four days now without taking a fish.", + ); @@ -243,16 +250,25 @@ one by one in a Document object.

Create a document directly from json.

-

Alternatively, we can use our schema to parse -a document object directly from json.

+

    Alternatively, we can use our schema to parse a +document object directly from json. +parse_document takes the document as a json string; here we build it +with the json! macro from serde_json, which makes multi-line literals convenient.
    

-
-    let mice_and_men_doc = try!(schema.parse_document(r#"{
-       "title": "Of Mice and Men",
-       "body": "few miles south of Soledad, the Salinas River drops in close to the hillside bank and runs deep and green. The water is warm too, for it has slipped twinkling over the yellow sands in the sunlight before reaching the narrow pool. On one side of the river the golden foothill slopes curve up to the strong and rocky Gabilan Mountains, but on the valley side the water is lined with trees—willows fresh and green with every spring, carrying in their lower leaf junctures the debris of the winter’s flooding; and sycamores with mottled, white,recumbent limbs and branches that arch over the pool"  
-    }"#));
+            
    let json = json!({
+       "title": "Of Mice and Men",
+       "body": "A few miles south of Soledad, the Salinas River drops in close to the hillside \
+                bank and runs deep and green. The water is warm too, for it has slipped twinkling \
+                over the yellow sands in the sunlight before reaching the narrow pool. On one \
+                side of the river the golden foothill slopes curve up to the strong and rocky \
+                Gabilan Mountains, but on the valley side the water is lined with trees—willows \
+                fresh and green with every spring, carrying in their lower leaf junctures the \
+                debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
+                limbs and branches that arch over the pool"
+    });
+    let mice_and_men_doc = schema.parse_document(&json.to_string())?;
 
     index_writer.add_document(mice_and_men_doc);
@@ -271,10 +287,15 @@ The following document has two titles.

-
    let frankenstein_doc = try!(schema.parse_document(r#"{
-       "title": ["Frankenstein", "The Modern Promotheus"],
-       "body": "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings.  I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking."  
-    }"#));
+            
    let json = json!({
+       "title": ["Frankenstein", "The Modern Prometheus"],
+       "body": "You will rejoice to hear that no disaster has accompanied the commencement of an \
+                enterprise which you have regarded with such evil forebodings.  I arrived here \
+                yesterday, and my first task is to assure my dear sister of my welfare and \
+                increasing confidence in the success of my undertaking."
+    });
+    let frankenstein_doc = schema.parse_document(&json.to_string())?;
+
     index_writer.add_document(frankenstein_doc);
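    The array value above is just the json spelling of a multi-valued field: building the document manually and calling add_text twice on the same field has the same effect. Here is a small sketch of that alternative, reusing the add_text and add_document calls shown earlier (this variant is illustrative and not part of the patch itself):
    
    
        let mut frankenstein_doc = Document::default();
        // Adding "title" twice is what makes the field multi-valued,
        // mirroring the two-element json array above.
        frankenstein_doc.add_text(title, "Frankenstein");
        frankenstein_doc.add_text(title, "The Modern Prometheus");
        index_writer.add_document(frankenstein_doc);
    
    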
@@ -313,7 +334,7 @@ the existence of new documents.

-
    try!(index_writer.commit());
+
    index_writer.commit()?;
@@ -349,7 +370,7 @@ after every commit().

-
    try!(index.load_searchers());
+
    index.load_searchers()?;
@@ -384,7 +405,7 @@ in both title and body.

-
    let query_parser = QueryParser::new(index.schema(), vec![title, body]);
+
    let mut query_parser = QueryParser::for_index(index, vec![title, body]);
@@ -401,7 +422,7 @@ A ticket has been opened regarding this problem.

-
    let query = try!(query_parser.parse_query("sea whale"));
+
    let query = query_parser.parse_query("sea whale")?;
@@ -451,7 +472,7 @@ is the role of the TopCollector.

-
    try!(searcher.search(&*query, &mut top_collector));
+
    searcher.search(&*query, &mut top_collector)?;
@@ -488,9 +509,27 @@ a title.

     for doc_address in doc_addresses {
-        let retrieved_doc = try!(searcher.doc(&doc_address));
+        let retrieved_doc = searcher.doc(&doc_address)?;
         println!("{}", schema.to_json(&retrieved_doc));
-    }
+    }
+ + + + +
  • +
    + +
    + +
    +

    Wait for indexing and merging threads to shut down. +Usually this isn’t needed, but in main we try to +delete the temporary directory and that fails on +Windows if the files are still open.

    + +
    + +
        index_writer.wait_merging_threads()?;
     
         Ok(())
     }
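    Putting the quoted pieces together, the example boils down to roughly the following flow. This is a condensed sketch using the API calls as they appear in this patch (late-2017 tantivy); the few glue lines not quoted in the hunks above (index.searcher(), TopCollector::with_limit, top_collector.docs()) are assumptions about the surrounding example file rather than quotes from it.
    
        use std::path::Path;
    
        use tantivy::collector::TopCollector;
        use tantivy::query::QueryParser;
        use tantivy::schema::{Document, SchemaBuilder, STORED, TEXT};
        use tantivy::Index;
    
        fn run_example(index_path: &Path) -> tantivy::Result<()> {
            // Schema: "title" is stored so it can be returned with hits,
            // "body" is indexed only, which keeps the index lighter.
            let mut schema_builder = SchemaBuilder::default();
            let title = schema_builder.add_text_field("title", TEXT | STORED);
            let body = schema_builder.add_text_field("body", TEXT);
            let schema = schema_builder.build();
    
            // Create the index and an indexing writer with a 50 MB heap.
            let index = Index::create(index_path, schema.clone())?;
            let mut index_writer = index.writer(50_000_000)?;
    
            // Add a document field by field, then publish it with a commit.
            let mut old_man_doc = Document::default();
            old_man_doc.add_text(title, "The Old Man and the Sea");
            old_man_doc.add_text(body, "He was an old man who fished alone in a skiff.");
            index_writer.add_document(old_man_doc);
            index_writer.commit()?;
    
            // Reload searchers so the committed document becomes visible, then search.
            index.load_searchers()?;
            let searcher = index.searcher();
            let mut query_parser = QueryParser::for_index(index, vec![title, body]);
            let query = query_parser.parse_query("sea whale")?;
            let mut top_collector = TopCollector::with_limit(10);
            searcher.search(&*query, &mut top_collector)?;
    
            // Print the stored fields (here only "title") of every hit.
            for doc_address in top_collector.docs() {
                let retrieved_doc = searcher.doc(&doc_address)?;
                println!("{}", schema.to_json(&retrieved_doc));
            }
    
            // Let the merging threads finish so the caller can delete the directory.
            index_writer.wait_merging_threads()?;
            Ok(())
        }
    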
    diff --git a/examples/simple_search.rs b/examples/simple_search.rs index 20e3812c0..301508cd5 100644 --- a/examples/simple_search.rs +++ b/examples/simple_search.rs @@ -36,12 +36,12 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { let mut schema_builder = SchemaBuilder::default(); // Our first field is title. - // We want full-text search for it, and we want to be able - // to retrieve the document after the search. - // + // We want full-text search for it, and we also want + // to be able to retrieve the document after the search. + // // TEXT | STORED is some syntactic sugar to describe // that. - // + // // `TEXT` means the field should be tokenized and indexed, // along with its term frequency and term positions. // @@ -51,9 +51,13 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> { // documents that were selected during the search phase. schema_builder.add_text_field("title", TEXT | STORED); - // Our first field is body. - // We want full-text search for it, and we want to be able - // to retrieve the body after the search. + // Our second field is body. + // We want full-text search for it, but we do not + // need to be able to be able to retrieve it + // for our application. + // + // We can make our index lighter and + // by omitting `STORED` flag. schema_builder.add_text_field("body", TEXT); let schema = schema_builder.build(); From a298c084e66ea7b610f1625f05e11c8337f83ccc Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 22 Nov 2017 20:37:34 +0900 Subject: [PATCH 2/2] Analyzer's Analyzer::token_stream does not need to me `&mut self` --- src/analyzer/analyzer.rs | 14 +++++++------- src/analyzer/japanese_tokenizer.rs | 2 +- src/analyzer/lower_caser.rs | 1 - src/analyzer/mod.rs | 8 ++++---- src/analyzer/raw_tokenizer.rs | 2 +- src/analyzer/simple_tokenizer.rs | 2 +- src/analyzer/token_stream_chain.rs | 14 ++++++++------ src/schema/index_record_option.rs | 10 +++++++--- 8 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/analyzer/analyzer.rs b/src/analyzer/analyzer.rs index f2a485557..08cb0afcd 100644 --- a/src/analyzer/analyzer.rs +++ b/src/analyzer/analyzer.rs @@ -38,7 +38,7 @@ impl Default for Token { pub trait Analyzer<'a>: Sized + Clone { type TokenStreamImpl: TokenStream; - fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl; + fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl; fn filter(self, new_filter: NewFilter) -> ChainAnalyzer where NewFilter: TokenFilterFactory<>::TokenStreamImpl> @@ -51,8 +51,8 @@ pub trait Analyzer<'a>: Sized + Clone { } pub trait BoxedAnalyzer: Send + Sync { - fn token_stream<'a>(&mut self, text: &'a str) -> Box; - fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box; + fn token_stream<'a>(&self, text: &'a str) -> Box; + fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box; fn boxed_clone(&self) -> Box; } @@ -60,11 +60,11 @@ pub trait BoxedAnalyzer: Send + Sync { struct BoxableAnalyzer(A) where A: for <'a> Analyzer<'a> + Send + Sync; impl BoxedAnalyzer for BoxableAnalyzer where A: 'static + Send + Sync + for <'a> Analyzer<'a> { - fn token_stream<'a>(&mut self, text: &'a str) -> Box { + fn token_stream<'a>(&self, text: &'a str) -> Box { box self.0.token_stream(text) } - fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box { + fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box { assert!(texts.len() > 0); if texts.len() == 1 { box self.0.token_stream(texts[0]) @@ -72,7 +72,7 @@ impl BoxedAnalyzer for BoxableAnalyzer where A: 'static 
+ Send + Sync + fo else { let mut offsets = vec!(); let mut total_offset = 0; - for text in texts { + for &text in texts { offsets.push(total_offset); total_offset += text.len(); } @@ -154,7 +154,7 @@ impl<'a, HeadTokenFilterFactory, TailAnalyzer> Analyzer<'a> { type TokenStreamImpl = HeadTokenFilterFactory::ResultTokenStream; - fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl { + fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl { let tail_token_stream = self.tail.token_stream(text ); self.head.transform(tail_token_stream) } diff --git a/src/analyzer/japanese_tokenizer.rs b/src/analyzer/japanese_tokenizer.rs index 909ccbb0c..e80ae9f5d 100644 --- a/src/analyzer/japanese_tokenizer.rs +++ b/src/analyzer/japanese_tokenizer.rs @@ -21,7 +21,7 @@ pub struct JapaneseTokenizerStream { impl<'a> Analyzer<'a> for JapaneseTokenizer { type TokenStreamImpl = JapaneseTokenizerStream; - fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl { + fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl { let mut tokens = vec![]; let mut offset_from; let mut offset_to = 0; diff --git a/src/analyzer/lower_caser.rs b/src/analyzer/lower_caser.rs index 866508782..c23e71ec3 100644 --- a/src/analyzer/lower_caser.rs +++ b/src/analyzer/lower_caser.rs @@ -1,5 +1,4 @@ use super::{TokenFilterFactory, TokenStream, Token}; -use std::ascii::AsciiExt; /// Token filter that lowercase terms. diff --git a/src/analyzer/mod.rs b/src/analyzer/mod.rs index 227995b85..a312bc787 100644 --- a/src/analyzer/mod.rs +++ b/src/analyzer/mod.rs @@ -29,7 +29,7 @@ mod test { #[test] fn test_raw_tokenizer() { let analyzer_manager = AnalyzerManager::default(); - let mut en_analyzer = analyzer_manager.get("raw").unwrap(); + let en_analyzer = analyzer_manager.get("raw").unwrap(); let mut tokens: Vec = vec![]; { let mut add_token = |token: &Token| { tokens.push(token.term.clone()); }; @@ -44,7 +44,7 @@ mod test { fn test_en_analyzer() { let analyzer_manager = AnalyzerManager::default(); assert!(analyzer_manager.get("en_doesnotexist").is_none()); - let mut en_analyzer = analyzer_manager.get("en_stem").unwrap(); + let en_analyzer = analyzer_manager.get("en_stem").unwrap(); let mut tokens: Vec = vec![]; { let mut add_token = |token: &Token| { tokens.push(token.term.clone()); }; @@ -60,7 +60,7 @@ mod test { #[test] fn test_jp_analyzer() { let analyzer_manager = AnalyzerManager::default(); - let mut en_analyzer = analyzer_manager.get("ja").unwrap(); + let en_analyzer = analyzer_manager.get("ja").unwrap(); let mut tokens: Vec = vec![]; { @@ -78,7 +78,7 @@ mod test { #[test] fn test_tokenizer_empty() { let analyzer_manager = AnalyzerManager::default(); - let mut en_analyzer = analyzer_manager.get("en_stem").unwrap(); + let en_analyzer = analyzer_manager.get("en_stem").unwrap(); { let mut tokens: Vec = vec![]; { diff --git a/src/analyzer/raw_tokenizer.rs b/src/analyzer/raw_tokenizer.rs index 488ca5590..a5b2d3f6b 100644 --- a/src/analyzer/raw_tokenizer.rs +++ b/src/analyzer/raw_tokenizer.rs @@ -11,7 +11,7 @@ pub struct RawTokenStream { impl<'a> Analyzer<'a> for RawTokenizer { type TokenStreamImpl = RawTokenStream; - fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl { + fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl { let token = Token { offset_from: 0, offset_to: text.len(), diff --git a/src/analyzer/simple_tokenizer.rs b/src/analyzer/simple_tokenizer.rs index 1d4b71c22..e6cf30fb6 100644 --- a/src/analyzer/simple_tokenizer.rs +++ b/src/analyzer/simple_tokenizer.rs @@ -14,7 
+14,7 @@ pub struct SimpleTokenStream<'a> { impl<'a> Analyzer<'a> for SimpleTokenizer { type TokenStreamImpl = SimpleTokenStream<'a>; - fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl { + fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl { SimpleTokenStream { text: text, chars: text.char_indices(), diff --git a/src/analyzer/token_stream_chain.rs b/src/analyzer/token_stream_chain.rs index 6f59f9ae2..89087fb02 100644 --- a/src/analyzer/token_stream_chain.rs +++ b/src/analyzer/token_stream_chain.rs @@ -48,16 +48,18 @@ impl<'a, TTokenStream> TokenStream for TokenStreamChain } fn token(&self) -> &Token { - if self.stream_idx > self.token_streams.len() { - panic!("You called .token(), after the end of the token stream has been reached"); - } + assert!( + self.stream_idx <= self.token_streams.len(), + "You called .token(), after the end of the token stream has been reached" + ); &self.token } fn token_mut(&mut self) -> &mut Token { - if self.stream_idx > self.token_streams.len() { - panic!("You called .token(), after the end of the token stream has been reached"); - } + assert!( + self.stream_idx <= self.token_streams.len(), + "You called .token(), after the end of the token stream has been reached" + ); &mut self.token } } diff --git a/src/schema/index_record_option.rs b/src/schema/index_record_option.rs index edb57eb3a..e74f70c3a 100644 --- a/src/schema/index_record_option.rs +++ b/src/schema/index_record_option.rs @@ -13,12 +13,16 @@ /// #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)] pub enum IndexRecordOption { + /// Records only the `DocId`s #[serde(rename = "basic")] - Basic, //< records only the `DocId`s + Basic, + /// Records the document ids as well as the term frequency. #[serde(rename = "freq")] - WithFreqs, //< records the document ids as well as the term frequency. + WithFreqs, + /// Records the document id, the term frequency and the positions of + /// the occurences in the document. #[serde(rename = "position")] - WithFreqsAndPositions, //< records the document id, the term frequency and the positions of the occurences in the document. + WithFreqsAndPositions, } impl IndexRecordOption {
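    Call-site effect of the second patch (token_stream taking &self instead of &mut self): analyzers obtained from the AnalyzerManager no longer need a mut binding. A minimal sketch mirroring the updated tests in src/analyzer/mod.rs; the "sea whale" text is just a placeholder:
    
        let analyzer_manager = AnalyzerManager::default();
        // The binding no longer has to be `mut`: token_stream now
        // borrows the analyzer immutably.
        let en_analyzer = analyzer_manager.get("en_stem").unwrap();
        let _token_stream = en_analyzer.token_stream("sea whale");
    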