From 8f812c7cd4f5ccc2964a13fc2c0d588392742e63 Mon Sep 17 00:00:00 2001
From: Paul Masurel <paul.masurel@gmail.com>
Date: Thu, 11 Aug 2016 17:54:06 +0900
Subject: [PATCH] blop

---
 TODO.md                       |   2 +
 docs/tutorial.md              | 142 ++++++++++++++++++++++++++++++++--
 script/build-static-binary.sh |   3 +
 src/cli/commands/bench.rs     |   4 +-
 4 files changed, 142 insertions(+), 9 deletions(-)
 mode change 100644 => 100755 script/build-static-binary.sh

diff --git a/TODO.md b/TODO.md
index e622a01a8..74e514acb 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,8 +1,10 @@
+position not stored
 lenient mode for query parser
 phrase queries
 masks for union
 documentation
 query explain with proper term names
+better schema JSON 
 
 Arc for the schema
 error management
diff --git a/docs/tutorial.md b/docs/tutorial.md
index e7e86be25..167fbff40 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -12,7 +12,7 @@ with the articles of English wikipedia in it.
 There are two ways to get `tantivy`.
 If you are a rust programmer, you can run `cargo install tantivy`.
 Alternatively, if you are on `Linux 64bits`, you can download a
-static binary here []() 
+static binary:  [binaries/linux_x86_64/](http://fulmicoton.com/tantivy/binaries/linux_x86_64/tantivy) 
 
 ## Step 2 - creating the index
 
@@ -31,13 +31,141 @@ Our documents will contain
 * a body 
 * a url
 
-Running
+Running `tantivy new` will start a wizard that will help you go through
+the definition of the schema of our new index.
 
 ```bash
-    # create the directory
-    tantivy 
+    tantivy new -i wikipedia-index
 ```
-   
-   
 
-https://www.dropbox.com/s/wwnfnu441w1ec9p/wiki-articles.json.bz2?dl=0
\ No newline at end of file
+When asked, answer the questions as follows:
+
+```
+    Creating new index 
+    Let's define it's schema! 
+
+
+
+    New field name  ? title
+    Text or unsigned 32-bit Integer (T/I) ? T
+    Should the field be stored (Y/N) ? Y
+    Should the field be indexed (Y/N) ? Y
+    Should the field be tokenized (Y/N) ? Y
+    Should the term frequencies (per doc) be in the index (Y/N) ? Y
+    Should the term positions (per doc) be in the index (Y/N) ? Y
+    Add another field (Y/N) ? Y
+
+
+
+    New field name  ? body 
+    Text or unsigned 32-bit Integer (T/I) ? T
+    Should the field be stored (Y/N) ? Y
+    Should the field be indexed (Y/N) ? Y
+    Should the field be tokenized (Y/N) ? Y
+    Should the term frequencies (per doc) be in the index (Y/N) ? Y
+    Should the term positions (per doc) be in the index (Y/N) ? Y
+    Add another field (Y/N) ? Y
+
+
+
+    New field name  ? url
+    Text or unsigned 32-bit Integer (T/I) ? T
+    Should the field be stored (Y/N) ? Y
+    Should the field be indexed (Y/N) ? N
+    Add another field (Y/N) ? N
+
+    [
+    {
+        "variant": "Text",
+        "fields": [
+        "title",
+        {
+            "indexing_options": "TokenizedWithFreqAndPosition",
+            "stored": true
+        }
+        ]
+    },
+    {
+        "variant": "Text",
+        "fields": [
+        "body",
+        {
+            "indexing_options": "TokenizedWithFreqAndPosition",
+            "stored": true
+        }
+        ]
+    },
+    {
+        "variant": "Text",
+        "fields": [
+        "url",
+        {
+            "indexing_options": "Unindexed",
+            "stored": true
+        }
+        ]
+    }
+    ]
+
+```
+
+If you want to know more about the meaning of these options, you can check out the [schema doc page](http://fulmicoton.com/tantivy/tantivy/schema/index.html).  
+
+The JSON displayed at the end has been written to `wikipedia-index/meta.json`.
+
+
+## Step 3 - Get the documents to index
+
+Tantivy's index command offers a way to index a json file.
+More accurately, the file must contain one document per line, in a json format.
+The structure of this JSON object must match that of our schema definition.
+
+```json
+    {"body": "some text", "title": "some title", "url": "http://somedomain.com"}
+```
+
+You can download a corpus of more than 5 million articles from Wikipedia,
+already in the right format, here: [wiki-articles.json (2.34 GB)](https://www.dropbox.com/s/wwnfnu441w1ec9p/wiki-articles.json.bz2?dl=0).
+If you are in a rush you can [download 100 articles in the right format here](http://fulmicoton.com/tantivy/tutorial/wiki-articles-first100.json).
+
+Make sure to uncompress the file:
+
+```bash
+    bunzip2 wiki-articles.json.bz2
+``` 
+
+## Step 4 - Index the documents
+
+The `index` command will index your documents.
+By default, it will use as many threads as there are cores on your machine.
+
+On my computer (8 core Xeon(R) CPU X3450  @ 2.67GHz), it only takes 7 minutes.
+
+```
+    cat wiki-articles.json | tantivy index -i wikipedia-index
+```
+
+## Step 5 - Have a look at the index directory
+
+```bash
+    ls wikipedia-index
+```
+
+If you indexed the 5 million articles, you should see a lot of files.
+The main file is `meta.json`.
+
+Our index is in fact divided into segments. Each segment acts as an individual, smaller index.
+Each segment is named after a UUID.
+Each file stores a different data structure of the index.
+
+
+## Step 6 - Serve a search index
+
+```
+    tantivy serve -i wikipedia-index
+```
+
+You can start a small server with a JSON API to search Wikipedia.
+By default, the server listens on port `3000`.
+
+
diff --git a/script/build-static-binary.sh b/script/build-static-binary.sh
old mode 100644
new mode 100755
index 3fa3b43c0..6b46cda16
--- a/script/build-static-binary.sh
+++ b/script/build-static-binary.sh
@@ -1,4 +1,7 @@
+#!/usr/bin/env bash
+
 # the musl-tools package must be installed.
 rustup target add x86_64-unknown-linux-musl
 cargo build --release --target=x86_64-unknown-linux-musl
+cp target/x86_64-unknown-linux-musl/release/tantivy ../tantivy_doc/binaries/tantivy
 
diff --git a/src/cli/commands/bench.rs b/src/cli/commands/bench.rs
index 1d378195b..6fee3c607 100644
--- a/src/cli/commands/bench.rs
+++ b/src/cli/commands/bench.rs
@@ -73,7 +73,7 @@ fn run_bench(index_path: &Path,
             let timing;
             {
                 let mut collector = chain().add(&mut top_collector).add(&mut count_collector);
-                timing = try!(query.search(&searcher, &mut collector).map_err(|e| format!("Failed while searching query {:?}", query_txt)));
+                timing = try!(query.search(&searcher, &mut collector).map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e)));
             }
             println!("{}\t{}\t{}\t{}", query_txt, num_terms, count_collector.count(), timing.total_time());
         }
@@ -89,7 +89,7 @@ fn run_bench(index_path: &Path,
             try!(query.search(&searcher, &mut top_collector).map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e)));
             let mut timer = TimerTree::new();
             {
-                let h = timer.open("total");
+                let _scoped_timer_ = timer.open("total");
                 for doc_address in top_collector.docs() {
                     searcher.doc(&doc_address).unwrap();
                 }