Fix bug. Clean up some rough spots. Rename functions. Fix tests and docs.

2026-05-20 10:10:42 +00:00 · 2020-12-30 13:28:27 +01:00
parent 4e6b341422
commit ca6fd5effc
21 changed files with 360 additions and 313 deletions
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -5,7 +5,7 @@
 use tantivy::collector::TopDocs;
 use tantivy::query::QueryParser;
 use tantivy::schema::*;
-use tantivy::tokenizer::{NgramTokenizer, TextAnalyzer};
+use tantivy::tokenizer::NgramTokenizer;
 use tantivy::{doc, Index};

 fn main() -> tantivy::Result<()> {
@@ -52,10 +52,9 @@ fn main() -> tantivy::Result<()> {

    // here we are registering our custome tokenizer
    // this will store tokens of 3 characters each
-    index.tokenizers().register(
-        "ngram3",
-        TextAnalyzer::new(NgramTokenizer::new(3, 3, false)),
-    );
+    index
+        .tokenizers()
+        .register("ngram3", NgramTokenizer::new(3, 3, false));

    // To insert document we need an index writer.
    // There must be only one writer at a time.
--- a/examples/stop_words.rs
+++ b/examples/stop_words.rs
@@ -50,12 +50,13 @@ fn main() -> tantivy::Result<()> {

    // This tokenizer lowers all of the text (to help with stop word matching)
    // then removes all instances of `the` and `and` from the corpus
-    let tokenizer = TextAnalyzer::new(SimpleTokenizer)
+    let tokenizer = analyzer_builder(SimpleTokenizer)
        .filter(LowerCaser::new())
-        .filter(StopWordFilter::new(vec![
+        .filter(StopWordFilter::remove(vec![
            "the".to_string(),
            "and".to_string(),
-        ]));
+        ]))
+        .build();

    index.tokenizers().register("stoppy", tokenizer);