Fix bug. Clean up some rough spots. Renamed functions. Fixed tests and docs.

This commit is contained in:
dcraven
2020-12-30 13:28:27 +01:00
committed by Paul Masurel
parent 4e6b341422
commit ca6fd5effc
21 changed files with 360 additions and 313 deletions

View File

@@ -5,7 +5,7 @@
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::tokenizer::{NgramTokenizer, TextAnalyzer};
use tantivy::tokenizer::NgramTokenizer;
use tantivy::{doc, Index};
fn main() -> tantivy::Result<()> {
@@ -52,10 +52,9 @@ fn main() -> tantivy::Result<()> {
// here we are registering our custom tokenizer
// this will store tokens of 3 characters each
index.tokenizers().register(
"ngram3",
TextAnalyzer::new(NgramTokenizer::new(3, 3, false)),
);
index
.tokenizers()
.register("ngram3", NgramTokenizer::new(3, 3, false));
// To insert a document we need an index writer.
// There must be only one writer at a time.

View File

@@ -50,12 +50,13 @@ fn main() -> tantivy::Result<()> {
// This tokenizer lowers all of the text (to help with stop word matching)
// then removes all instances of `the` and `and` from the corpus
let tokenizer = TextAnalyzer::new(SimpleTokenizer)
let tokenizer = analyzer_builder(SimpleTokenizer)
.filter(LowerCaser::new())
.filter(StopWordFilter::new(vec![
.filter(StopWordFilter::remove(vec![
"the".to_string(),
"and".to_string(),
]));
]))
.build();
index.tokenizers().register("stoppy", tokenizer);