mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-20 10:10:42 +00:00
Fix bug. Cleanup some rough spots. Renamed functions. Fixed tests and docs.
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::QueryParser;
|
||||
use tantivy::schema::*;
|
||||
use tantivy::tokenizer::{NgramTokenizer, TextAnalyzer};
|
||||
use tantivy::tokenizer::NgramTokenizer;
|
||||
use tantivy::{doc, Index};
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
@@ -52,10 +52,9 @@ fn main() -> tantivy::Result<()> {
|
||||
|
||||
// here we are registering our custom tokenizer
|
||||
// this will store tokens of 3 characters each
|
||||
index.tokenizers().register(
|
||||
"ngram3",
|
||||
TextAnalyzer::new(NgramTokenizer::new(3, 3, false)),
|
||||
);
|
||||
index
|
||||
.tokenizers()
|
||||
.register("ngram3", NgramTokenizer::new(3, 3, false));
|
||||
|
||||
// To insert a document we need an index writer.
|
||||
// There must be only one writer at a time.
|
||||
|
||||
@@ -50,12 +50,13 @@ fn main() -> tantivy::Result<()> {
|
||||
|
||||
// This tokenizer lowercases all of the text (to help with stop word matching)
|
||||
// then removes all instances of `the` and `and` from the corpus
|
||||
let tokenizer = TextAnalyzer::new(SimpleTokenizer)
|
||||
let tokenizer = analyzer_builder(SimpleTokenizer)
|
||||
.filter(LowerCaser::new())
|
||||
.filter(StopWordFilter::new(vec![
|
||||
.filter(StopWordFilter::remove(vec![
|
||||
"the".to_string(),
|
||||
"and".to_string(),
|
||||
]));
|
||||
]))
|
||||
.build();
|
||||
|
||||
index.tokenizers().register("stoppy", tokenizer);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user