Dynamic analyzer (#755)

* Removed generics in tokenizers

* lowercaser

* Added TokenizerExt

* Introducing BoxedTokenizer

* Introducing BoxXXXXX helper struct

* Closes #762.

* Introducing a TextAnalyzer
This commit is contained in:
Paul Masurel
2020-01-29 18:23:37 +09:00
committed by GitHub
parent f6847c46d7
commit 811fd0cb9e
22 changed files with 348 additions and 442 deletions

View File

@@ -533,7 +533,7 @@ mod test {
use crate::schema::{IndexRecordOption, TextFieldIndexing, TextOptions};
use crate::schema::{Schema, Term, INDEXED, STORED, STRING, TEXT};
use crate::tokenizer::{
-    LowerCaser, SimpleTokenizer, StopWordFilter, Tokenizer, TokenizerManager,
+    LowerCaser, SimpleTokenizer, StopWordFilter, TextAnalyzer, TokenizerManager,
};
use crate::Index;
use matches::assert_matches;
@@ -563,7 +563,7 @@ mod test {
let tokenizer_manager = TokenizerManager::default();
tokenizer_manager.register(
"en_with_stop_words",
-    SimpleTokenizer
+    TextAnalyzer::from(SimpleTokenizer)
.filter(LowerCaser)
.filter(StopWordFilter::remove(vec!["the".to_string()])),
);