Add dictionary-based SplitCompoundWords token filter.

This commit is contained in:
Adam Reichold
2022-10-26 19:12:21 +02:00
parent d777c964da
commit cd952429d2
3 changed files with 255 additions and 0 deletions

View File

@@ -126,6 +126,7 @@ mod ngram_tokenizer;
mod raw_tokenizer;
mod remove_long;
mod simple_tokenizer;
mod split_compound_words;
mod stemmer;
mod stop_word_filter;
mod tokenized_string;
@@ -141,6 +142,7 @@ pub use self::ngram_tokenizer::NgramTokenizer;
pub use self::raw_tokenizer::RawTokenizer;
pub use self::remove_long::RemoveLongFilter;
pub use self::simple_tokenizer::SimpleTokenizer;
pub use self::split_compound_words::SplitCompoundWords;
pub use self::stemmer::{Language, Stemmer};
pub use self::stop_word_filter::StopWordFilter;
pub use self::tokenized_string::{PreTokenizedStream, PreTokenizedString};