Revert "Fix non english stemmers (#521)"

This reverts commit 2cd31bcda2.
This commit is contained in:
Paul Masurel
2019-03-27 08:54:55 +09:00
committed by GitHub
parent 2cd31bcda2
commit b3a201e665
2 changed files with 11 additions and 10 deletions

View File

@@ -228,27 +228,27 @@ pub mod tests {
fn test_non_en_tokenizer() {
let tokenizer_manager = TokenizerManager::default();
tokenizer_manager.register(
"el_stem",
"es_stem",
SimpleTokenizer
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(Stemmer::new(Language::Greek)),
.filter(Stemmer::new(Language::Spanish)),
);
let en_tokenizer = tokenizer_manager.get("el_stem").unwrap();
let en_tokenizer = tokenizer_manager.get("es_stem").unwrap();
let mut tokens: Vec<Token> = vec![];
{
let mut add_token = |token: &Token| {
tokens.push(token.clone());
};
en_tokenizer
.token_stream("Καλημέρα, χαρούμενε φορολογούμενε!")
.token_stream("Hola, feliz contribuyente!")
.process(&mut add_token);
}
assert_eq!(tokens.len(), 3);
assert_token(&tokens[0], 0, "καλημερ", 0, 16);
assert_token(&tokens[1], 1, "χαρουμεν", 18, 36);
assert_token(&tokens[2], 2, "φορολογουμεν", 37, 63);
assert_token(&tokens[0], 0, "hola", 0, 4);
assert_token(&tokens[1], 1, "feliz", 6, 11);
assert_token(&tokens[2], 2, "contribuyent", 12, 25);
}
#[test]

View File

@@ -2,6 +2,7 @@
use super::{Token, TokenFilter, TokenStream};
use rust_stemmers::{self, Algorithm};
use std::sync::Arc;
/// Available stemmer languages.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
@@ -56,14 +57,14 @@ impl Language {
/// Tokens are expected to be lowercased beforehand.
#[derive(Clone)]
pub struct Stemmer {
stemmer_algorithm: Algorithm,
stemmer_algorithm: Arc<Algorithm>,
}
impl Stemmer {
/// Creates a new Stemmer `TokenFilter` for a given language algorithm.
pub fn new(language: Language) -> Stemmer {
Stemmer {
stemmer_algorithm: language.algorithm(),
stemmer_algorithm: Arc::new(language.algorithm()),
}
}
}
@@ -82,7 +83,7 @@ where
type ResultTokenStream = StemmerTokenStream<TailTokenStream>;
fn transform(&self, token_stream: TailTokenStream) -> Self::ResultTokenStream {
let inner_stemmer = rust_stemmers::Stemmer::create(self.stemmer_algorithm);
let inner_stemmer = rust_stemmers::Stemmer::create(Algorithm::English);
StemmerTokenStream::wrap(inner_stemmer, token_stream)
}
}