Added alphanum only token filter

This commit is contained in:
Paul Masurel
2017-12-31 13:43:10 +09:00
parent 6f223253ea
commit 44e5c4dfd3
2 changed files with 67 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
use super::{Token, TokenFilter, TokenStream};
/// Token filter that keeps only the tokens whose text is made up
/// exclusively of ASCII alphanumeric characters.
///
/// Any token containing a character outside `[0-9A-Za-z]` is dropped
/// from the stream.
#[derive(Clone, Debug, Default)]
pub struct AlphaNumOnlyFilter;
/// Token stream wrapper that sits on top of another token stream and
/// forwards its tokens.
pub struct AlphaNumOnlyFilterStream<T: TokenStream> {
    /// Upstream token stream that tokens are pulled from.
    tail: T,
}
impl<T> AlphaNumOnlyFilterStream<T>
where
    T: TokenStream,
{
    /// Returns `true` when every character of `token.text` is an ASCII
    /// letter or digit.
    ///
    /// A token with an empty text is accepted, since it contains no
    /// offending character.
    fn predicate(&self, token: &Token) -> bool {
        let has_disallowed_char = token
            .text
            .chars()
            .any(|ch| !ch.is_ascii_alphanumeric());
        !has_disallowed_char
    }

    /// Builds a filtering stream on top of the given `tail` stream.
    fn wrap(tail: T) -> Self {
        Self { tail }
    }
}
impl<T: TokenStream> TokenFilter<T> for AlphaNumOnlyFilter {
    /// The stream type obtained once the filter is applied to `T`.
    type ResultTokenStream = AlphaNumOnlyFilterStream<T>;

    /// Wraps `token_stream` into an `AlphaNumOnlyFilterStream`.
    fn transform(&self, token_stream: T) -> AlphaNumOnlyFilterStream<T> {
        AlphaNumOnlyFilterStream::wrap(token_stream)
    }
}
impl<T: TokenStream> TokenStream for AlphaNumOnlyFilterStream<T> {
    /// Returns the current token of the underlying stream.
    fn token(&self) -> &Token {
        self.tail.token()
    }

    /// Returns a mutable reference to the current token of the
    /// underlying stream.
    fn token_mut(&mut self) -> &mut Token {
        self.tail.token_mut()
    }

    /// Moves to the next token accepted by `predicate`.
    ///
    /// Rejected tokens are skipped. Returns `true` as soon as an accepted
    /// token is reached, and `false` once the underlying stream's
    /// `advance()` returns `false`.
    fn advance(&mut self) -> bool {
        while self.tail.advance() {
            if self.predicate(self.tail.token()) {
                return true;
            }
            // Token rejected: keep pulling from the underlying stream.
        }
        false
    }
}

View File

@@ -137,7 +137,9 @@ mod tokenizer_manager;
mod japanese_tokenizer;
mod token_stream_chain;
mod raw_tokenizer;
mod alphanum_only;
pub use self::alphanum_only::AlphaNumOnlyFilter;
pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer};
pub use self::tokenizer::BoxedTokenizer;
pub use self::tokenizer_manager::TokenizerManager;