mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-13 20:42:55 +00:00
Added alphanum only token filter
This commit is contained in:
65
src/tokenizer/alphanum_only.rs
Normal file
65
src/tokenizer/alphanum_only.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
|
||||
/// `TokenFilter` that removes every token whose text contains a character
/// that is not an ASCII letter or an ASCII digit.
///
/// The filter itself carries no state; the filtering happens in the stream
/// returned by its `transform` method.
#[derive(Clone, Debug)]
pub struct AlphaNumOnlyFilter;
|
||||
|
||||
|
||||
/// Token stream produced by `AlphaNumOnlyFilter`.
///
/// It wraps an underlying token stream (`tail`) and is only useful when
/// `TailTokenStream` implements `TokenStream`. That requirement is stated on
/// the `impl` blocks rather than on the struct itself, so the type definition
/// stays free of bounds that every user would otherwise have to repeat.
pub struct AlphaNumOnlyFilterStream<TailTokenStream> {
    /// The wrapped stream whose tokens are being filtered.
    tail: TailTokenStream,
}
|
||||
|
||||
|
||||
impl<TailTokenStream> AlphaNumOnlyFilterStream<TailTokenStream>
|
||||
where TailTokenStream: TokenStream
|
||||
{
|
||||
fn predicate(&self, token: &Token) -> bool {
|
||||
token.text.chars().all(|c| c.is_ascii_alphanumeric())
|
||||
}
|
||||
|
||||
fn wrap(
|
||||
tail: TailTokenStream,
|
||||
) -> AlphaNumOnlyFilterStream<TailTokenStream> {
|
||||
AlphaNumOnlyFilterStream {
|
||||
tail
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<TailTokenStream> TokenFilter<TailTokenStream> for AlphaNumOnlyFilter
|
||||
where
|
||||
TailTokenStream: TokenStream,
|
||||
{
|
||||
type ResultTokenStream = AlphaNumOnlyFilterStream<TailTokenStream>;
|
||||
|
||||
fn transform(&self, token_stream: TailTokenStream) -> Self::ResultTokenStream {
|
||||
AlphaNumOnlyFilterStream::wrap(token_stream)
|
||||
}
|
||||
}
|
||||
|
||||
impl<TailTokenStream> TokenStream for AlphaNumOnlyFilterStream<TailTokenStream>
|
||||
where
|
||||
TailTokenStream: TokenStream
|
||||
{
|
||||
fn token(&self) -> &Token {
|
||||
self.tail.token()
|
||||
}
|
||||
|
||||
fn token_mut(&mut self) -> &mut Token {
|
||||
self.tail.token_mut()
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> bool {
|
||||
loop {
|
||||
if self.tail.advance() {
|
||||
if self.predicate(self.tail.token()) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -137,7 +137,9 @@ mod tokenizer_manager;
|
||||
mod japanese_tokenizer;
|
||||
mod token_stream_chain;
|
||||
mod raw_tokenizer;
|
||||
mod alphanum_only;
|
||||
|
||||
pub use self::alphanum_only::AlphaNumOnlyFilter;
|
||||
pub use self::tokenizer::{Token, TokenFilter, TokenStream, Tokenizer};
|
||||
pub use self::tokenizer::BoxedTokenizer;
|
||||
pub use self::tokenizer_manager::TokenizerManager;
|
||||
|
||||
Reference in New Issue
Block a user