mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 17:22:54 +00:00
* Removed generics in tokenizers * lowercaser * Added TokenizerExt * Introducing BoxedTokenizer * Introducing BoxXXXXX helper struct * Closes #762. * Introducing a TextAnalyzer
45 lines
969 B
Rust
45 lines
969 B
Rust
use super::{Token, TokenStream, Tokenizer};
|
|
use crate::tokenizer::BoxTokenStream;
|
|
|
|
/// For each value of the field, emit a single unprocessed token.
///
/// The text is not split or normalized in any way: the stream produced by
/// this tokenizer yields exactly one token whose text is a copy of the whole
/// input.
#[derive(Clone)]
pub struct RawTokenizer;
|
|
|
|
pub struct RawTokenStream {
|
|
token: Token,
|
|
has_token: bool,
|
|
}
|
|
|
|
impl Tokenizer for RawTokenizer {
|
|
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
|
|
let token = Token {
|
|
offset_from: 0,
|
|
offset_to: text.len(),
|
|
position: 0,
|
|
text: text.to_string(),
|
|
position_length: 1,
|
|
};
|
|
RawTokenStream {
|
|
token,
|
|
has_token: true,
|
|
}
|
|
.into()
|
|
}
|
|
}
|
|
|
|
impl TokenStream for RawTokenStream {
|
|
fn advance(&mut self) -> bool {
|
|
let result = self.has_token;
|
|
self.has_token = false;
|
|
result
|
|
}
|
|
|
|
fn token(&self) -> &Token {
|
|
&self.token
|
|
}
|
|
|
|
fn token_mut(&mut self) -> &mut Token {
|
|
&mut self.token
|
|
}
|
|
}
|