mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-02 16:40:43 +00:00
Remove BoxTokenFilter.
This commit is contained in:
@@ -12,6 +12,7 @@ use crate::schema::Term;
|
||||
use crate::schema::Value;
|
||||
use crate::schema::{Field, FieldEntry};
|
||||
use crate::tokenizer::PreTokenizedStream;
|
||||
use crate::tokenizer::TokenStream;
|
||||
use crate::tokenizer::{FacetTokenizer, TextAnalyzer};
|
||||
use crate::tokenizer::{TokenStreamChain, Tokenizer};
|
||||
use crate::Opstamp;
|
||||
@@ -179,7 +180,8 @@ impl SegmentWriter {
|
||||
match field_value.value() {
|
||||
Value::PreTokStr(tok_str) => {
|
||||
streams_with_offsets.push((
|
||||
PreTokenizedStream::from(tok_str.clone()).into(),
|
||||
Box::new(PreTokenizedStream::from(tok_str.clone()))
|
||||
as Box<dyn TokenStream>,
|
||||
total_offset,
|
||||
));
|
||||
if let Some(last_token) = tok_str.tokens.last() {
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
//! // the "emoji" is dropped because it's not an alphanum
|
||||
//! assert!(stream.next().is_none());
|
||||
//! ```
|
||||
use super::{BoxTokenStream, Token, TokenFilter, TokenStream};
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
|
||||
/// `TokenFilter` that removes all tokens that contain non
|
||||
/// ascii alphanumeric characters.
|
||||
@@ -27,7 +27,7 @@ use super::{BoxTokenStream, Token, TokenFilter, TokenStream};
|
||||
pub struct AlphaNumOnlyFilter;
|
||||
|
||||
pub struct AlphaNumOnlyFilterStream<'a> {
|
||||
tail: BoxTokenStream<'a>,
|
||||
tail: Box<dyn TokenStream + 'a>,
|
||||
}
|
||||
|
||||
impl<'a> AlphaNumOnlyFilterStream<'a> {
|
||||
@@ -37,8 +37,8 @@ impl<'a> AlphaNumOnlyFilterStream<'a> {
|
||||
}
|
||||
|
||||
impl TokenFilter for AlphaNumOnlyFilter {
|
||||
fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
|
||||
BoxTokenStream::from(AlphaNumOnlyFilterStream { tail: token_stream })
|
||||
fn transform<'a>(&self, token_stream: Box<dyn TokenStream + 'a>) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(AlphaNumOnlyFilterStream { tail: token_stream })
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use super::{BoxTokenStream, Token, TokenFilter, TokenStream};
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
use std::mem;
|
||||
|
||||
/// This class converts alphabetic, numeric, and symbolic Unicode characters
|
||||
@@ -8,8 +8,8 @@ use std::mem;
|
||||
pub struct AsciiFoldingFilter;
|
||||
|
||||
impl TokenFilter for AsciiFoldingFilter {
|
||||
fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
|
||||
From::from(AsciiFoldingFilterTokenStream {
|
||||
fn transform<'a>(&self, token_stream: Box<dyn TokenStream + 'a>) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(AsciiFoldingFilterTokenStream {
|
||||
tail: token_stream,
|
||||
buffer: String::with_capacity(100),
|
||||
})
|
||||
@@ -18,7 +18,7 @@ impl TokenFilter for AsciiFoldingFilter {
|
||||
|
||||
pub struct AsciiFoldingFilterTokenStream<'a> {
|
||||
buffer: String,
|
||||
tail: BoxTokenStream<'a>,
|
||||
tail: Box<dyn TokenStream + 'a>,
|
||||
}
|
||||
|
||||
impl<'a> TokenStream for AsciiFoldingFilterTokenStream<'a> {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use super::{BoxTokenStream, Token, TokenStream, Tokenizer};
|
||||
use super::{Token, TokenStream, Tokenizer};
|
||||
use crate::schema::FACET_SEP_BYTE;
|
||||
|
||||
/// The `FacetTokenizer` processes a `Facet` binary representation
|
||||
@@ -26,13 +26,12 @@ pub struct FacetTokenStream<'a> {
|
||||
}
|
||||
|
||||
impl Tokenizer for FacetTokenizer {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
|
||||
FacetTokenStream {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(FacetTokenStream {
|
||||
text,
|
||||
state: State::RootFacetNotEmitted, //< pos is the first char that has not been processed yet.
|
||||
token: Token::default(),
|
||||
}
|
||||
.into()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
use crate::tokenizer::BoxTokenStream;
|
||||
use std::mem;
|
||||
|
||||
impl TokenFilter for LowerCaser {
|
||||
fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
|
||||
BoxTokenStream::from(LowerCaserTokenStream {
|
||||
fn transform<'a>(&self, token_stream: Box<dyn TokenStream + 'a>) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(LowerCaserTokenStream {
|
||||
tail: token_stream,
|
||||
buffer: String::with_capacity(100),
|
||||
})
|
||||
@@ -17,7 +16,7 @@ pub struct LowerCaser;
|
||||
|
||||
pub struct LowerCaserTokenStream<'a> {
|
||||
buffer: String,
|
||||
tail: BoxTokenStream<'a>,
|
||||
tail: Box<dyn TokenStream + 'a>,
|
||||
}
|
||||
|
||||
// writes a lowercased version of text into output.
|
||||
|
||||
@@ -145,9 +145,7 @@ pub use self::stop_word_filter::StopWordFilter;
|
||||
pub(crate) use self::token_stream_chain::TokenStreamChain;
|
||||
|
||||
pub use self::tokenized_string::{PreTokenizedStream, PreTokenizedString};
|
||||
pub use self::tokenizer::{
|
||||
BoxTokenFilter, BoxTokenStream, TextAnalyzer, Token, TokenFilter, TokenStream, Tokenizer,
|
||||
};
|
||||
pub use self::tokenizer::{TextAnalyzer, Token, TokenFilter, TokenStream, Tokenizer};
|
||||
|
||||
pub use self::tokenizer_manager::TokenizerManager;
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use super::{Token, TokenStream, Tokenizer};
|
||||
use crate::tokenizer::BoxTokenStream;
|
||||
|
||||
/// Tokenize the text by splitting words into n-grams of the given size(s)
|
||||
///
|
||||
@@ -131,8 +130,8 @@ pub struct NgramTokenStream<'a> {
|
||||
}
|
||||
|
||||
impl Tokenizer for NgramTokenizer {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
|
||||
From::from(NgramTokenStream {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(NgramTokenStream {
|
||||
ngram_charidx_iterator: StutteringIterator::new(
|
||||
CodepointFrontiers::for_str(text),
|
||||
self.min_gram,
|
||||
@@ -308,9 +307,9 @@ mod tests {
|
||||
use super::StutteringIterator;
|
||||
use crate::tokenizer::tests::assert_token;
|
||||
use crate::tokenizer::tokenizer::Tokenizer;
|
||||
use crate::tokenizer::{BoxTokenStream, Token};
|
||||
use crate::tokenizer::{Token, TokenStream};
|
||||
|
||||
fn test_helper(mut tokenizer: BoxTokenStream) -> Vec<Token> {
|
||||
fn test_helper(mut tokenizer: Box<dyn TokenStream>) -> Vec<Token> {
|
||||
let mut tokens: Vec<Token> = vec![];
|
||||
tokenizer.process(&mut |token: &Token| tokens.push(token.clone()));
|
||||
tokens
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use super::{Token, TokenStream, Tokenizer};
|
||||
use crate::tokenizer::BoxTokenStream;
|
||||
|
||||
/// For each value of the field, emit a single unprocessed token.
|
||||
#[derive(Clone)]
|
||||
@@ -11,7 +10,7 @@ pub struct RawTokenStream {
|
||||
}
|
||||
|
||||
impl Tokenizer for RawTokenizer {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a> {
|
||||
let token = Token {
|
||||
offset_from: 0,
|
||||
offset_to: text.len(),
|
||||
@@ -19,11 +18,10 @@ impl Tokenizer for RawTokenizer {
|
||||
text: text.to_string(),
|
||||
position_length: 1,
|
||||
};
|
||||
RawTokenStream {
|
||||
Box::new(RawTokenStream {
|
||||
token,
|
||||
has_token: true,
|
||||
}
|
||||
.into()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
//! ```
|
||||
//!
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
use crate::tokenizer::BoxTokenStream;
|
||||
|
||||
/// `RemoveLongFilter` removes tokens that are longer
|
||||
/// than a given number of bytes (in UTF-8 representation).
|
||||
@@ -39,8 +38,8 @@ impl<'a> RemoveLongFilterStream<'a> {
|
||||
}
|
||||
|
||||
impl TokenFilter for RemoveLongFilter {
|
||||
fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
|
||||
BoxTokenStream::from(RemoveLongFilterStream {
|
||||
fn transform<'a>(&self, token_stream: Box<dyn TokenStream + 'a>) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(RemoveLongFilterStream {
|
||||
token_length_limit: self.length_limit,
|
||||
tail: token_stream,
|
||||
})
|
||||
@@ -49,7 +48,7 @@ impl TokenFilter for RemoveLongFilter {
|
||||
|
||||
pub struct RemoveLongFilterStream<'a> {
|
||||
token_length_limit: usize,
|
||||
tail: BoxTokenStream<'a>,
|
||||
tail: Box<dyn TokenStream + 'a>,
|
||||
}
|
||||
|
||||
impl<'a> TokenStream for RemoveLongFilterStream<'a> {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use super::BoxTokenStream;
|
||||
use super::{Token, TokenStream, Tokenizer};
|
||||
use std::str::CharIndices;
|
||||
|
||||
@@ -13,8 +12,8 @@ pub struct SimpleTokenStream<'a> {
|
||||
}
|
||||
|
||||
impl Tokenizer for SimpleTokenizer {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
|
||||
BoxTokenStream::from(SimpleTokenStream {
|
||||
fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(SimpleTokenStream {
|
||||
text,
|
||||
chars: text.char_indices(),
|
||||
token: Token::default(),
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
use crate::tokenizer::BoxTokenStream;
|
||||
use rust_stemmers::{self, Algorithm};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -78,9 +77,9 @@ impl Default for Stemmer {
|
||||
}
|
||||
|
||||
impl TokenFilter for Stemmer {
|
||||
fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
|
||||
fn transform<'a>(&self, token_stream: Box<dyn TokenStream + 'a>) -> Box<dyn TokenStream + 'a> {
|
||||
let inner_stemmer = rust_stemmers::Stemmer::create(self.stemmer_algorithm);
|
||||
BoxTokenStream::from(StemmerTokenStream {
|
||||
Box::new(StemmerTokenStream {
|
||||
tail: token_stream,
|
||||
stemmer: inner_stemmer,
|
||||
})
|
||||
@@ -88,7 +87,7 @@ impl TokenFilter for Stemmer {
|
||||
}
|
||||
|
||||
pub struct StemmerTokenStream<'a> {
|
||||
tail: BoxTokenStream<'a>,
|
||||
tail: Box<dyn TokenStream + 'a>,
|
||||
stemmer: rust_stemmers::Stemmer,
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
//! assert!(stream.next().is_none());
|
||||
//! ```
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
use crate::tokenizer::BoxTokenStream;
|
||||
use fnv::FnvHasher;
|
||||
use std::collections::HashSet;
|
||||
use std::hash::BuildHasherDefault;
|
||||
@@ -51,12 +50,12 @@ impl StopWordFilter {
|
||||
|
||||
pub struct StopWordFilterStream<'a> {
|
||||
words: StopWordHashSet,
|
||||
tail: BoxTokenStream<'a>,
|
||||
tail: Box<dyn TokenStream + 'a>,
|
||||
}
|
||||
|
||||
impl TokenFilter for StopWordFilter {
|
||||
fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a> {
|
||||
BoxTokenStream::from(StopWordFilterStream {
|
||||
fn transform<'a>(&self, token_stream: Box<dyn TokenStream + 'a>) -> Box<dyn TokenStream + 'a> {
|
||||
Box::new(StopWordFilterStream {
|
||||
words: self.words.clone(),
|
||||
tail: token_stream,
|
||||
})
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
use crate::tokenizer::{BoxTokenStream, Token, TokenStream};
|
||||
use crate::tokenizer::{Token, TokenStream};
|
||||
|
||||
const POSITION_GAP: usize = 2;
|
||||
|
||||
pub(crate) struct TokenStreamChain<'a> {
|
||||
streams_with_offsets: Vec<(BoxTokenStream<'a>, usize)>,
|
||||
streams_with_offsets: Vec<(Box<dyn TokenStream + 'a>, usize)>,
|
||||
position_shift: usize,
|
||||
stream_idx: usize,
|
||||
token: Token,
|
||||
}
|
||||
|
||||
impl<'a> TokenStreamChain<'a> {
|
||||
pub fn new(streams_with_offsets: Vec<(BoxTokenStream<'a>, usize)>) -> TokenStreamChain<'a> {
|
||||
pub fn new(
|
||||
streams_with_offsets: Vec<(Box<dyn TokenStream + 'a>, usize)>,
|
||||
) -> TokenStreamChain<'a> {
|
||||
TokenStreamChain {
|
||||
streams_with_offsets,
|
||||
stream_idx: 0,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::tokenizer::{BoxTokenStream, Token, TokenStream, TokenStreamChain};
|
||||
use crate::tokenizer::{Token, TokenStream, TokenStreamChain};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::cmp::Ordering;
|
||||
|
||||
@@ -42,22 +42,23 @@ impl PreTokenizedStream {
|
||||
/// Creates a TokenStream from PreTokenizedString array
|
||||
pub fn chain_tokenized_strings<'a>(
|
||||
tok_strings: &'a [&'a PreTokenizedString],
|
||||
) -> BoxTokenStream {
|
||||
) -> Box<dyn TokenStream> {
|
||||
if tok_strings.len() == 1 {
|
||||
PreTokenizedStream::from(tok_strings[0].to_owned()).into()
|
||||
Box::new(PreTokenizedStream::from(tok_strings[0].to_owned()))
|
||||
} else {
|
||||
let mut streams_with_offsets = vec![];
|
||||
let mut total_offset = 0;
|
||||
for &tok_string in tok_strings {
|
||||
streams_with_offsets.push((
|
||||
PreTokenizedStream::from(tok_string.to_owned()).into(),
|
||||
Box::new(PreTokenizedStream::from(tok_string.to_owned()))
|
||||
as Box<dyn TokenStream>,
|
||||
total_offset,
|
||||
));
|
||||
if let Some(last_token) = tok_string.tokens.last() {
|
||||
total_offset += last_token.offset_to;
|
||||
}
|
||||
}
|
||||
TokenStreamChain::new(streams_with_offsets).into()
|
||||
Box::new(TokenStreamChain::new(streams_with_offsets))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ use crate::tokenizer::TokenStreamChain;
|
||||
use serde::{Deserialize, Serialize};
|
||||
/// The tokenizer module contains all of the tools used to process
|
||||
/// text in `tantivy`.
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
/// Token
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
||||
@@ -39,7 +38,7 @@ impl Default for Token {
|
||||
/// It simply wraps a `Tokenizer` and a list of `TokenFilter` that are applied sequentially.
|
||||
pub struct TextAnalyzer {
|
||||
tokenizer: Box<dyn Tokenizer>,
|
||||
token_filters: Vec<BoxTokenFilter>,
|
||||
token_filters: Vec<Box<dyn TokenFilter>>,
|
||||
}
|
||||
|
||||
impl<T: Tokenizer> From<T> for TextAnalyzer {
|
||||
@@ -49,11 +48,14 @@ impl<T: Tokenizer> From<T> for TextAnalyzer {
|
||||
}
|
||||
|
||||
impl TextAnalyzer {
|
||||
/// Creates a new `TextAnalyzer` given a tokenizer and a vector of `BoxTokenFilter`.
|
||||
/// Creates a new `TextAnalyzer` given a tokenizer and a vector of `Box<dyn TokenFilter>`.
|
||||
///
|
||||
/// When creating a `TextAnalyzer` from a `Tokenizer` alone, prefer using
|
||||
/// `TextAnalyzer::from(tokenizer)`.
|
||||
pub fn new<T: Tokenizer>(tokenizer: T, token_filters: Vec<BoxTokenFilter>) -> TextAnalyzer {
|
||||
pub fn new<T: Tokenizer>(
|
||||
tokenizer: T,
|
||||
token_filters: Vec<Box<dyn TokenFilter>>,
|
||||
) -> TextAnalyzer {
|
||||
TextAnalyzer {
|
||||
tokenizer: Box::new(tokenizer),
|
||||
token_filters,
|
||||
@@ -76,8 +78,8 @@ impl TextAnalyzer {
|
||||
/// .filter(Stemmer::default());
|
||||
/// ```
|
||||
///
|
||||
pub fn filter<F: Into<BoxTokenFilter>>(mut self, token_filter: F) -> Self {
|
||||
self.token_filters.push(token_filter.into());
|
||||
pub fn filter<F: TokenFilter>(mut self, token_filter: F) -> Self {
|
||||
self.token_filters.push(Box::new(token_filter));
|
||||
self
|
||||
}
|
||||
|
||||
@@ -86,7 +88,7 @@ impl TextAnalyzer {
|
||||
/// The resulting boxed `TokenStream` is equivalent to what would be obtained if the &str were
|
||||
/// one concatenated `&str`, with an artificial position gap of `2` between the different fields
|
||||
/// to prevent a `PhraseQuery` from accidentally matching across two terms.
|
||||
pub fn token_stream_texts<'a>(&self, texts: &'a [&'a str]) -> BoxTokenStream<'a> {
|
||||
pub fn token_stream_texts<'a>(&self, texts: &'a [&'a str]) -> Box<dyn TokenStream + 'a> {
|
||||
debug_assert!(!texts.is_empty());
|
||||
if texts.len() == 1 {
|
||||
self.token_stream(texts[0])
|
||||
@@ -97,12 +99,12 @@ impl TextAnalyzer {
|
||||
streams_with_offsets.push((self.token_stream(text), total_offset));
|
||||
total_offset += text.len();
|
||||
}
|
||||
From::from(TokenStreamChain::new(streams_with_offsets))
|
||||
Box::new(TokenStreamChain::new(streams_with_offsets))
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a token stream for a given `str`.
|
||||
pub fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a> {
|
||||
pub fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a> {
|
||||
let mut token_stream = self.tokenizer.token_stream(text);
|
||||
for token_filter in &self.token_filters {
|
||||
token_stream = token_filter.transform(token_stream);
|
||||
@@ -134,7 +136,7 @@ impl Clone for TextAnalyzer {
|
||||
/// This API may change to use associated types.
|
||||
pub trait Tokenizer: 'static + Send + Sync + TokenizerClone {
|
||||
/// Creates a token stream for a given `str`.
|
||||
fn token_stream<'a>(&self, text: &'a str) -> BoxTokenStream<'a>;
|
||||
fn token_stream<'a>(&self, text: &'a str) -> Box<dyn TokenStream + 'a>;
|
||||
}
|
||||
|
||||
pub trait TokenizerClone {
|
||||
@@ -150,48 +152,7 @@ impl<T: Tokenizer + Clone> TokenizerClone for T {
|
||||
/// Simple wrapper of `Box<dyn TokenStream + 'a>`.
|
||||
///
|
||||
/// See `TokenStream` for more information.
|
||||
pub struct BoxTokenStream<'a>(Box<dyn TokenStream + 'a>);
|
||||
|
||||
impl<'a, T> From<T> for BoxTokenStream<'a>
|
||||
where
|
||||
T: TokenStream + 'a,
|
||||
{
|
||||
fn from(token_stream: T) -> BoxTokenStream<'a> {
|
||||
BoxTokenStream(Box::new(token_stream))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Deref for BoxTokenStream<'a> {
|
||||
type Target = dyn TokenStream + 'a;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&*self.0
|
||||
}
|
||||
}
|
||||
impl<'a> DerefMut for BoxTokenStream<'a> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut *self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple wrapper of `Box<dyn TokenFilter + 'a>`.
|
||||
///
|
||||
/// See `TokenFilter` for more information.
|
||||
pub struct BoxTokenFilter(Box<dyn TokenFilter>);
|
||||
|
||||
impl Deref for BoxTokenFilter {
|
||||
type Target = dyn TokenFilter;
|
||||
|
||||
fn deref(&self) -> &dyn TokenFilter {
|
||||
&*self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: TokenFilter> From<T> for BoxTokenFilter {
|
||||
fn from(tokenizer: T) -> BoxTokenFilter {
|
||||
BoxTokenFilter(Box::new(tokenizer))
|
||||
}
|
||||
}
|
||||
// pub struct Box<dyn TokenStream + 'a>(Box<dyn TokenStream + 'a>);
|
||||
|
||||
/// `TokenStream` is the result of the tokenization.
|
||||
///
|
||||
@@ -272,18 +233,18 @@ pub trait TokenStream {
|
||||
}
|
||||
|
||||
pub trait TokenFilterClone {
|
||||
fn box_clone(&self) -> BoxTokenFilter;
|
||||
fn box_clone(&self) -> Box<dyn TokenFilter>;
|
||||
}
|
||||
|
||||
/// Trait for the pluggable components of `Tokenizer`s.
|
||||
pub trait TokenFilter: 'static + Send + Sync + TokenFilterClone {
|
||||
/// Wraps a token stream and returns the modified one.
|
||||
fn transform<'a>(&self, token_stream: BoxTokenStream<'a>) -> BoxTokenStream<'a>;
|
||||
fn transform<'a>(&self, token_stream: Box<dyn TokenStream + 'a>) -> Box<dyn TokenStream + 'a>;
|
||||
}
|
||||
|
||||
impl<T: TokenFilter + Clone> TokenFilterClone for T {
|
||||
fn box_clone(&self) -> BoxTokenFilter {
|
||||
BoxTokenFilter::from(self.clone())
|
||||
fn box_clone(&self) -> Box<dyn TokenFilter> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user