Remove BoxTokenStream.

François Massot
2023-06-23 13:33:40 +02:00
parent b82cd08f5d
commit dc783f8328
5 changed files with 9 additions and 44 deletions
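In short: the `BoxTokenStream` newtype from `tokenizer-api` is dropped, and call sites box the trait object directly, since `Box<dyn TokenStream + 'a>` itself implements `TokenStream`. A minimal sketch of the post-commit pattern (the `PreTokenizedString` / `Token` values below are made up for illustration; only the boxing pattern comes from this diff):

```rust
// Sketch of the pattern this commit moves to: no `BoxTokenStream` newtype;
// a stream is type-erased as a plain `Box<dyn TokenStream + '_>`.
use tantivy::tokenizer::{PreTokenizedStream, PreTokenizedString, Token, TokenStream};

fn main() {
    // Illustrative pre-tokenized value (not part of the diff).
    let pre_tokenized = PreTokenizedString {
        text: "hello world".to_string(),
        tokens: vec![
            Token {
                offset_from: 0,
                offset_to: 5,
                position: 0,
                text: "hello".to_string(),
                position_length: 1,
            },
            Token {
                offset_from: 6,
                offset_to: 11,
                position: 1,
                text: "world".to_string(),
                position_length: 1,
            },
        ],
    };

    // Before: `let stream: BoxTokenStream = PreTokenizedStream::from(pre_tokenized).into();`
    // After: box the trait object directly; `Box<dyn TokenStream>` still implements `TokenStream`.
    let mut stream: Box<dyn TokenStream> = Box::new(PreTokenizedStream::from(pre_tokenized));
    while stream.advance() {
        println!("{:?}", stream.token());
    }
}
```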

View File

@@ -209,7 +209,7 @@ impl SegmentWriter {
 for value in values {
     let mut token_stream = match value {
         Value::PreTokStr(tok_str) => {
-            PreTokenizedStream::from(tok_str.clone()).into()
+            Box::new(PreTokenizedStream::from(tok_str.clone()))
         }
         Value::Str(ref text) => {
             let text_analyzer =

View File

@@ -4,9 +4,7 @@ use std::collections::{BinaryHeap, HashMap};
 use crate::query::bm25::idf;
 use crate::query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery};
 use crate::schema::{Field, FieldType, IndexRecordOption, Term, Value};
-use crate::tokenizer::{
-    BoxTokenStream, FacetTokenizer, PreTokenizedStream, TokenStream, Tokenizer,
-};
+use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TokenStream, Tokenizer};
 use crate::{DocAddress, Result, Searcher, TantivyError};
 #[derive(Debug, PartialEq)]
@@ -206,8 +204,7 @@ impl MoreLikeThis {
 for value in values {
     match value {
         Value::PreTokStr(tok_str) => {
-            let mut token_stream: BoxTokenStream =
-                PreTokenizedStream::from(tok_str.clone()).into();
+            let mut token_stream = PreTokenizedStream::from(tok_str.clone());
             token_stream.process(&mut |token| {
                 if !self.is_noise_word(token.text.clone()) {
                     let term = Term::from_field_text(field, &token.text);
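With the wrapper gone, this path needs no boxing at all: the concrete `PreTokenizedStream` is driven directly through the `TokenStream` trait. A small sketch of the same `process` pattern, with a hypothetical `is_noise_word` standing in for the method on `MoreLikeThis`:

```rust
use tantivy::tokenizer::{PreTokenizedStream, PreTokenizedString, TokenStream};

// Hypothetical stand-in for `MoreLikeThis::is_noise_word`.
fn is_noise_word(text: &str) -> bool {
    text.len() < 2
}

// Collect the non-noise token texts of a pre-tokenized field value.
fn collect_terms(tok_str: &PreTokenizedString) -> Vec<String> {
    let mut terms = Vec::new();
    // No `BoxTokenStream` conversion any more: the concrete stream is used as-is.
    let mut token_stream = PreTokenizedStream::from(tok_str.clone());
    token_stream.process(&mut |token| {
        if !is_noise_word(&token.text) {
            terms.push(token.text.clone());
        }
    });
    terms
}
```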

View File

@@ -139,7 +139,7 @@ mod tokenizer;
 mod tokenizer_manager;
 mod whitespace_tokenizer;
-pub use tokenizer_api::{BoxTokenStream, Token, TokenFilter, TokenStream, Tokenizer};
+pub use tokenizer_api::{Token, TokenFilter, TokenStream, Tokenizer};
 pub use self::alphanum_only::AlphaNumOnlyFilter;
 pub use self::ascii_folding_filter::AsciiFoldingFilter;

View File

@@ -1,7 +1,7 @@
 use dyn_clone::DynClone;
 /// The tokenizer module contains all of the tools used to process
 /// text in `tantivy`.
-use tokenizer_api::{BoxTokenStream, TokenFilter, TokenStream, Tokenizer};
+use tokenizer_api::{TokenFilter, TokenStream, Tokenizer};
 use crate::tokenizer::empty_tokenizer::EmptyTokenizer;
@@ -14,12 +14,12 @@ pub struct TextAnalyzer {
 /// A boxable `Tokenizer`, with its `TokenStream` type erased.
 trait BoxableTokenizer: 'static + Send + Sync + DynClone {
     /// Creates a boxed token stream for a given `str`.
-    fn box_token_stream<'a>(&'a mut self, text: &'a str) -> BoxTokenStream<'a>;
+    fn box_token_stream<'a>(&'a mut self, text: &'a str) -> Box<dyn TokenStream + 'a>;
 }
 impl<T: Tokenizer> BoxableTokenizer for T {
-    fn box_token_stream<'a>(&'a mut self, text: &'a str) -> BoxTokenStream<'a> {
-        self.token_stream(text).into()
+    fn box_token_stream<'a>(&'a mut self, text: &'a str) -> Box<dyn TokenStream + 'a> {
+        Box::new(self.token_stream(text))
     }
 }
@@ -98,7 +98,7 @@ impl TextAnalyzer {
     }
     /// Creates a token stream for a given `str`.
-    pub fn token_stream<'a>(&'a mut self, text: &'a str) -> BoxTokenStream<'a> {
+    pub fn token_stream<'a>(&'a mut self, text: &'a str) -> Box<dyn TokenStream + 'a> {
         self.tokenizer.box_token_stream(text)
     }
 }
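Callers of `TextAnalyzer::token_stream` now get a `Box<dyn TokenStream + '_>` instead of a `BoxTokenStream`, but usage is unchanged because the boxed trait object still implements `TokenStream` (see the blanket impl kept in `tokenizer-api` in the last file below). A hedged usage sketch; constructing the analyzer via `TextAnalyzer::from(SimpleTokenizer::default())` is assumed from the tantivy API of this period:

```rust
use tantivy::tokenizer::{SimpleTokenizer, TextAnalyzer, TokenStream};

fn main() {
    // Assumed constructor: a `TextAnalyzer` built from a bare tokenizer.
    let mut analyzer = TextAnalyzer::from(SimpleTokenizer::default());
    // After this commit, this returns `Box<dyn TokenStream + '_>`.
    let mut stream = analyzer.token_stream("Remove BoxTokenStream");
    while stream.advance() {
        let token = stream.token();
        println!("{} [{}..{}]", token.text, token.offset_from, token.offset_to);
    }
}
```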

View File

@@ -6,7 +6,6 @@
 //! Checkout the [tantivy repo](https://github.com/quickwit-oss/tantivy/tree/main/src/tokenizer) for some examples.
 use std::borrow::{Borrow, BorrowMut};
-use std::ops::{Deref, DerefMut};
 use serde::{Deserialize, Serialize};
@@ -60,37 +59,6 @@ pub trait Tokenizer: 'static + Clone + Send + Sync {
     fn token_stream<'a>(&'a mut self, text: &'a str) -> Self::TokenStream<'a>;
 }
-/// Simple wrapper of `Box<dyn TokenStream + 'a>`.
-pub struct BoxTokenStream<'a>(Box<dyn TokenStream + 'a>);
-impl<'a> From<BoxTokenStream<'a>> for Box<dyn TokenStream + 'a> {
-    fn from(token_stream: BoxTokenStream<'a>) -> Self {
-        token_stream.0
-    }
-}
-impl<'a, T> From<T> for BoxTokenStream<'a>
-where T: TokenStream + 'a
-{
-    fn from(token_stream: T) -> BoxTokenStream<'a> {
-        BoxTokenStream(Box::new(token_stream))
-    }
-}
-impl<'a> Deref for BoxTokenStream<'a> {
-    type Target = dyn TokenStream + 'a;
-    fn deref(&self) -> &Self::Target {
-        &*self.0
-    }
-}
-impl<'a> DerefMut for BoxTokenStream<'a> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut *self.0
-    }
-}
 impl<'a> TokenStream for Box<dyn TokenStream + 'a> {
     fn advance(&mut self) -> bool {
         let token_stream: &mut dyn TokenStream = self.borrow_mut();