From dc783f832887711307617b26f26ab44c5ff57ac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Massot?= Date: Fri, 23 Jun 2023 13:33:40 +0200 Subject: [PATCH] Remove BoxTokenStream. --- src/indexer/segment_writer.rs | 2 +- src/query/more_like_this/more_like_this.rs | 7 ++--- src/tokenizer/mod.rs | 2 +- src/tokenizer/tokenizer.rs | 10 +++---- tokenizer-api/src/lib.rs | 32 ---------------------- 5 files changed, 9 insertions(+), 44 deletions(-) diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index bad8638e5..7fa58e18a 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -209,7 +209,7 @@ impl SegmentWriter { for value in values { let mut token_stream = match value { Value::PreTokStr(tok_str) => { - PreTokenizedStream::from(tok_str.clone()).into() + Box::new(PreTokenizedStream::from(tok_str.clone())) } Value::Str(ref text) => { let text_analyzer = diff --git a/src/query/more_like_this/more_like_this.rs b/src/query/more_like_this/more_like_this.rs index 994dd96c0..ca86c70b1 100644 --- a/src/query/more_like_this/more_like_this.rs +++ b/src/query/more_like_this/more_like_this.rs @@ -4,9 +4,7 @@ use std::collections::{BinaryHeap, HashMap}; use crate::query::bm25::idf; use crate::query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery}; use crate::schema::{Field, FieldType, IndexRecordOption, Term, Value}; -use crate::tokenizer::{ - BoxTokenStream, FacetTokenizer, PreTokenizedStream, TokenStream, Tokenizer, -}; +use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TokenStream, Tokenizer}; use crate::{DocAddress, Result, Searcher, TantivyError}; #[derive(Debug, PartialEq)] @@ -206,8 +204,7 @@ impl MoreLikeThis { for value in values { match value { Value::PreTokStr(tok_str) => { - let mut token_stream: BoxTokenStream = - PreTokenizedStream::from(tok_str.clone()).into(); + let mut token_stream = PreTokenizedStream::from(tok_str.clone()); token_stream.process(&mut |token| { if 
!self.is_noise_word(token.text.clone()) { let term = Term::from_field_text(field, &token.text); diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 9e53bbba8..42d98e90a 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -139,7 +139,7 @@ mod tokenizer; mod tokenizer_manager; mod whitespace_tokenizer; -pub use tokenizer_api::{BoxTokenStream, Token, TokenFilter, TokenStream, Tokenizer}; +pub use tokenizer_api::{Token, TokenFilter, TokenStream, Tokenizer}; pub use self::alphanum_only::AlphaNumOnlyFilter; pub use self::ascii_folding_filter::AsciiFoldingFilter; diff --git a/src/tokenizer/tokenizer.rs b/src/tokenizer/tokenizer.rs index 29a11b396..a79802291 100644 --- a/src/tokenizer/tokenizer.rs +++ b/src/tokenizer/tokenizer.rs @@ -1,7 +1,7 @@ use dyn_clone::DynClone; /// The tokenizer module contains all of the tools used to process /// text in `tantivy`. -use tokenizer_api::{BoxTokenStream, TokenFilter, TokenStream, Tokenizer}; +use tokenizer_api::{TokenFilter, TokenStream, Tokenizer}; use crate::tokenizer::empty_tokenizer::EmptyTokenizer; @@ -14,12 +14,12 @@ pub struct TextAnalyzer { /// A boxable `Tokenizer`, with its `TokenStream` type erased. trait BoxableTokenizer: 'static + Send + Sync + DynClone { /// Creates a boxed token stream for a given `str`. - fn box_token_stream<'a>(&'a mut self, text: &'a str) -> BoxTokenStream<'a>; + fn box_token_stream<'a>(&'a mut self, text: &'a str) -> Box<dyn TokenStream + 'a>; } impl<T: Tokenizer> BoxableTokenizer for T { - fn box_token_stream<'a>(&'a mut self, text: &'a str) -> BoxTokenStream<'a> { - self.token_stream(text).into() + fn box_token_stream<'a>(&'a mut self, text: &'a str) -> Box<dyn TokenStream + 'a> { + Box::new(self.token_stream(text)) } } @@ -98,7 +98,7 @@ impl TextAnalyzer { } /// Creates a token stream for a given `str`.
- pub fn token_stream<'a>(&'a mut self, text: &'a str) -> BoxTokenStream<'a> { + pub fn token_stream<'a>(&'a mut self, text: &'a str) -> Box<dyn TokenStream + 'a> { self.tokenizer.box_token_stream(text) } } diff --git a/tokenizer-api/src/lib.rs b/tokenizer-api/src/lib.rs index eca0d9566..93defac11 100644 --- a/tokenizer-api/src/lib.rs +++ b/tokenizer-api/src/lib.rs @@ -6,7 +6,6 @@ //! Checkout the [tantivy repo](https://github.com/quickwit-oss/tantivy/tree/main/src/tokenizer) for some examples. use std::borrow::{Borrow, BorrowMut}; -use std::ops::{Deref, DerefMut}; use serde::{Deserialize, Serialize}; @@ -60,37 +59,6 @@ pub trait Tokenizer: 'static + Clone + Send + Sync { fn token_stream<'a>(&'a mut self, text: &'a str) -> Self::TokenStream<'a>; } -/// Simple wrapper of `Box<dyn TokenStream + 'a>`. -pub struct BoxTokenStream<'a>(Box<dyn TokenStream + 'a>); - -impl<'a> From<BoxTokenStream<'a>> for Box<dyn TokenStream + 'a> { - fn from(token_stream: BoxTokenStream<'a>) -> Self { - token_stream.0 - } - } - -impl<'a, T> From<T> for BoxTokenStream<'a> where T: TokenStream + 'a { - fn from(token_stream: T) -> BoxTokenStream<'a> { - BoxTokenStream(Box::new(token_stream)) - } - } - -impl<'a> Deref for BoxTokenStream<'a> { - type Target = dyn TokenStream + 'a; - - fn deref(&self) -> &Self::Target { - &*self.0 - } - } - -impl<'a> DerefMut for BoxTokenStream<'a> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut *self.0 - } - } - impl<'a> TokenStream for Box<dyn TokenStream + 'a> { fn advance(&mut self) -> bool { let token_stream: &mut dyn TokenStream = self.borrow_mut();