mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-09 10:32:55 +00:00
Analyzer::token_stream does not need to be &mut self
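The diff below drops the &mut self receiver from Analyzer::token_stream: each call constructs a fresh token stream and never mutates the analyzer itself, so a shared borrow is enough. A minimal sketch of why that works, using hypothetical stand-in types (Token, WhitespaceAnalyzer), not tantivy's real ones:

// Sketch only: simplified stand-ins for the Analyzer/TokenStream pair.
#[derive(Debug)]
struct Token {
    term: String,
    offset_from: usize,
    offset_to: usize,
}

trait Analyzer {
    // &self suffices: the returned stream owns all per-call state.
    fn token_stream(&self, text: &str) -> Vec<Token>;
}

struct WhitespaceAnalyzer;

impl Analyzer for WhitespaceAnalyzer {
    fn token_stream(&self, text: &str) -> Vec<Token> {
        text.split_whitespace()
            .map(|word| {
                // word is a subslice of text, so pointer arithmetic gives its byte offset
                let offset_from = word.as_ptr() as usize - text.as_ptr() as usize;
                Token {
                    term: word.to_string(),
                    offset_from,
                    offset_to: offset_from + word.len(),
                }
            })
            .collect()
    }
}

fn main() {
    let analyzer = WhitespaceAnalyzer; // no mut binding needed anymore
    println!("{:?}", analyzer.token_stream("hello world"));
}

With &mut self, a caller holding only a shared reference (for example an analyzer stored in a registry) could not tokenize at all; with &self it can.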
@@ -38,7 +38,7 @@ impl Default for Token {
 pub trait Analyzer<'a>: Sized + Clone {
     type TokenStreamImpl: TokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl;
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl;
 
     fn filter<NewFilter>(self, new_filter: NewFilter) -> ChainAnalyzer<NewFilter, Self>
         where NewFilter: TokenFilterFactory<<Self as Analyzer<'a>>::TokenStreamImpl>
@@ -51,8 +51,8 @@ pub trait Analyzer<'a>: Sized + Clone {
 }
 
 pub trait BoxedAnalyzer: Send + Sync {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a>;
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a>;
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
     fn boxed_clone(&self) -> Box<BoxedAnalyzer>;
 }
 
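Since BoxedAnalyzer is Send + Sync and its methods now take &self, one boxed analyzer can serve many threads without locking. A hedged sketch of that pattern with a simplified trait (Lowercaser is hypothetical, and modern dyn syntax is used here):

use std::sync::Arc;
use std::thread;

// Simplified stand-in for BoxedAnalyzer: &self + Send + Sync means an
// Arc<dyn ...> can be shared freely across threads.
trait BoxedAnalyzer: Send + Sync {
    fn token_terms(&self, text: &str) -> Vec<String>;
}

struct Lowercaser;

impl BoxedAnalyzer for Lowercaser {
    fn token_terms(&self, text: &str) -> Vec<String> {
        text.split_whitespace().map(str::to_lowercase).collect()
    }
}

fn main() {
    let analyzer: Arc<dyn BoxedAnalyzer> = Arc::new(Lowercaser);
    let handles: Vec<_> = (0..4)
        .map(|i| {
            let a = Arc::clone(&analyzer);
            thread::spawn(move || a.token_terms(&format!("Doc number {}", i)))
        })
        .collect();
    for handle in handles {
        println!("{:?}", handle.join().unwrap());
    }
}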
@@ -60,11 +60,11 @@ pub trait BoxedAnalyzer: Send + Sync {
 struct BoxableAnalyzer<A>(A) where A: for<'a> Analyzer<'a> + Send + Sync;
 
 impl<A> BoxedAnalyzer for BoxableAnalyzer<A> where A: 'static + Send + Sync + for<'a> Analyzer<'a> {
-    fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a> {
+    fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a> {
         box self.0.token_stream(text)
     }
 
-    fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
+    fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
         assert!(texts.len() > 0);
         if texts.len() == 1 {
             box self.0.token_stream(texts[0])
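Note that box self.0.token_stream(text) relies on the unstable box_syntax nightly feature in use here at the time; on stable Rust the same allocation is spelled with Box::new. A tiny equivalent sketch:

// Stable-Rust equivalent of a nightly `box expr` allocation.
fn boxed_stream(terms: Vec<String>) -> Box<dyn Iterator<Item = String>> {
    Box::new(terms.into_iter()) // nightly: box terms.into_iter()
}

fn main() {
    let mut stream = boxed_stream(vec!["a".into(), "b".into()]);
    assert_eq!(stream.next().as_deref(), Some("a"));
}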
@@ -72,7 +72,7 @@ impl<A> BoxedAnalyzer for BoxableAnalyzer<A> where A: 'static + Send + Sync + fo
         else {
             let mut offsets = vec!();
             let mut total_offset = 0;
-            for text in texts {
+            for &text in texts {
                 offsets.push(total_offset);
                 total_offset += text.len();
             }
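The for &text in texts fix destructures the &&str that iterating a &[&str] yields, leaving text as a plain &str (auto-deref made the method calls behave the same either way; the pattern form just keeps the type tidy). The offset bookkeeping it drives can be sketched standalone like this (cumulative_offsets is a hypothetical helper, not tantivy's code):

// Each concatenated text starts where the previous ones ended, in bytes.
fn cumulative_offsets(texts: &[&str]) -> Vec<usize> {
    let mut offsets = vec![];
    let mut total_offset = 0;
    for &text in texts {
        // `&text` strips the extra reference: text is &str, not &&str
        offsets.push(total_offset);
        total_offset += text.len();
    }
    offsets
}

fn main() {
    assert_eq!(cumulative_offsets(&["ab", "cde", "f"]), vec![0, 2, 5]);
}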
@@ -154,7 +154,7 @@ impl<'a, HeadTokenFilterFactory, TailAnalyzer> Analyzer<'a>
 {
     type TokenStreamImpl = HeadTokenFilterFactory::ResultTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let tail_token_stream = self.tail.token_stream(text);
         self.head.transform(tail_token_stream)
     }
 
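This is the filter-chain analyzer: the tail analyzer tokenizes the text, and the head filter factory wraps the resulting stream. A simplified, hypothetical sketch of that delegation (real tantivy filters wrap streams lazily rather than materializing Vecs):

// Illustrative chain: tail produces tokens, head transforms them.
struct LowercaseFilter;

impl LowercaseFilter {
    fn transform(&self, tokens: Vec<String>) -> Vec<String> {
        tokens.into_iter().map(|t| t.to_lowercase()).collect()
    }
}

struct ChainAnalyzer<Tail> {
    head: LowercaseFilter,
    tail: Tail,
}

impl<Tail: Fn(&str) -> Vec<String>> ChainAnalyzer<Tail> {
    // Mirrors the diff: build the tail stream, then hand it to the head.
    fn token_stream(&self, text: &str) -> Vec<String> {
        let tail_token_stream = (self.tail)(text);
        self.head.transform(tail_token_stream)
    }
}

fn main() {
    let analyzer = ChainAnalyzer {
        head: LowercaseFilter,
        tail: |text: &str| text.split_whitespace().map(String::from).collect::<Vec<String>>(),
    };
    assert_eq!(analyzer.token_stream("Hello WORLD"), vec!["hello", "world"]);
}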
@@ -21,7 +21,7 @@ pub struct JapaneseTokenizerStream {
 impl<'a> Analyzer<'a> for JapaneseTokenizer {
     type TokenStreamImpl = JapaneseTokenizerStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let mut tokens = vec![];
         let mut offset_from;
         let mut offset_to = 0;
@@ -1,5 +1,4 @@
 use super::{TokenFilterFactory, TokenStream, Token};
-use std::ascii::AsciiExt;
 
 
 /// Token filter that lowercase terms.
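Dropping use std::ascii::AsciiExt; works because the methods the trait used to provide, such as to_ascii_lowercase, became inherent on str, char, and u8, making the import redundant (the trait itself was later deprecated). For example, this compiles on stable with no import at all:

fn main() {
    // Inherent methods: no AsciiExt import required.
    assert_eq!("TöKEN".to_ascii_lowercase(), "töken"); // non-ASCII chars pass through
    assert_eq!(b'A'.to_ascii_lowercase(), b'a');
}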
@@ -29,7 +29,7 @@ mod test {
     #[test]
     fn test_raw_tokenizer() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("raw").unwrap();
+        let en_analyzer = analyzer_manager.get("raw").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.term.clone()); };
@@ -44,7 +44,7 @@ mod test {
     fn test_en_analyzer() {
         let analyzer_manager = AnalyzerManager::default();
         assert!(analyzer_manager.get("en_doesnotexist").is_none());
-        let mut en_analyzer = analyzer_manager.get("en_stem").unwrap();
+        let en_analyzer = analyzer_manager.get("en_stem").unwrap();
         let mut tokens: Vec<String> = vec![];
         {
             let mut add_token = |token: &Token| { tokens.push(token.term.clone()); };
@@ -60,7 +60,7 @@ mod test {
     #[test]
     fn test_jp_analyzer() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("ja").unwrap();
+        let en_analyzer = analyzer_manager.get("ja").unwrap();
 
         let mut tokens: Vec<String> = vec![];
         {
@@ -78,7 +78,7 @@ mod test {
     #[test]
     fn test_tokenizer_empty() {
         let analyzer_manager = AnalyzerManager::default();
-        let mut en_analyzer = analyzer_manager.get("en_stem").unwrap();
+        let en_analyzer = analyzer_manager.get("en_stem").unwrap();
         {
             let mut tokens: Vec<String> = vec![];
             {
@@ -11,7 +11,7 @@ pub struct RawTokenStream {
 impl<'a> Analyzer<'a> for RawTokenizer {
     type TokenStreamImpl = RawTokenStream;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         let token = Token {
             offset_from: 0,
             offset_to: text.len(),
@@ -14,7 +14,7 @@ pub struct SimpleTokenStream<'a> {
 impl<'a> Analyzer<'a> for SimpleTokenizer {
     type TokenStreamImpl = SimpleTokenStream<'a>;
 
-    fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
+    fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
         SimpleTokenStream {
             text: text,
             chars: text.char_indices(),
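SimpleTokenStream stores the text alongside a char_indices iterator, which yields each character with its byte offset; that is what keeps offset_from/offset_to byte-accurate even for multi-byte UTF-8 input. A standalone sketch of the technique (simple_tokens is a hypothetical helper, not tantivy's code):

// Word boundaries via char_indices: offsets are byte positions into text.
fn simple_tokens(text: &str) -> Vec<(usize, usize, &str)> {
    let mut tokens = vec![];
    let mut start: Option<usize> = None;
    for (idx, ch) in text.char_indices() {
        if ch.is_alphanumeric() {
            start.get_or_insert(idx); // remember the token's starting byte
        } else if let Some(from) = start.take() {
            tokens.push((from, idx, &text[from..idx]));
        }
    }
    if let Some(from) = start {
        tokens.push((from, text.len(), &text[from..]));
    }
    tokens
}

fn main() {
    // 'é' is two bytes, so "café" spans bytes 0..5, not 0..4.
    assert_eq!(simple_tokens("café bar"), vec![(0, 5, "café"), (6, 9, "bar")]);
}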
@@ -48,16 +48,18 @@ impl<'a, TTokenStream> TokenStream for TokenStreamChain<TTokenStream>
     }
 
     fn token(&self) -> &Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &self.token
     }
 
     fn token_mut(&mut self) -> &mut Token {
-        if self.stream_idx > self.token_streams.len() {
-            panic!("You called .token(), after the end of the token stream has been reached");
-        }
+        assert!(
+            self.stream_idx <= self.token_streams.len(),
+            "You called .token(), after the end of the token stream has been reached"
+        );
         &mut self.token
     }
 }
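The if/panic! pairs above are rewritten as a single assert! with a message; behavior is identical (note the condition flips from the failure case stream_idx > len to the invariant stream_idx <= len), but the invariant now reads directly. The same transformation in isolation:

// Both functions panic with the same message under the same condition.
fn check_old(stream_idx: usize, len: usize) {
    if stream_idx > len {
        panic!("You called .token(), after the end of the token stream has been reached");
    }
}

fn check_new(stream_idx: usize, len: usize) {
    assert!(
        stream_idx <= len,
        "You called .token(), after the end of the token stream has been reached"
    );
}

fn main() {
    check_old(2, 3); // passes
    check_new(2, 3); // passes; both would panic identically for, say, (4, 3)
}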
@@ -13,12 +13,16 @@
 ///
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
 pub enum IndexRecordOption {
+    /// Records only the `DocId`s
     #[serde(rename = "basic")]
-    Basic, //< records only the `DocId`s
+    Basic,
+    /// Records the document ids as well as the term frequency.
     #[serde(rename = "freq")]
-    WithFreqs, //< records the document ids as well as the term frequency.
+    WithFreqs,
+    /// Records the document id, the term frequency and the positions of
+    /// the occurences in the document.
     #[serde(rename = "position")]
-    WithFreqsAndPositions, //< records the document id, the term frequency and the positions of the occurences in the document.
+    WithFreqsAndPositions,
 }
 
 impl IndexRecordOption {
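Here the trailing //< comments are promoted to /// doc comments so rustdoc picks them up; the #[serde(rename = ...)] attributes, unchanged, make the variants serialize as the strings "basic", "freq", and "position". A hedged round-trip sketch of that serde behavior (needs the serde and serde_json crates; derive syntax shown is the modern form):

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum IndexRecordOption {
    #[serde(rename = "basic")]
    Basic,
    #[serde(rename = "freq")]
    WithFreqs,
    #[serde(rename = "position")]
    WithFreqsAndPositions,
}

fn main() {
    // Unit variants of a plain enum serialize as their (renamed) names.
    let json = serde_json::to_string(&IndexRecordOption::WithFreqs).unwrap();
    assert_eq!(json, "\"freq\"");
    let back: IndexRecordOption = serde_json::from_str("\"position\"").unwrap();
    assert_eq!(back, IndexRecordOption::WithFreqsAndPositions);
}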