Merge branch 'master' of github.com:tantivy-search/tantivy

Conflicts:
	src/analyzer/mod.rs
	src/schema/index_record_option.rs
	src/tokenizer/lower_caser.rs
	src/tokenizer/tokenizer.rs
This commit is contained in:
Paul Masurel
2017-11-26 10:54:05 +09:00
6 changed files with 22 additions and 21 deletions

View File

@@ -21,7 +21,7 @@ pub struct JapaneseTokenizerStream {
impl<'a> Tokenizer<'a> for JapaneseTokenizer {
type TokenStreamImpl = JapaneseTokenizerStream;
fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
let mut tokens = vec![];
let mut offset_from;
let mut offset_to = 0;

View File

@@ -159,7 +159,7 @@ mod test {
#[test]
fn test_raw_tokenizer() {
let tokenizer_manager = TokenizerManager::default();
let mut en_tokenizer = tokenizer_manager.get("raw").unwrap();
let en_tokenizer = tokenizer_manager.get("raw").unwrap();
let mut tokens: Vec<String> = vec![];
{
let mut add_token = |token: &Token| { tokens.push(token.text.clone()); };
@@ -174,7 +174,7 @@ mod test {
fn test_en_tokenizer() {
let tokenizer_manager = TokenizerManager::default();
assert!(tokenizer_manager.get("en_doesnotexist").is_none());
let mut en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
let en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
let mut tokens: Vec<String> = vec![];
{
let mut add_token = |token: &Token| { tokens.push(token.text.clone()); };
@@ -190,7 +190,7 @@ mod test {
#[test]
fn test_jp_tokenizer() {
let tokenizer_manager = TokenizerManager::default();
let mut en_tokenizer = tokenizer_manager.get("ja").unwrap();
let en_tokenizer = tokenizer_manager.get("ja").unwrap();
let mut tokens: Vec<String> = vec![];
{
@@ -208,7 +208,7 @@ mod test {
#[test]
fn test_tokenizer_empty() {
let tokenizer_manager = TokenizerManager::default();
let mut en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
let en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
{
let mut tokens: Vec<String> = vec![];
{

View File

@@ -13,7 +13,7 @@ pub struct RawTokenStream {
impl<'a> Tokenizer<'a> for RawTokenizer {
type TokenStreamImpl = RawTokenStream;
fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
let token = Token {
offset_from: 0,
offset_to: text.len(),

View File

@@ -16,7 +16,7 @@ pub struct SimpleTokenStream<'a> {
impl<'a> Tokenizer<'a> for SimpleTokenizer {
type TokenStreamImpl = SimpleTokenStream<'a>;
fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
SimpleTokenStream {
text: text,
chars: text.char_indices(),

View File

@@ -48,16 +48,18 @@ impl<'a, TTokenStream> TokenStream for TokenStreamChain<TTokenStream>
}
fn token(&self) -> &Token {
if self.stream_idx > self.token_streams.len() {
panic!("You called .token(), after the end of the token stream has been reached");
}
assert!(
self.stream_idx <= self.token_streams.len(),
"You called .token(), after the end of the token stream has been reached"
);
&self.token
}
fn token_mut(&mut self) -> &mut Token {
if self.stream_idx > self.token_streams.len() {
panic!("You called .token(), after the end of the token stream has been reached");
}
assert!(
self.stream_idx <= self.token_streams.len(),
"You called .token(), after the end of the token stream has been reached"
);
&mut self.token
}
}

View File

@@ -48,7 +48,7 @@ pub trait Tokenizer<'a>: Sized + Clone {
type TokenStreamImpl: TokenStream;
/// Creates a token stream for a given `str`.
fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl;
fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl;
/// Appends a token filter to the current tokenizer.
///
@@ -80,10 +80,9 @@ pub trait Tokenizer<'a>: Sized + Clone {
}
}
pub trait BoxedTokenizer: Send + Sync {
fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a>;
fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a>;
fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b>;
fn boxed_clone(&self) -> Box<BoxedTokenizer>;
}
@@ -91,11 +90,11 @@ pub trait BoxedTokenizer: Send + Sync {
struct BoxableTokenizer<A>(A) where A: for <'a> Tokenizer<'a> + Send + Sync;
impl<A> BoxedTokenizer for BoxableTokenizer<A> where A: 'static + Send + Sync + for <'a> Tokenizer<'a> {
fn token_stream<'a>(&mut self, text: &'a str) -> Box<TokenStream + 'a> {
fn token_stream<'a>(&self, text: &'a str) -> Box<TokenStream + 'a> {
box self.0.token_stream(text)
}
fn token_stream_texts<'b>(&mut self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
fn token_stream_texts<'b>(&self, texts: &'b [&'b str]) -> Box<TokenStream + 'b> {
assert!(texts.len() > 0);
if texts.len() == 1 {
box self.0.token_stream(texts[0])
@@ -103,7 +102,7 @@ impl<A> BoxedTokenizer for BoxableTokenizer<A> where A: 'static + Send + Sync +
else {
let mut offsets = vec!();
let mut total_offset = 0;
for text in texts {
for &text in texts {
offsets.push(total_offset);
total_offset += text.len();
}
@@ -217,7 +216,7 @@ impl<'a, HeadTokenFilterFactory, TailTokenizer> Tokenizer<'a>
{
type TokenStreamImpl = HeadTokenFilterFactory::ResultTokenStream;
fn token_stream(&mut self, text: &'a str) -> Self::TokenStreamImpl {
fn token_stream(&self, text: &'a str) -> Self::TokenStreamImpl {
let tail_token_stream = self.tail.token_stream(text );
self.head.transform(tail_token_stream)
}