diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 1d4f900fa..39cba7fb8 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -167,11 +167,9 @@ impl SegmentWriter { match field_value.value() { Value::PreTokStr(tok_str) => { offsets.push(total_offset); - total_offset += match tok_str.tokens.last() { - Some(token) => token.offset_to, - None => 0, - }; - + if let Some(last_token) = tok_str.tokens.last() { + total_offset += last_token.offset_to; + } token_streams .push(Box::new(PreTokenizedStream::from(tok_str.clone()))); } diff --git a/src/tokenizer/tokenized_string.rs b/src/tokenizer/tokenized_string.rs index 86bb74c94..07f122a35 100644 --- a/src/tokenizer/tokenized_string.rs +++ b/src/tokenizer/tokenized_string.rs @@ -43,44 +43,47 @@ impl PreTokenizedStream { tok_strings: &'a [&'a PreTokenizedString], ) -> Box { if tok_strings.len() == 1 { - Box::new(PreTokenizedStream::from((*tok_strings[0]).clone())) - } else { - let mut offsets = vec![]; - let mut total_offset = 0; - for &tok_string in tok_strings { - offsets.push(total_offset); - let offset = match tok_string.tokens.last() { - Some(token) => token.offset_to, - None => 0, - }; - total_offset += offset; - } - let token_streams: Vec<_> = tok_strings - .iter() - .map(|tok_string| PreTokenizedStream::from((*tok_string).clone())) - .collect(); - Box::new(TokenStreamChain::new(offsets, token_streams)) + return Box::new(PreTokenizedStream::from((*tok_strings[0]).clone())); } + let mut offsets = vec![]; + let mut total_offset = 0; + for &tok_string in tok_strings { + offsets.push(total_offset); + if let Some(last_token) = tok_string.tokens.last() { + total_offset += last_token.offset_to; + } + } + let token_streams: Vec<_> = tok_strings + .iter() + .map(|tok_string| PreTokenizedStream::from((*tok_string).clone())) + .collect(); + Box::new(TokenStreamChain::new(offsets, token_streams)) } } impl TokenStream for PreTokenizedStream { fn advance(&mut self) -> bool { + if self.current_token >= self.tokenized_string.tokens.len() as i64 - 1 { + // This was our last token. + return false; + } self.current_token += 1; - self.current_token < self.tokenized_string.tokens.len() as i64 + true } fn token(&self) -> &Token { - if self.current_token < 0 { - panic!("TokenStream not initialized. You should call advance() at least once.") - } + assert!( + self.current_token >= 0, + "TokenStream not initialized. You should call advance() at least once." + ); &self.tokenized_string.tokens[self.current_token as usize] } fn token_mut(&mut self) -> &mut Token { - if self.current_token < 0 { - panic!("TokenStream not initialized. You should call advance() at least once.") - } + assert!( + self.current_token >= 0, + "TokenStream not initialized. You should call advance() at least once." + ); &mut self.tokenized_string.tokens[self.current_token as usize] } } @@ -147,7 +150,7 @@ mod tests { let chain_parts = vec![&tok_text, &tok_text]; - let mut tok_stream = PreTokenizedStream::chain_tokenized_strings(&chain_parts[..]); + let mut token_stream = PreTokenizedStream::chain_tokenized_strings(&chain_parts[..]); let expected_tokens = vec![ Token { @@ -179,10 +182,10 @@ mod tests { position_length: 1, }, ]; - let mut i = 0; - while tok_stream.advance() { - assert!(*tok_stream.token() == expected_tokens[i]); - i += 1; + for expected_token in expected_tokens { + assert!(token_stream.advance()); + assert_eq!(token_stream.token(), &expected_token); } + assert!(!token_stream.advance()); } }