From 811ddf2226bd10f6b6e7894be2d0a6dcf083aeaf Mon Sep 17 00:00:00 2001
From: Paul Masurel
Date: Wed, 8 Aug 2018 11:15:20 +0900
Subject: [PATCH] Closes #364 (#365)

* Closes #364

* Trying to raise the recursion limit

* Better unit test and bug fix on token offsets
---
Reviewer notes (free-form text after the three-dash separator; it is not part
of the commit message or of the diff):
 * src/lib.rs: adds a crate-level `recursion_limit` attribute set to "80".
 * src/tokenizer/token_stream_chain.rs: the second of two identical
   `offset_from` assignments is replaced, so `offset_to` is now populated
   from the inner token plus the per-stream offset.
 * The literal `2` used as the position shift between chained streams is
   named POSITION_GAP, and the shift is now computed with `wrapping_add`.
   NOTE(review): the new test expects the first token after an empty stream
   to land on POSITION_GAP - 1, which suggests the initial token position is
   a max-value sentinel that is meant to wrap around; confirm against the
   default value of `Token`.
 * Adds `test_chain_first_emits_no_tokens`, which chains an empty stream with
   a two-word stream and checks text, offsets and positions of both tokens.
 * NOTE(review): generic parameters (e.g. on `Vec` and `TokenStreamChain`)
   and the author e-mail address appear to have been stripped from this copy
   of the patch; they are left exactly as found, so verify against the
   upstream commit before applying.

 src/lib.rs                          |  1 +
 src/tokenizer/token_stream_chain.rs | 34 +++++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index d3b1ff771..c01226c55 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,6 +7,7 @@
 #![allow(new_without_default)]
 #![allow(decimal_literal_representation)]
 #![warn(missing_docs)]
+#![recursion_limit="80"]
 
 //! # `tantivy`
 //!
diff --git a/src/tokenizer/token_stream_chain.rs b/src/tokenizer/token_stream_chain.rs
index 652ac078a..01d631e2b 100644
--- a/src/tokenizer/token_stream_chain.rs
+++ b/src/tokenizer/token_stream_chain.rs
@@ -1,5 +1,7 @@
 use tokenizer::{Token, TokenStream};
 
+const POSITION_GAP: usize = 2;
+
 pub(crate) struct TokenStreamChain {
     offsets: Vec,
     token_streams: Vec,
@@ -37,14 +39,14 @@ where
                 let token = token_stream.token();
                 let offset_offset = self.offsets[self.stream_idx];
                 self.token.offset_from = token.offset_from + offset_offset;
-                self.token.offset_from = token.offset_from + offset_offset;
+                self.token.offset_to = token.offset_to + offset_offset;
                 self.token.position = token.position + self.position_shift;
                 self.token.text.clear();
                 self.token.text.push_str(token.text.as_str());
                 return true;
             } else {
                 self.stream_idx += 1;
-                self.position_shift = self.token.position + 2;
+                self.position_shift = self.token.position.wrapping_add(POSITION_GAP);
             }
         }
         false
@@ -66,3 +68,31 @@ where
         &mut self.token
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::POSITION_GAP;
+    use super::TokenStreamChain;
+    use super::super::{Tokenizer, TokenStream, SimpleTokenizer};
+
+    #[test]
+    fn test_chain_first_emits_no_tokens() {
+        let token_streams = vec![SimpleTokenizer.token_stream(""), SimpleTokenizer.token_stream("hello world")];
+        let mut token_chain = TokenStreamChain::new(vec![0, 0], token_streams);
+
+        assert!(token_chain.advance());
+        assert_eq!(token_chain.token().text, "hello");
+        assert_eq!(token_chain.token().offset_from, 0);
+        assert_eq!(token_chain.token().offset_to, 5);
+        assert_eq!(token_chain.token().position, POSITION_GAP - 1);
+
+        assert!(token_chain.advance());
+        assert_eq!(token_chain.token().text, "world");
+        assert_eq!(token_chain.token().offset_from, 6);
+        assert_eq!(token_chain.token().offset_to, 11);
+        assert_eq!(token_chain.token().position, POSITION_GAP);
+
+        assert!(!token_chain.advance());
+    }
+
+}
\ No newline at end of file