Minor changes

This commit is contained in:
Paul Masurel
2019-11-06 10:11:59 +09:00
parent 7ddc6041a6
commit 69e8495724
2 changed files with 35 additions and 34 deletions

View File

@@ -167,11 +167,9 @@ impl SegmentWriter {
match field_value.value() {
Value::PreTokStr(tok_str) => {
offsets.push(total_offset);
total_offset += match tok_str.tokens.last() {
Some(token) => token.offset_to,
None => 0,
};
if let Some(last_token) = tok_str.tokens.last() {
total_offset += last_token.offset_to;
}
token_streams
.push(Box::new(PreTokenizedStream::from(tok_str.clone())));
}

View File

@@ -43,44 +43,47 @@ impl PreTokenizedStream {
tok_strings: &'a [&'a PreTokenizedString],
) -> Box<dyn TokenStream + 'a> {
if tok_strings.len() == 1 {
Box::new(PreTokenizedStream::from((*tok_strings[0]).clone()))
} else {
let mut offsets = vec![];
let mut total_offset = 0;
for &tok_string in tok_strings {
offsets.push(total_offset);
let offset = match tok_string.tokens.last() {
Some(token) => token.offset_to,
None => 0,
};
total_offset += offset;
}
let token_streams: Vec<_> = tok_strings
.iter()
.map(|tok_string| PreTokenizedStream::from((*tok_string).clone()))
.collect();
Box::new(TokenStreamChain::new(offsets, token_streams))
return Box::new(PreTokenizedStream::from((*tok_strings[0]).clone()));
}
let mut offsets = vec![];
let mut total_offset = 0;
for &tok_string in tok_strings {
offsets.push(total_offset);
if let Some(last_token) = tok_string.tokens.last() {
total_offset += last_token.offset_to;
}
}
let token_streams: Vec<_> = tok_strings
.iter()
.map(|tok_string| PreTokenizedStream::from((*tok_string).clone()))
.collect();
Box::new(TokenStreamChain::new(offsets, token_streams))
}
}
impl TokenStream for PreTokenizedStream {
fn advance(&mut self) -> bool {
if self.current_token >= self.tokenized_string.tokens.len() as i64 - 1 {
// This was our last token.
return false;
}
self.current_token += 1;
self.current_token < self.tokenized_string.tokens.len() as i64
true
}
fn token(&self) -> &Token {
if self.current_token < 0 {
panic!("TokenStream not initialized. You should call advance() at least once.")
}
assert!(
self.current_token >= 0,
"TokenStream not initialized. You should call advance() at least once."
);
&self.tokenized_string.tokens[self.current_token as usize]
}
fn token_mut(&mut self) -> &mut Token {
if self.current_token < 0 {
panic!("TokenStream not initialized. You should call advance() at least once.")
}
assert!(
self.current_token >= 0,
"TokenStream not initialized. You should call advance() at least once."
);
&mut self.tokenized_string.tokens[self.current_token as usize]
}
}
@@ -147,7 +150,7 @@ mod tests {
let chain_parts = vec![&tok_text, &tok_text];
let mut tok_stream = PreTokenizedStream::chain_tokenized_strings(&chain_parts[..]);
let mut token_stream = PreTokenizedStream::chain_tokenized_strings(&chain_parts[..]);
let expected_tokens = vec![
Token {
@@ -179,10 +182,10 @@ mod tests {
position_length: 1,
},
];
let mut i = 0;
while tok_stream.advance() {
assert!(*tok_stream.token() == expected_tokens[i]);
i += 1;
for expected_token in expected_tokens {
assert!(token_stream.advance());
assert_eq!(token_stream.token(), &expected_token);
}
assert!(!token_stream.advance());
}
}