mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 15:10:40 +00:00
Minor changes
This commit is contained in:
@@ -167,11 +167,9 @@ impl SegmentWriter {
|
||||
match field_value.value() {
|
||||
Value::PreTokStr(tok_str) => {
|
||||
offsets.push(total_offset);
|
||||
total_offset += match tok_str.tokens.last() {
|
||||
Some(token) => token.offset_to,
|
||||
None => 0,
|
||||
};
|
||||
|
||||
if let Some(last_token) = tok_str.tokens.last() {
|
||||
total_offset += last_token.offset_to;
|
||||
}
|
||||
token_streams
|
||||
.push(Box::new(PreTokenizedStream::from(tok_str.clone())));
|
||||
}
|
||||
|
||||
@@ -43,44 +43,47 @@ impl PreTokenizedStream {
|
||||
tok_strings: &'a [&'a PreTokenizedString],
|
||||
) -> Box<dyn TokenStream + 'a> {
|
||||
if tok_strings.len() == 1 {
|
||||
Box::new(PreTokenizedStream::from((*tok_strings[0]).clone()))
|
||||
} else {
|
||||
let mut offsets = vec![];
|
||||
let mut total_offset = 0;
|
||||
for &tok_string in tok_strings {
|
||||
offsets.push(total_offset);
|
||||
let offset = match tok_string.tokens.last() {
|
||||
Some(token) => token.offset_to,
|
||||
None => 0,
|
||||
};
|
||||
total_offset += offset;
|
||||
}
|
||||
let token_streams: Vec<_> = tok_strings
|
||||
.iter()
|
||||
.map(|tok_string| PreTokenizedStream::from((*tok_string).clone()))
|
||||
.collect();
|
||||
Box::new(TokenStreamChain::new(offsets, token_streams))
|
||||
return Box::new(PreTokenizedStream::from((*tok_strings[0]).clone()));
|
||||
}
|
||||
let mut offsets = vec![];
|
||||
let mut total_offset = 0;
|
||||
for &tok_string in tok_strings {
|
||||
offsets.push(total_offset);
|
||||
if let Some(last_token) = tok_string.tokens.last() {
|
||||
total_offset += last_token.offset_to;
|
||||
}
|
||||
}
|
||||
let token_streams: Vec<_> = tok_strings
|
||||
.iter()
|
||||
.map(|tok_string| PreTokenizedStream::from((*tok_string).clone()))
|
||||
.collect();
|
||||
Box::new(TokenStreamChain::new(offsets, token_streams))
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenStream for PreTokenizedStream {
|
||||
fn advance(&mut self) -> bool {
|
||||
if self.current_token >= self.tokenized_string.tokens.len() as i64 - 1 {
|
||||
// This was our last token.
|
||||
return false;
|
||||
}
|
||||
self.current_token += 1;
|
||||
self.current_token < self.tokenized_string.tokens.len() as i64
|
||||
true
|
||||
}
|
||||
|
||||
fn token(&self) -> &Token {
|
||||
if self.current_token < 0 {
|
||||
panic!("TokenStream not initialized. You should call advance() at least once.")
|
||||
}
|
||||
assert!(
|
||||
self.current_token >= 0,
|
||||
"TokenStream not initialized. You should call advance() at least once."
|
||||
);
|
||||
&self.tokenized_string.tokens[self.current_token as usize]
|
||||
}
|
||||
|
||||
fn token_mut(&mut self) -> &mut Token {
|
||||
if self.current_token < 0 {
|
||||
panic!("TokenStream not initialized. You should call advance() at least once.")
|
||||
}
|
||||
assert!(
|
||||
self.current_token >= 0,
|
||||
"TokenStream not initialized. You should call advance() at least once."
|
||||
);
|
||||
&mut self.tokenized_string.tokens[self.current_token as usize]
|
||||
}
|
||||
}
|
||||
@@ -147,7 +150,7 @@ mod tests {
|
||||
|
||||
let chain_parts = vec![&tok_text, &tok_text];
|
||||
|
||||
let mut tok_stream = PreTokenizedStream::chain_tokenized_strings(&chain_parts[..]);
|
||||
let mut token_stream = PreTokenizedStream::chain_tokenized_strings(&chain_parts[..]);
|
||||
|
||||
let expected_tokens = vec![
|
||||
Token {
|
||||
@@ -179,10 +182,10 @@ mod tests {
|
||||
position_length: 1,
|
||||
},
|
||||
];
|
||||
let mut i = 0;
|
||||
while tok_stream.advance() {
|
||||
assert!(*tok_stream.token() == expected_tokens[i]);
|
||||
i += 1;
|
||||
for expected_token in expected_tokens {
|
||||
assert!(token_stream.advance());
|
||||
assert_eq!(token_stream.token(), &expected_token);
|
||||
}
|
||||
assert!(!token_stream.advance());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user