mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
Collapse overlapped highlighted ranges (#1473)
This commit is contained in:
@@ -79,7 +79,7 @@ impl Snippet {
|
||||
let mut html = String::new();
|
||||
let mut start_from: usize = 0;
|
||||
|
||||
for item in self.highlighted.iter() {
|
||||
for item in collapse_overlapped_ranges(&self.highlighted) {
|
||||
html.push_str(&encode_minimal(&self.fragment[start_from..item.start]));
|
||||
html.push_str(HIGHLIGHTEN_PREFIX);
|
||||
html.push_str(&encode_minimal(&self.fragment[item.clone()]));
|
||||
@@ -186,6 +186,53 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns ranges that are collapsed into non-overlapped ranges.
|
||||
///
|
||||
/// ## Examples
|
||||
/// - [0..1, 2..3] -> [0..1, 2..3] # no overlap
|
||||
/// - [0..1, 1..2] -> [0..1, 1..2] # no overlap
|
||||
/// - [0..2, 1..2] -> [0..2] # collapsed
|
||||
/// - [0..2, 1..3] -> [0..3] # collapsed
|
||||
/// - [0..3, 1..2] -> [0..3] # second range's end is also inside of the first range
|
||||
///
|
||||
/// Note: This function assumes `ranges` is sorted by `Range.start` in ascending order.
|
||||
fn collapse_overlapped_ranges(ranges: &[Range<usize>]) -> Vec<Range<usize>> {
|
||||
debug_assert!(is_sorted(ranges.iter().map(|range| range.start)));
|
||||
|
||||
let mut result = Vec::new();
|
||||
let mut ranges_it = ranges.iter();
|
||||
|
||||
let mut current = match ranges_it.next() {
|
||||
Some(range) => range.clone(),
|
||||
None => return result,
|
||||
};
|
||||
|
||||
for range in ranges {
|
||||
if current.end > range.start {
|
||||
current = current.start..std::cmp::max(current.end, range.end);
|
||||
} else {
|
||||
result.push(current);
|
||||
current = range.clone();
|
||||
}
|
||||
}
|
||||
|
||||
result.push(current);
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns `true` when the items yielded by `it` are in non-decreasing order.
///
/// An iterator that yields no item, or a single item, is considered sorted.
/// Equal neighbours are allowed. The scan stops at the first item that is
/// smaller than its predecessor.
fn is_sorted(mut it: impl Iterator<Item = usize>) -> bool {
    let mut last_seen = match it.next() {
        Some(first) => first,
        None => return true,
    };

    // `all` short-circuits on the first out-of-order pair.
    it.all(|value| {
        let in_order = last_seen <= value;
        last_seen = value;
        in_order
    })
}
|
||||
|
||||
/// `SnippetGenerator`
|
||||
///
|
||||
/// # Example
|
||||
@@ -320,10 +367,10 @@ mod tests {
|
||||
|
||||
use maplit::btreemap;
|
||||
|
||||
use super::{search_fragments, select_best_fragment_combination};
|
||||
use super::{collapse_overlapped_ranges, search_fragments, select_best_fragment_combination};
|
||||
use crate::query::QueryParser;
|
||||
use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, TEXT};
|
||||
use crate::tokenizer::SimpleTokenizer;
|
||||
use crate::tokenizer::{NgramTokenizer, SimpleTokenizer};
|
||||
use crate::{Index, SnippetGenerator};
|
||||
|
||||
const TEST_TEXT: &str = r#"Rust is a systems programming language sponsored by
|
||||
@@ -588,4 +635,47 @@ Survey in 2016, 2017, and 2018."#;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
fn test_collapse_overlapped_ranges() {
    // Disjoint ranges are returned unchanged.
    let disjoint = collapse_overlapped_ranges(&[0..1, 2..3]);
    assert_eq!(disjoint, vec![0..1, 2..3]);

    // Ranges that only touch at a boundary are not merged.
    let adjacent = collapse_overlapped_ranges(&[0..1, 1..2]);
    assert_eq!(adjacent, vec![0..1, 1..2]);

    // The second range ends where the first one ends.
    let same_end = collapse_overlapped_ranges(&[0..2, 1..2]);
    assert_eq!(same_end, vec![0..2]);

    // The second range extends past the end of the first one.
    let extended = collapse_overlapped_ranges(&[0..2, 1..3]);
    assert_eq!(extended, vec![0..3]);

    // The second range lies entirely inside the first one.
    let nested = collapse_overlapped_ranges(&[0..3, 1..2]);
    assert_eq!(nested, vec![0..3]);
}
|
||||
|
||||
#[test]
fn test_snippet_with_overlapped_highlighted_ranges() {
    let text = "abc";

    // Two 2-gram terms over "abc"; per the test name, their highlights are
    // expected to overlap.
    let mut terms = BTreeMap::new();
    terms.insert("ab".to_string(), 0.9);
    terms.insert("bc".to_string(), 1.0);

    let fragments = search_fragments(
        &From::from(NgramTokenizer::all_ngrams(2, 2)),
        text,
        &terms,
        3,
    );

    // Exactly one fragment is expected, spanning the whole text.
    assert_eq!(fragments.len(), 1);
    let only_fragment = &fragments[0];
    assert_eq!(only_fragment.score, 1.9);
    assert_eq!(only_fragment.start_offset, 0);
    assert_eq!(only_fragment.stop_offset, 3);

    // The rendered HTML must contain a single highlight rather than nested or
    // duplicated `<b>` tags.
    let snippet = select_best_fragment_combination(&fragments[..], text);
    assert_eq!(snippet.fragment, "abc");
    assert_eq!(snippet.to_html(), "<b>abc</b>");
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user