Collapse overlapped highlighted ranges (#1473)

This commit is contained in:
UEDA Akira
2022-08-26 14:37:08 +09:00
committed by GitHub
parent 03e4630cd8
commit 17093e8ffe

View File

@@ -79,7 +79,7 @@ impl Snippet {
let mut html = String::new();
let mut start_from: usize = 0;
for item in self.highlighted.iter() {
for item in collapse_overlapped_ranges(&self.highlighted) {
html.push_str(&encode_minimal(&self.fragment[start_from..item.start]));
html.push_str(HIGHLIGHTEN_PREFIX);
html.push_str(&encode_minimal(&self.fragment[item.clone()]));
@@ -186,6 +186,53 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
}
}
/// Merges overlapping ranges so that the returned ranges never overlap.
///
/// A range is merged into the previous one when it starts strictly before the
/// previous one ends. Ranges that merely touch (`0..1` followed by `1..2`) are
/// kept separate. An empty input yields an empty output.
///
/// ## Examples
/// - [0..1, 2..3] -> [0..1, 2..3]  # no overlap
/// - [0..1, 1..2] -> [0..1, 1..2]  # no overlap
/// - [0..2, 1..2] -> [0..2]        # collapsed
/// - [0..2, 1..3] -> [0..3]        # collapsed
/// - [0..3, 1..2] -> [0..3]        # second range's end is also inside of the first range
/// - [0..0, 1..2] -> [0..0, 1..2]  # an empty leading range is emitted exactly once
///
/// Note: This function assumes `ranges` is sorted by `Range.start` in ascending order.
/// The assumption is only verified by a `debug_assert!`.
fn collapse_overlapped_ranges(ranges: &[Range<usize>]) -> Vec<Range<usize>> {
    debug_assert!(is_sorted(ranges.iter().map(|range| range.start)));

    let mut result = Vec::new();
    let mut ranges_it = ranges.iter();

    // Seed the accumulator with the first range; nothing to do for empty input.
    let mut current = match ranges_it.next() {
        Some(range) => range.clone(),
        None => return result,
    };

    // Continue with the *remaining* ranges. Iterating `ranges` again would
    // revisit the first range and emit it twice whenever it is empty.
    for range in ranges_it {
        if current.end > range.start {
            // Overlap: extend the accumulator, keeping the furthest end seen so far.
            current.end = std::cmp::max(current.end, range.end);
        } else {
            // Disjoint (or merely touching): flush the accumulator and start a new one.
            result.push(current);
            current = range.clone();
        }
    }

    result.push(current);
    result
}
/// Returns `true` when the items produced by `it` are in non-decreasing order.
///
/// Empty and single-item iterators count as sorted. Iteration stops at the
/// first item that is smaller than its predecessor.
fn is_sorted(mut it: impl Iterator<Item = usize>) -> bool {
    let mut previous = match it.next() {
        Some(first) => first,
        None => return true,
    };
    it.all(|value| {
        let in_order = previous <= value;
        previous = value;
        in_order
    })
}
/// `SnippetGenerator`
///
/// # Example
@@ -320,10 +367,10 @@ mod tests {
use maplit::btreemap;
use super::{search_fragments, select_best_fragment_combination};
use super::{collapse_overlapped_ranges, search_fragments, select_best_fragment_combination};
use crate::query::QueryParser;
use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, TEXT};
use crate::tokenizer::SimpleTokenizer;
use crate::tokenizer::{NgramTokenizer, SimpleTokenizer};
use crate::{Index, SnippetGenerator};
const TEST_TEXT: &str = r#"Rust is a systems programming language sponsored by
@@ -588,4 +635,47 @@ Survey in 2016, 2017, and 2018."#;
}
Ok(())
}
#[test]
fn test_collapse_overlapped_ranges() {
    // Each case pairs an input (sorted by `start`) with its expected collapsed form.
    let cases = vec![
        // No overlap: the ranges are disjoint.
        (vec![0..1, 2..3], vec![0..1, 2..3]),
        // No overlap: the ranges only touch.
        (vec![0..1, 1..2], vec![0..1, 1..2]),
        // The second range ends where the first one ends.
        (vec![0..2, 1..2], vec![0..2]),
        // The second range extends past the first one.
        (vec![0..2, 1..3], vec![0..3]),
        // The second range lies entirely inside the first one.
        (vec![0..3, 1..2], vec![0..3]),
    ];

    for (input, expected) in &cases {
        assert_eq!(collapse_overlapped_ranges(input), *expected);
    }
}
/// The query terms "ab" and "bc" both occur in "abc" and share the letter "b";
/// the rendered snippet must wrap the whole match in a single `<b>` element.
#[test]
fn test_snippet_with_overlapped_highlighted_ranges() {
    let text = "abc";

    let terms: BTreeMap<String, _> = vec![(String::from("ab"), 0.9), (String::from("bc"), 1.0)]
        .into_iter()
        .collect();

    let fragments = search_fragments(
        &From::from(NgramTokenizer::all_ngrams(2, 2)),
        text,
        &terms,
        3,
    );

    // A single fragment is expected, spanning the whole text.
    assert_eq!(fragments.len(), 1);
    let only_fragment = &fragments[0];
    assert_eq!(only_fragment.score, 1.9);
    assert_eq!(only_fragment.start_offset, 0);
    assert_eq!(only_fragment.stop_offset, 3);

    let snippet = select_best_fragment_combination(&fragments[..], text);
    assert_eq!(snippet.fragment, "abc");
    assert_eq!(snippet.to_html(), "<b>abc</b>");
}
}