add comment about cpu-intensive operation in async context

trinity-1686a
2024-12-20 12:20:35 +01:00
parent 42efc7f7c8
commit ebf4d84553
2 changed files with 6 additions and 1 deletion


@@ -351,11 +351,16 @@ impl InvertedIndexReader {
let mut _term_info = self
.get_term_range_async(.., automaton.clone(), None, merge_holes_under)
.await?;
// we build a 2nd iterator, this one with no holes, so we don't go through blocks we can't
// match, and just download them to reduce our query count. This makes the assumption
// there is a caching layer below, which might not always be true, but is in Quickwit.
let term_info = self.get_term_range_async(.., automaton, None, 0).await?;
+ // TODO this operation is often cheap for "friendly" automatons, but can be very costly for
+ // "unfriendly" ones such as ".*a{50}" (very few terms if any match this pattern, but we
+ // can't know early). In this case, we decompress and iterate over the entire sstable, while
+ // still being in async context. Ideally we should spawn this on a threadpool.
let range_to_load = term_info
.map(|term_info| term_info.postings_range)
.coalesce(|range1, range2| {

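The TODO added above describes CPU-bound work (decompressing and walking an entire sstable for an "unfriendly" automaton) running directly on the async executor. Below is a minimal sketch of the threadpool hand-off the comment suggests, assuming a Tokio runtime; `collect_postings_ranges` and `dictionary_bytes` are hypothetical stand-ins for the synchronous scan and its input, not tantivy or Quickwit APIs.

use std::ops::Range;

/// Hypothetical async wrapper: run the CPU-bound sstable scan on Tokio's
/// blocking threadpool so it does not stall other tasks on the async runtime.
async fn warm_ranges_off_executor(
    dictionary_bytes: Vec<u8>, // owned bytes moved into the blocking task (assumption)
) -> std::io::Result<Vec<Range<usize>>> {
    let ranges = tokio::task::spawn_blocking(move || {
        // the expensive, purely synchronous part: decompress blocks, iterate
        // matching terms, and collect the postings ranges to download
        collect_postings_ranges(&dictionary_bytes)
    })
    .await
    .expect("blocking task panicked")?;
    Ok(ranges)
}

/// Hypothetical stand-in for "decompress and iterate over the entire sstable".
fn collect_postings_ranges(_bytes: &[u8]) -> std::io::Result<Vec<Range<usize>>> {
    Ok(Vec::new())
}

spawn_blocking is only one option; a dedicated CPU threadpool would serve the same purpose. The point of the TODO is that the decompression loop should not occupy an async worker thread.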

@@ -132,7 +132,7 @@ fn match_range_start<S, A: Automaton<State = S>>(
automaton: &A,
mut state: S,
) -> bool {
- // case [abcdgj, abcpqr], `abcd` is already consumed, we need to handle:
+ // case ]abcdgj, abcpqr], `abcd` is already consumed, we need to handle:
// - [h-\xff].*
// - g[k-\xff].*
// - gj.+ == gj[\0-\xff].*
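For illustration, here is a sketch of the case analysis spelled out in the comment above: after the shared prefix is consumed, decide whether an automaton can still match some key strictly greater than the remaining start suffix ("gj" in the example). It uses the `fst` crate's `Automaton` trait (which `tantivy_fst` mirrors) and is an illustrative re-derivation, not the actual `match_range_start` implementation.

use fst::Automaton;

/// Can `automaton`, already in `state` after the shared prefix, match any key
/// strictly greater than `start_suffix`? For "gj" this mirrors the three cases
/// above: a larger first byte ([h-\xff].*), a larger second byte (g[k-\xff].*),
/// or the full suffix plus at least one extra byte (gj.+ == gj[\0-\xff].*).
fn can_match_after_start<A: Automaton>(
    automaton: &A,
    mut state: A::State,
    start_suffix: &[u8],
) -> bool {
    for &byte in start_suffix {
        // keys equal to the suffix so far, followed by a strictly larger byte
        for larger in (byte as u16 + 1)..=0xff {
            let next = automaton.accept(&state, larger as u8);
            if automaton.can_match(&next) {
                return true;
            }
        }
        // otherwise follow the exact byte and examine the next position
        state = automaton.accept(&state, byte);
        if !automaton.can_match(&state) {
            return false;
        }
    }
    // the whole suffix matched exactly; the start bound is exclusive, so we
    // still need at least one extra byte of any value
    (0u16..=0xff).any(|b| automaton.can_match(&automaton.accept(&state, b as u8)))
}

Enumerating all 256 byte values per position is just the most literal way to express the [h-\xff]-style classes; a real implementation can be cleverer, but the case split is the same.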