mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
add comment about cpu-intensive operation in async context
This commit is contained in:
@@ -351,11 +351,16 @@ impl InvertedIndexReader {
|
||||
let mut _term_info = self
|
||||
.get_term_range_async(.., automaton.clone(), None, merge_holes_under)
|
||||
.await?;
|
||||
|
||||
// We build a second iterator, this one with no holes, so we don't iterate over blocks that
|
||||
// cannot match; those were only downloaded above to reduce our query count. This assumes
|
||||
// there is a caching layer below, which might not always be true, but is in Quickwit.
|
||||
let term_info = self.get_term_range_async(.., automaton, None, 0).await?;
|
||||
|
||||
// TODO this operation is often cheap for "friendly" automatons, but can be very costly for
|
||||
// "unfriendly" ones such as ".*a{50}" (very few terms if any match this pattern, but we
|
||||
// can't know early). In this case, we decompress and iterate over the entire sstable, while
|
||||
// still being in async context. Ideally we should spawn this on a threadpool.
|
||||
let range_to_load = term_info
|
||||
.map(|term_info| term_info.postings_range)
|
||||
.coalesce(|range1, range2| {
|
||||
|
||||
@@ -132,7 +132,7 @@ fn match_range_start<S, A: Automaton<State = S>>(
|
||||
automaton: &A,
|
||||
mut state: S,
|
||||
) -> bool {
|
||||
// case [abcdgj, abcpqr], `abcd` is already consumed, we need to handle:
|
||||
// case ]abcdgj, abcpqr], `abcd` is already consumed, we need to handle:
|
||||
// - [h-\xff].*
|
||||
// - g[k-\xff].*
|
||||
// - gj.+ == gj[\0-\xff].*
|
||||
|
||||
Reference in New Issue
Block a user