mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 23:20:40 +00:00
change rangebound style
This commit is contained in:
@@ -348,20 +348,20 @@ impl InvertedIndexReader {
|
||||
let merge_holes_under_bytes = (80 * 1024 * 1024 * 50) / 1000;
|
||||
// we build a first iterator to download everything. Simply calling the function already
|
||||
// loads everything, but doesn't start iterating over the sstable.
|
||||
let mut _term_info = self
|
||||
let mut _term_infos = self
|
||||
.get_term_range_async(.., automaton.clone(), None, merge_holes_under_bytes)
|
||||
.await?;
|
||||
|
||||
// we build a 2nd iterator, this one with no holes, so we don't go through blocks we can't
|
||||
// match, and just download them to reduce our query count. This makes the assumption
|
||||
// there is a caching layer below, which might not always be true, but is in Quickwit.
|
||||
let term_info = self.get_term_range_async(.., automaton, None, 0).await?;
|
||||
let term_infos = self.get_term_range_async(.., automaton, None, 0).await?;
|
||||
|
||||
// TODO this operation is often cheap for "friendly" automatons, but can be very costly for
|
||||
// "unfriendly" ones such as ".*a{50}" (very few terms if any match this pattern, but we
|
||||
// can't know early). In this case, we decompress and iterate over the entire sstable, while
|
||||
// still being in async context. Ideally we should spawn this on a threadpool.
|
||||
let range_to_load = term_info
|
||||
let posting_ranges_to_load = term_infos
|
||||
.map(|term_info| term_info.postings_range)
|
||||
.coalesce(|range1, range2| {
|
||||
if range1.end + merge_holes_under_bytes >= range2.start {
|
||||
@@ -371,7 +371,7 @@ impl InvertedIndexReader {
|
||||
}
|
||||
});
|
||||
|
||||
let slices_downloaded = futures_util::stream::iter(range_to_load)
|
||||
let slices_downloaded = futures_util::stream::iter(postings_ranges_to_load)
|
||||
.map(|posting_slice| {
|
||||
self.postings_file_slice
|
||||
.read_bytes_slice_async(posting_slice)
|
||||
|
||||
@@ -52,10 +52,10 @@ fn can_block_match_automaton_with_start(
|
||||
// - keys are `abcd` and `abce` => we test for abc[d-e].*
|
||||
// - keys are `abcd` and `abc` => contradiction with start_key < end_key.
|
||||
//
|
||||
// ideally for ]abc, abcde] we could test for abc([\0-c].*|d([\0-d].*|e)?)
|
||||
// ideally for (abc, abcde] we could test for abc([\0-c].*|d([\0-d].*|e)?)
|
||||
// but let's start simple (and correct), and tighten our bounds later
|
||||
//
|
||||
// and for ]abcde, abcfg] we could test for abc(d(e.+|[f-\xff].*)|e.*|f([\0-f].*|g)?)
|
||||
// and for (abcde, abcfg] we could test for abc(d(e.+|[f-\xff].*)|e.*|f([\0-f].*|g)?)
|
||||
// abc (
|
||||
// d(e.+|[f-\xff].*) |
|
||||
// e.* |
|
||||
@@ -69,8 +69,8 @@ fn can_block_match_automaton_with_start(
|
||||
// - ? is the thing before can_match(), or current state.is_match()
|
||||
// - | means test both sides
|
||||
|
||||
// we have two cases, either start_key is a prefix of end_key (e.g. ]abc, abcjp]),
|
||||
// or it is not (e.g. ]abcdg, abcjp]). It is not possible however that end_key be a prefix of
|
||||
// we have two cases, either start_key is a prefix of end_key (e.g. (abc, abcjp]),
|
||||
// or it is not (e.g. (abcdg, abcjp]). It is not possible however that end_key be a prefix of
|
||||
// start_key (or that both are equal) because we already handled start_key >= end_key.
|
||||
//
|
||||
// if we are in the first case, we want to visit the following states:
|
||||
@@ -103,7 +103,7 @@ fn can_block_match_automaton_with_start(
|
||||
|
||||
// things starting with start_range were handled in match_range_start
|
||||
// things starting with end_range are handled below.
|
||||
// this can run for 0 iteration in cases such as ]abc, abd]
|
||||
// this can run for 0 iterations in cases such as (abc, abd]
|
||||
for rb in (start_range + 1)..end_range {
|
||||
let new_state = automaton.accept(&base_state, rb);
|
||||
if automaton.can_match(&new_state) {
|
||||
@@ -132,7 +132,7 @@ fn match_range_start<S, A: Automaton<State = S>>(
|
||||
automaton: &A,
|
||||
mut state: S,
|
||||
) -> bool {
|
||||
// case ]abcdgj, abcpqr], `abcd` is already consumed, we need to handle:
|
||||
// case (abcdgj, abcpqr], `abcd` is already consumed, we need to handle:
|
||||
// - [h-\xff].*
|
||||
// - g[k-\xff].*
|
||||
// - gj.+ == gj[\0-\xff].*
|
||||
@@ -177,7 +177,7 @@ fn match_range_end<S, A: Automaton<State = S>>(
|
||||
automaton: &A,
|
||||
mut state: S,
|
||||
) -> bool {
|
||||
// for ]abcdef, abcmps]. the prefix `abcm` has been consumed, `[d-l].*` was handled elsewhere,
|
||||
// for (abcdef, abcmps]. the prefix `abcm` has been consumed, `[d-l].*` was handled elsewhere,
|
||||
// we just need to handle
|
||||
// - [\0-o].*
|
||||
// - p
|
||||
|
||||
Reference in New Issue
Block a user