From fe0c7c54082c077f4cb12f61cc435aa2b02248ba Mon Sep 17 00:00:00 2001 From: trinity Pointard Date: Thu, 2 Jan 2025 11:56:05 +0100 Subject: [PATCH] change rangebound style --- src/index/inverted_index_reader.rs | 8 ++++---- sstable/src/block_match_automaton.rs | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/index/inverted_index_reader.rs b/src/index/inverted_index_reader.rs index 064d10954..142d62901 100644 --- a/src/index/inverted_index_reader.rs +++ b/src/index/inverted_index_reader.rs @@ -348,20 +348,20 @@ impl InvertedIndexReader { let merge_holes_under_bytes = (80 * 1024 * 1024 * 50) / 1000; // we build a first iterator to download everything. Simply calling the function already // loads everything, but doesn't start iterating over the sstable. - let mut _term_info = self + let mut _term_infos = self .get_term_range_async(.., automaton.clone(), None, merge_holes_under_bytes) .await?; // we build a 2nd iterator, this one with no holes, so we don't go through blocks we can't // match, and just download them to reduce our query count. This makes the assumption // there is a caching layer below, which might not always be true, but is in Quickwit. - let term_info = self.get_term_range_async(.., automaton, None, 0).await?; + let term_infos = self.get_term_range_async(.., automaton, None, 0).await?; // TODO this operation is often cheap for "friendly" automatons, but can be very costly for // "unfriendly" ones such as ".*a{50}" (very few terms if any match this pattern, but we // can't know early). In this case, we decompress and iterate over the entire sstable, while // still being in async context. Ideally we should spawn this on a threadpool. 
- let range_to_load = term_info + let posting_ranges_to_load = term_infos .map(|term_info| term_info.postings_range) .coalesce(|range1, range2| { if range1.end + merge_holes_under_bytes >= range2.start { @@ -371,7 +371,7 @@ impl InvertedIndexReader { } }); - let slices_downloaded = futures_util::stream::iter(range_to_load) + let slices_downloaded = futures_util::stream::iter(posting_ranges_to_load) .map(|posting_slice| { self.postings_file_slice .read_bytes_slice_async(posting_slice) diff --git a/sstable/src/block_match_automaton.rs b/sstable/src/block_match_automaton.rs index bb516ea2e..0c84a05e0 100644 --- a/sstable/src/block_match_automaton.rs +++ b/sstable/src/block_match_automaton.rs @@ -52,10 +52,10 @@ fn can_block_match_automaton_with_start( // - keys are `abcd` and `abce` => we test for abc[d-e].* // - keys are `abcd` and `abc` => contradiction with start_key < end_key. // - // ideally for ]abc, abcde] we could test for abc([\0-c].*|d([\0-d].*|e)?) + // ideally for (abc, abcde] we could test for abc([\0-c].*|d([\0-d].*|e)?) // but let's start simple (and correct), and tighten our bounds latter // - // and for ]abcde, abcfg] we could test for abc(d(e.+|[f-\xff].*)|e.*|f([\0-f].*|g)?) + // and for (abcde, abcfg] we could test for abc(d(e.+|[f-\xff].*)|e.*|f([\0-f].*|g)?) // abc ( // d(e.+|[f-\xff].*) | // e.* | @@ -69,8 +69,8 @@ fn can_block_match_automaton_with_start( // - ? is a the thing before can_match(), or current state.is_match() // - | means test both side - // we have two cases, either start_key is a prefix of end_key (e.g. ]abc, abcjp]), - // or it is not (e.g. ]abcdg, abcjp]). It is not possible however that end_key be a prefix of + // we have two cases, either start_key is a prefix of end_key (e.g. (abc, abcjp]), + // or it is not (e.g. (abcdg, abcjp]). It is not possible however that end_key be a prefix of // start_key (or that both are equal) because we already handled start_key >= end_key. 
// // if we are in the first case, we want to visit the following states: @@ -103,7 +103,7 @@ fn can_block_match_automaton_with_start( // things starting with start_range were handled in match_range_start // this starting with end_range are handled bellow. - // this can run for 0 iteration in cases such as ]abc, abd] + // this can run for 0 iteration in cases such as (abc, abd] for rb in (start_range + 1)..end_range { let new_state = automaton.accept(&base_state, rb); if automaton.can_match(&new_state) { @@ -132,7 +132,7 @@ fn match_range_start>( automaton: &A, mut state: S, ) -> bool { - // case ]abcdgj, abcpqr], `abcd` is already consumed, we need to handle: + // case (abcdgj, abcpqr], `abcd` is already consumed, we need to handle: // - [h-\xff].* // - g[k-\xff].* // - gj.+ == gf[\0-\xff].* @@ -177,7 +177,7 @@ fn match_range_end>( automaton: &A, mut state: S, ) -> bool { - // for ]abcdef, abcmps]. the prefix `abcm` has been consumed, `[d-l].*` was handled elsewhere, + // for (abcdef, abcmps]. the prefix `abcm` has been consumed, `[d-l].*` was handled elsewhere, // we just need to handle // - [\0-o].* // - p