From 98cf4ba63a77ebca3b75720c41d5dfad0361f293 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 27 Mar 2018 16:14:28 +0900 Subject: [PATCH] Small refactor of postings's skip method --- src/postings/segment_postings.rs | 109 +++++++++++++++---------------- 1 file changed, 52 insertions(+), 57 deletions(-) diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 236ad2d91..17f107ed8 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -5,7 +5,6 @@ use common::BitSet; use common::HasLen; use postings::Postings; use docset::{DocSet, SkipResult}; -use std::cmp; use fst::Streamer; use compression::compressed_block_size; use postings::{NoDelete, DeleteSet}; @@ -125,37 +124,32 @@ impl SegmentPostings { } } -impl DocSet for SegmentPostings { - // goes to the next element. - // next needs to be called a first time to point to the correct element. - #[inline] - fn advance(&mut self) -> bool { - loop { - { - if self.position_computer.is_some() { - let term_freq = self.term_freq() as usize; - self.position_computer.as_mut().unwrap().add_skip(term_freq); - } - } - self.cur += 1; - if self.cur >= self.block_cursor.block_len() { - self.cur = 0; - if !self.block_cursor.advance() { - self.cur = COMPRESSION_BLOCK_SIZE; - return false; - } - } - if !self.delete_bitset.is_deleted(self.doc()) { - return true; - } +fn exponential_search(target: u32, mut start: usize, arr: &[u32]) -> (usize, usize) { + let end = arr.len(); + debug_assert!(target >= arr[start]); + debug_assert!(target <= arr[end - 1]); + let mut jump = 1; + loop { + let new = start + jump; + if new >= end { + return (start, end) } + if arr[new] > target { + return (start, new); + } + start = new; + jump *= 2; } +} - +impl DocSet for SegmentPostings { fn skip_next(&mut self, target: DocId) -> SkipResult { - if !self.advance() { + if !self.adv ance() { return SkipResult::End; } + if self.doc() == target { + return SkipResult::Reached; + } // in the following, thanks to the call to advance above, // we know that the position is not loaded and we need @@ -197,41 +191,16 @@ impl DocSet for SegmentPostings { { // we're in the right block now, start with an exponential search let block_docs = self.block_cursor.docs(); - let block_len = block_docs.len(); - debug_assert!(target >= block_docs[self.cur]); - debug_assert!(target <= block_docs[block_len - 1]); + let (mut start, end) = exponential_search(target, self.cur, block_docs); - let mut start = self.cur; - let mut end = block_len; - let mut count = 1; - loop { - let new = start + count; - if new < end && block_docs[new] < target { - start = new; - count *= 2; - } else { - break; - } - } - end = cmp::min(start + count, end); + start += block_docs[start..end] + .binary_search(&target) + .unwrap_or_else(|e| e); - // now do a binary search - let mut count = end - start; - while count > 0 { - let step = count / 2; - let mid = start + step; - let doc = block_docs[mid]; - if doc < target { - start = mid + 1; - count -= step + 1; - } else { - count = step; - } - } - - // `doc` is now >= `target` + // `doc` is now the first element >= `target` let doc = block_docs[start]; + debug_assert!(doc >= target); if self.position_computer.is_some() { let freqs_skipped = &self.block_cursor.freqs()[self.cur..start]; @@ -258,6 +227,32 @@ impl DocSet for SegmentPostings { } } + + // goes to the next element. + // next needs to be called a first time to point to the correct element. + #[inline] + fn advance(&mut self) -> bool { + loop { + { + if self.position_computer.is_some() { + let term_freq = self.term_freq() as usize; + self.position_computer.as_mut().unwrap().add_skip(term_freq); + } + } + self.cur += 1; + if self.cur >= self.block_cursor.block_len() { + self.cur = 0; + if !self.block_cursor.advance() { + self.cur = COMPRESSION_BLOCK_SIZE; + return false; + } + } + if !self.delete_bitset.is_deleted(self.doc()) { + return true; + } + } + } + fn size_hint(&self) -> u32 { self.len() as u32 }