From 21e0adefdafb2fbc41a4f795a40956edaa2ac379 Mon Sep 17 00:00:00 2001
From: PSeitz
Date: Mon, 26 Sep 2022 08:42:33 +0800
Subject: [PATCH] use binary search instead of linear for get_val in merge (#1548)

* use binary search instead of linear for get_val in merge

* use partition_point
---
 src/indexer/sorted_doc_id_multivalue_column.rs | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/indexer/sorted_doc_id_multivalue_column.rs b/src/indexer/sorted_doc_id_multivalue_column.rs
index bcd86baad..95328571f 100644
--- a/src/indexer/sorted_doc_id_multivalue_column.rs
+++ b/src/indexer/sorted_doc_id_multivalue_column.rs
@@ -72,13 +72,9 @@ impl<'a> SortedDocIdMultiValueColumn<'a> {
 impl<'a> Column for SortedDocIdMultiValueColumn<'a> {
     fn get_val(&self, pos: u64) -> u64 {
         // use the offsets index to find the doc_id which will contain the position.
-        // the offsets are strictly increasing so we can do a simple search on it.
-        let new_doc_id: DocId = self
-            .offsets
-            .iter()
-            .position(|&offset| offset > pos)
-            .expect("pos is out of bounds") as DocId
-            - 1u32;
+        // the offsets are strictly increasing so we can do a binary search on it.
+
+        let new_doc_id: DocId = self.offsets.partition_point(|&offset| offset <= pos) as DocId - 1; // Offsets start at 0, so -1 is safe
 
         // now we need to find the position of `pos` in the multivalued bucket
         let num_pos_covered_until_now = self.offsets[new_doc_id as usize];