diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs index cf83903de..2d4237699 100644 --- a/fastfield_codecs/benches/bench.rs +++ b/fastfield_codecs/benches/bench.rs @@ -112,7 +112,13 @@ mod tests { let column = get_u128_column_from_data(&data); b.iter(|| { - column.get_positions_for_value_range(major_item..=major_item, 0..data.len() as u32) + let mut positions = Vec::new(); + column.get_positions_for_value_range( + major_item..=major_item, + 0..data.len() as u32, + &mut positions, + ); + positions }); } @@ -122,7 +128,13 @@ mod tests { let column = get_u128_column_from_data(&data); b.iter(|| { - column.get_positions_for_value_range(minor_item..=minor_item, 0..data.len() as u32) + let mut positions = Vec::new(); + column.get_positions_for_value_range( + minor_item..=minor_item, + 0..data.len() as u32, + &mut positions, + ); + positions }); } @@ -131,7 +143,15 @@ mod tests { let (_major_item, _minor_item, data) = get_data_50percent_item(); let column = get_u128_column_from_data(&data); - b.iter(|| column.get_positions_for_value_range(0..=u128::MAX, 0..data.len() as u32)); + b.iter(|| { + let mut positions = Vec::new(); + column.get_positions_for_value_range( + 0..=u128::MAX, + 0..data.len() as u32, + &mut positions, + ); + positions + }); } #[bench] diff --git a/fastfield_codecs/src/column.rs b/fastfield_codecs/src/column.rs index 6dee298d3..a7a35092f 100644 --- a/fastfield_codecs/src/column.rs +++ b/fastfield_codecs/src/column.rs @@ -39,17 +39,16 @@ pub trait Column: Send + Sync { &self, value_range: RangeInclusive, doc_id_range: Range, - ) -> Vec { - let mut vals = Vec::new(); + positions: &mut Vec, + ) { let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals()); for idx in doc_id_range.start..doc_id_range.end { let val = self.get_val(idx); if value_range.contains(&val) { - vals.push(idx); + positions.push(idx); } } - vals } /// Returns the minimum value for this fast field. @@ -227,11 +226,13 @@ where &self, range: RangeInclusive, doc_id_range: Range, - ) -> Vec { + positions: &mut Vec, + ) { self.from_column.get_positions_for_value_range( self.monotonic_mapping.inverse(range.start().clone()) ..=self.monotonic_mapping.inverse(range.end().clone()), doc_id_range, + positions, ) } diff --git a/fastfield_codecs/src/compact_space/mod.rs b/fastfield_codecs/src/compact_space/mod.rs index b0f4c9240..c6e2da7ea 100644 --- a/fastfield_codecs/src/compact_space/mod.rs +++ b/fastfield_codecs/src/compact_space/mod.rs @@ -310,8 +310,9 @@ impl Column for CompactSpaceDecompressor { &self, value_range: RangeInclusive, doc_id_range: Range, - ) -> Vec { - self.get_positions_for_value_range(value_range, doc_id_range) + positions: &mut Vec, + ) { + self.get_positions_for_value_range(value_range, doc_id_range, positions) } } @@ -351,9 +352,10 @@ impl CompactSpaceDecompressor { &self, value_range: RangeInclusive, doc_id_range: Range, - ) -> Vec { + positions: &mut Vec, + ) { if value_range.start() > value_range.end() { - return Vec::new(); + return; } let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals()); let from_value = *value_range.start(); @@ -365,7 +367,7 @@ impl CompactSpaceDecompressor { // Quick return, if both ranges fall into the same non-mapped space, the range can't cover // any values, so we can early exit match (compact_to, compact_from) { - (Err(pos1), Err(pos2)) if pos1 == pos2 => return Vec::new(), + (Err(pos1), Err(pos2)) if pos1 == pos2 => return, _ => {} } @@ -387,7 +389,6 @@ impl CompactSpaceDecompressor { }); let range = compact_from..=compact_to; - let mut positions = Vec::new(); let scan_num_docs = doc_id_range.end - doc_id_range.start; @@ -420,8 +421,6 @@ impl CompactSpaceDecompressor { for idx in cutoff..doc_id_range.end { push_if_in_range(idx, get_val(idx as u32)); } - - positions } #[inline] @@ -514,8 +513,12 @@ mod tests { .positions(|val| range.contains(val)) .map(|pos| pos as u32) .collect::>(); - let positions = - decompressor.get_positions_for_value_range(range, 0..decompressor.num_vals()); + let mut positions = Vec::new(); + decompressor.get_positions_for_value_range( + range, + 0..decompressor.num_vals(), + &mut positions, + ); assert_eq!(positions, expected_positions); }; @@ -559,59 +562,100 @@ mod tests { for (pos, val) in vals.iter().enumerate() { let val = *val as u128; let pos = pos as u32; - let positions = decomp.get_positions_for_value_range(val..=val, pos..pos + 1); + let mut positions = Vec::new(); + decomp.get_positions_for_value_range(val..=val, pos..pos + 1, &mut positions); assert_eq!(positions, vec![pos]); } // handle docid range out of bounds - let positions = decomp.get_positions_for_value_range(0..=1, 1..u32::MAX); + let positions = get_positions_for_value_range_helper(&decomp, 0..=1, 1..u32::MAX); assert_eq!(positions, vec![]); - let positions = decomp.get_positions_for_value_range(0..=1, complete_range.clone()); + let positions = + get_positions_for_value_range_helper(&decomp, 0..=1, complete_range.clone()); assert_eq!(positions, vec![0]); - let positions = decomp.get_positions_for_value_range(0..=2, complete_range.clone()); + let positions = + get_positions_for_value_range_helper(&decomp, 0..=2, complete_range.clone()); assert_eq!(positions, vec![0]); - let positions = decomp.get_positions_for_value_range(0..=3, complete_range.clone()); + let positions = + get_positions_for_value_range_helper(&decomp, 0..=3, complete_range.clone()); assert_eq!(positions, vec![0, 2]); assert_eq!( - decomp.get_positions_for_value_range(99999u128..=99999u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 99999u128..=99999u128, + complete_range.clone() + ), vec![3] ); assert_eq!( - decomp.get_positions_for_value_range(99999u128..=100000u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 99999u128..=100000u128, + complete_range.clone() + ), vec![3, 4] ); assert_eq!( - decomp.get_positions_for_value_range(99998u128..=100000u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 99998u128..=100000u128, + complete_range.clone() + ), vec![3, 4] ); assert_eq!( - decomp.get_positions_for_value_range(99998u128..=99999u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 99998u128..=99999u128, + complete_range.clone() + ), vec![3] ); assert_eq!( - decomp.get_positions_for_value_range(99998u128..=99998u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 99998u128..=99998u128, + complete_range.clone() + ), vec![] ); assert_eq!( - decomp.get_positions_for_value_range(333u128..=333u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 333u128..=333u128, + complete_range.clone() + ), vec![8] ); assert_eq!( - decomp.get_positions_for_value_range(332u128..=333u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 332u128..=333u128, + complete_range.clone() + ), vec![8] ); assert_eq!( - decomp.get_positions_for_value_range(332u128..=334u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 332u128..=334u128, + complete_range.clone() + ), vec![8] ); assert_eq!( - decomp.get_positions_for_value_range(333u128..=334u128, complete_range.clone()), + get_positions_for_value_range_helper( + &decomp, + 333u128..=334u128, + complete_range.clone() + ), vec![8] ); assert_eq!( - decomp.get_positions_for_value_range( + get_positions_for_value_range_helper( + &decomp, 4_000_211_221u128..=5_000_000_000u128, complete_range.clone() ), @@ -640,12 +684,28 @@ mod tests { let data = test_aux_vals(vals); let decomp = CompactSpaceDecompressor::open(data).unwrap(); let complete_range = 0..vals.len() as u32; - let positions = decomp.get_positions_for_value_range(0..=5, complete_range.clone()); - assert_eq!(positions, vec![]); - let positions = decomp.get_positions_for_value_range(0..=100, complete_range.clone()); - assert_eq!(positions, vec![0]); - let positions = decomp.get_positions_for_value_range(0..=105, complete_range.clone()); - assert_eq!(positions, vec![0]); + assert_eq!( + get_positions_for_value_range_helper(&decomp, 0..=5, complete_range.clone()), + vec![] + ); + assert_eq!( + get_positions_for_value_range_helper(&decomp, 0..=100, complete_range.clone()), + vec![0] + ); + assert_eq!( + get_positions_for_value_range_helper(&decomp, 0..=105, complete_range.clone()), + vec![0] + ); + } + + fn get_positions_for_value_range_helper + ?Sized, T: PartialOrd>( + column: &C, + value_range: RangeInclusive, + doc_id_range: Range, + ) -> Vec { + let mut positions = Vec::new(); + column.get_positions_for_value_range(value_range, doc_id_range, &mut positions); + positions } #[test] @@ -671,19 +731,26 @@ mod tests { let complete_range = 0..vals.len() as u32; assert_eq!( - decomp.get_positions_for_value_range(199..=200, complete_range.clone()), + get_positions_for_value_range_helper(&*decomp, 199..=200, complete_range.clone()), vec![0] ); + assert_eq!( - decomp.get_positions_for_value_range(199..=201, complete_range.clone()), + get_positions_for_value_range_helper(&*decomp, 199..=201, complete_range.clone()), vec![0, 1] ); + assert_eq!( - decomp.get_positions_for_value_range(200..=200, complete_range.clone()), + get_positions_for_value_range_helper(&*decomp, 200..=200, complete_range.clone()), vec![0] ); + assert_eq!( - decomp.get_positions_for_value_range(1_000_000..=1_000_000, complete_range.clone()), + get_positions_for_value_range_helper( + &*decomp, + 1_000_000..=1_000_000, + complete_range.clone() + ), vec![11] ); } diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index bda3a1a44..552ae7057 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -217,9 +217,11 @@ mod tests { .filter(|(_, el)| **el == data[test_rand_idx]) .map(|(pos, _)| pos as u32) .collect(); - let positions = reader.get_positions_for_value_range( + let mut positions = Vec::new(); + reader.get_positions_for_value_range( data[test_rand_idx]..=data[test_rand_idx], 0..data.len() as u32, + &mut positions, ); assert_eq!(expected_positions, positions); } diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index d951f2732..754844140 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -115,10 +115,15 @@ fn bench_ip() { let decompressor = open_u128::(OwnedBytes::new(data)).unwrap(); // Sample some ranges + let mut doc_values = Vec::new(); for value in dataset.iter().take(1110).skip(1100).cloned() { + doc_values.clear(); print_time!("get range"); - let doc_values = - decompressor.get_positions_for_value_range(value..=value, 0..decompressor.num_vals()); + decompressor.get_positions_for_value_range( + value..=value, + 0..decompressor.num_vals(), + &mut doc_values, + ); println!("{:?}", doc_values.len()); } } diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 59fda70b6..2894bee38 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -165,9 +165,9 @@ impl MultiValuedU128FastFieldReader { value_range: RangeInclusive, doc_id_range: Range, ) -> Vec { - let positions = self - .vals_reader - .get_positions_for_value_range(value_range, doc_id_range); + let mut positions = Vec::new(); // TODO replace + self.vals_reader + .get_positions_for_value_range(value_range, doc_id_range, &mut positions); positions_to_docids(&positions, self.idx_reader.as_ref()) } diff --git a/src/query/range_query_ip_fastfield.rs b/src/query/range_query_ip_fastfield.rs index 40c7b43ad..ffd2f3405 100644 --- a/src/query/range_query_ip_fastfield.rs +++ b/src/query/range_query_ip_fastfield.rs @@ -110,10 +110,12 @@ impl VecCursor { self.docs.get(self.current_pos).map(|el| *el as u32) } - fn set_data(&mut self, data: Vec) { - self.docs = data; + fn get_cleared_data(&mut self) -> &mut Vec { + self.docs.clear(); self.current_pos = 0; + &mut self.docs } + fn is_empty(&self) -> bool { self.current_pos >= self.docs.len() } @@ -131,7 +133,8 @@ struct IpRangeDocSet { /// - We do a full scan. => We can load large chunks. We don't know in advance if seek call /// will come, so we start with small chunks /// - We load docs, interspersed with seek calls. When there are big jumps in the seek, we - /// should load small chunks. + /// should load small chunks. When the seeks are small, we can employ the same strategy as on a + /// full scan. fetch_horizon: u32, /// Current batch of loaded docs. loaded_docs: VecCursor, @@ -194,10 +197,12 @@ impl IpRangeDocSet { finished_to_end = true; } - let data = self - .ip_addr_fast_field - .get_positions_for_value_range(self.value_range.clone(), self.next_fetch_start..end); - self.loaded_docs.set_data(data); + let data = self.loaded_docs.get_cleared_data(); + self.ip_addr_fast_field.get_positions_for_value_range( + self.value_range.clone(), + self.next_fetch_start..end, + data, + ); self.next_fetch_start = end; finished_to_end }