pass positions parameter

This commit is contained in:
Pascal Seitz
2022-10-25 17:44:51 +08:00
parent 5e159c26bf
commit 6213ea476a
7 changed files with 156 additions and 56 deletions

View File

@@ -112,7 +112,13 @@ mod tests {
let column = get_u128_column_from_data(&data);
b.iter(|| {
column.get_positions_for_value_range(major_item..=major_item, 0..data.len() as u32)
let mut positions = Vec::new();
column.get_positions_for_value_range(
major_item..=major_item,
0..data.len() as u32,
&mut positions,
);
positions
});
}
@@ -122,7 +128,13 @@ mod tests {
let column = get_u128_column_from_data(&data);
b.iter(|| {
column.get_positions_for_value_range(minor_item..=minor_item, 0..data.len() as u32)
let mut positions = Vec::new();
column.get_positions_for_value_range(
minor_item..=minor_item,
0..data.len() as u32,
&mut positions,
);
positions
});
}
@@ -131,7 +143,15 @@ mod tests {
let (_major_item, _minor_item, data) = get_data_50percent_item();
let column = get_u128_column_from_data(&data);
b.iter(|| column.get_positions_for_value_range(0..=u128::MAX, 0..data.len() as u32));
b.iter(|| {
let mut positions = Vec::new();
column.get_positions_for_value_range(
0..=u128::MAX,
0..data.len() as u32,
&mut positions,
);
positions
});
}
#[bench]

View File

@@ -39,17 +39,16 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
&self,
value_range: RangeInclusive<T>,
doc_id_range: Range<u32>,
) -> Vec<u32> {
let mut vals = Vec::new();
positions: &mut Vec<u32>,
) {
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
for idx in doc_id_range.start..doc_id_range.end {
let val = self.get_val(idx);
if value_range.contains(&val) {
vals.push(idx);
positions.push(idx);
}
}
vals
}
/// Returns the minimum value for this fast field.
@@ -227,11 +226,13 @@ where
&self,
range: RangeInclusive<Output>,
doc_id_range: Range<u32>,
) -> Vec<u32> {
positions: &mut Vec<u32>,
) {
self.from_column.get_positions_for_value_range(
self.monotonic_mapping.inverse(range.start().clone())
..=self.monotonic_mapping.inverse(range.end().clone()),
doc_id_range,
positions,
)
}

View File

@@ -310,8 +310,9 @@ impl Column<u128> for CompactSpaceDecompressor {
&self,
value_range: RangeInclusive<u128>,
doc_id_range: Range<u32>,
) -> Vec<u32> {
self.get_positions_for_value_range(value_range, doc_id_range)
positions: &mut Vec<u32>,
) {
self.get_positions_for_value_range(value_range, doc_id_range, positions)
}
}
@@ -351,9 +352,10 @@ impl CompactSpaceDecompressor {
&self,
value_range: RangeInclusive<u128>,
doc_id_range: Range<u32>,
) -> Vec<u32> {
positions: &mut Vec<u32>,
) {
if value_range.start() > value_range.end() {
return Vec::new();
return;
}
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
let from_value = *value_range.start();
@@ -365,7 +367,7 @@ impl CompactSpaceDecompressor {
// Quick return, if both ranges fall into the same non-mapped space, the range can't cover
// any values, so we can early exit
match (compact_to, compact_from) {
(Err(pos1), Err(pos2)) if pos1 == pos2 => return Vec::new(),
(Err(pos1), Err(pos2)) if pos1 == pos2 => return,
_ => {}
}
@@ -387,7 +389,6 @@ impl CompactSpaceDecompressor {
});
let range = compact_from..=compact_to;
let mut positions = Vec::new();
let scan_num_docs = doc_id_range.end - doc_id_range.start;
@@ -420,8 +421,6 @@ impl CompactSpaceDecompressor {
for idx in cutoff..doc_id_range.end {
push_if_in_range(idx, get_val(idx as u32));
}
positions
}
#[inline]
@@ -514,8 +513,12 @@ mod tests {
.positions(|val| range.contains(val))
.map(|pos| pos as u32)
.collect::<Vec<_>>();
let positions =
decompressor.get_positions_for_value_range(range, 0..decompressor.num_vals());
let mut positions = Vec::new();
decompressor.get_positions_for_value_range(
range,
0..decompressor.num_vals(),
&mut positions,
);
assert_eq!(positions, expected_positions);
};
@@ -559,59 +562,100 @@ mod tests {
for (pos, val) in vals.iter().enumerate() {
let val = *val as u128;
let pos = pos as u32;
let positions = decomp.get_positions_for_value_range(val..=val, pos..pos + 1);
let mut positions = Vec::new();
decomp.get_positions_for_value_range(val..=val, pos..pos + 1, &mut positions);
assert_eq!(positions, vec![pos]);
}
// handle docid range out of bounds
let positions = decomp.get_positions_for_value_range(0..=1, 1..u32::MAX);
let positions = get_positions_for_value_range_helper(&decomp, 0..=1, 1..u32::MAX);
assert_eq!(positions, vec![]);
let positions = decomp.get_positions_for_value_range(0..=1, complete_range.clone());
let positions =
get_positions_for_value_range_helper(&decomp, 0..=1, complete_range.clone());
assert_eq!(positions, vec![0]);
let positions = decomp.get_positions_for_value_range(0..=2, complete_range.clone());
let positions =
get_positions_for_value_range_helper(&decomp, 0..=2, complete_range.clone());
assert_eq!(positions, vec![0]);
let positions = decomp.get_positions_for_value_range(0..=3, complete_range.clone());
let positions =
get_positions_for_value_range_helper(&decomp, 0..=3, complete_range.clone());
assert_eq!(positions, vec![0, 2]);
assert_eq!(
decomp.get_positions_for_value_range(99999u128..=99999u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
99999u128..=99999u128,
complete_range.clone()
),
vec![3]
);
assert_eq!(
decomp.get_positions_for_value_range(99999u128..=100000u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
99999u128..=100000u128,
complete_range.clone()
),
vec![3, 4]
);
assert_eq!(
decomp.get_positions_for_value_range(99998u128..=100000u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
99998u128..=100000u128,
complete_range.clone()
),
vec![3, 4]
);
assert_eq!(
decomp.get_positions_for_value_range(99998u128..=99999u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
99998u128..=99999u128,
complete_range.clone()
),
vec![3]
);
assert_eq!(
decomp.get_positions_for_value_range(99998u128..=99998u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
99998u128..=99998u128,
complete_range.clone()
),
vec![]
);
assert_eq!(
decomp.get_positions_for_value_range(333u128..=333u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
333u128..=333u128,
complete_range.clone()
),
vec![8]
);
assert_eq!(
decomp.get_positions_for_value_range(332u128..=333u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
332u128..=333u128,
complete_range.clone()
),
vec![8]
);
assert_eq!(
decomp.get_positions_for_value_range(332u128..=334u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
332u128..=334u128,
complete_range.clone()
),
vec![8]
);
assert_eq!(
decomp.get_positions_for_value_range(333u128..=334u128, complete_range.clone()),
get_positions_for_value_range_helper(
&decomp,
333u128..=334u128,
complete_range.clone()
),
vec![8]
);
assert_eq!(
decomp.get_positions_for_value_range(
get_positions_for_value_range_helper(
&decomp,
4_000_211_221u128..=5_000_000_000u128,
complete_range.clone()
),
@@ -640,12 +684,28 @@ mod tests {
let data = test_aux_vals(vals);
let decomp = CompactSpaceDecompressor::open(data).unwrap();
let complete_range = 0..vals.len() as u32;
let positions = decomp.get_positions_for_value_range(0..=5, complete_range.clone());
assert_eq!(positions, vec![]);
let positions = decomp.get_positions_for_value_range(0..=100, complete_range.clone());
assert_eq!(positions, vec![0]);
let positions = decomp.get_positions_for_value_range(0..=105, complete_range.clone());
assert_eq!(positions, vec![0]);
assert_eq!(
get_positions_for_value_range_helper(&decomp, 0..=5, complete_range.clone()),
vec![]
);
assert_eq!(
get_positions_for_value_range_helper(&decomp, 0..=100, complete_range.clone()),
vec![0]
);
assert_eq!(
get_positions_for_value_range_helper(&decomp, 0..=105, complete_range.clone()),
vec![0]
);
}
fn get_positions_for_value_range_helper<C: Column<T> + ?Sized, T: PartialOrd>(
column: &C,
value_range: RangeInclusive<T>,
doc_id_range: Range<u32>,
) -> Vec<u32> {
let mut positions = Vec::new();
column.get_positions_for_value_range(value_range, doc_id_range, &mut positions);
positions
}
#[test]
@@ -671,19 +731,26 @@ mod tests {
let complete_range = 0..vals.len() as u32;
assert_eq!(
decomp.get_positions_for_value_range(199..=200, complete_range.clone()),
get_positions_for_value_range_helper(&*decomp, 199..=200, complete_range.clone()),
vec![0]
);
assert_eq!(
decomp.get_positions_for_value_range(199..=201, complete_range.clone()),
get_positions_for_value_range_helper(&*decomp, 199..=201, complete_range.clone()),
vec![0, 1]
);
assert_eq!(
decomp.get_positions_for_value_range(200..=200, complete_range.clone()),
get_positions_for_value_range_helper(&*decomp, 200..=200, complete_range.clone()),
vec![0]
);
assert_eq!(
decomp.get_positions_for_value_range(1_000_000..=1_000_000, complete_range.clone()),
get_positions_for_value_range_helper(
&*decomp,
1_000_000..=1_000_000,
complete_range.clone()
),
vec![11]
);
}

View File

@@ -217,9 +217,11 @@ mod tests {
.filter(|(_, el)| **el == data[test_rand_idx])
.map(|(pos, _)| pos as u32)
.collect();
let positions = reader.get_positions_for_value_range(
let mut positions = Vec::new();
reader.get_positions_for_value_range(
data[test_rand_idx]..=data[test_rand_idx],
0..data.len() as u32,
&mut positions,
);
assert_eq!(expected_positions, positions);
}

View File

@@ -115,10 +115,15 @@ fn bench_ip() {
let decompressor = open_u128::<u128>(OwnedBytes::new(data)).unwrap();
// Sample some ranges
let mut doc_values = Vec::new();
for value in dataset.iter().take(1110).skip(1100).cloned() {
doc_values.clear();
print_time!("get range");
let doc_values =
decompressor.get_positions_for_value_range(value..=value, 0..decompressor.num_vals());
decompressor.get_positions_for_value_range(
value..=value,
0..decompressor.num_vals(),
&mut doc_values,
);
println!("{:?}", doc_values.len());
}
}

View File

@@ -165,9 +165,9 @@ impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
value_range: RangeInclusive<T>,
doc_id_range: Range<u32>,
) -> Vec<DocId> {
let positions = self
.vals_reader
.get_positions_for_value_range(value_range, doc_id_range);
let mut positions = Vec::new(); // TODO replace
self.vals_reader
.get_positions_for_value_range(value_range, doc_id_range, &mut positions);
positions_to_docids(&positions, self.idx_reader.as_ref())
}

View File

@@ -110,10 +110,12 @@ impl VecCursor {
self.docs.get(self.current_pos).map(|el| *el as u32)
}
fn set_data(&mut self, data: Vec<u32>) {
self.docs = data;
/// Rewinds the cursor to position 0, empties the buffered docs and returns
/// the (now empty) buffer so the caller can refill it in place.
///
/// `Vec::clear` keeps the existing allocation, so the buffer is reused
/// instead of being replaced by a freshly allocated vector.
fn get_cleared_data(&mut self) -> &mut Vec<u32> {
    self.current_pos = 0;
    let buffer = &mut self.docs;
    buffer.clear();
    buffer
}
/// Returns `true` when the cursor position is at or beyond the number of
/// buffered docs, i.e. nothing is left to read (this includes an empty
/// buffer).
fn is_empty(&self) -> bool {
    let buffered = self.docs.len();
    buffered <= self.current_pos
}
@@ -131,7 +133,8 @@ struct IpRangeDocSet {
/// - We do a full scan. => We can load large chunks. We don't know in advance if seek call
/// will come, so we start with small chunks
/// - We load docs, interspersed with seek calls. When there are big jumps in the seek, we
/// should load small chunks.
/// should load small chunks. When the seeks are small, we can employ the same strategy as on a
/// full scan.
fetch_horizon: u32,
/// Current batch of loaded docs.
loaded_docs: VecCursor,
@@ -194,10 +197,12 @@ impl IpRangeDocSet {
finished_to_end = true;
}
let data = self
.ip_addr_fast_field
.get_positions_for_value_range(self.value_range.clone(), self.next_fetch_start..end);
self.loaded_docs.set_data(data);
let data = self.loaded_docs.get_cleared_data();
self.ip_addr_fast_field.get_positions_for_value_range(
self.value_range.clone(),
self.next_fetch_start..end,
data,
);
self.next_fetch_start = end;
finished_to_end
}