mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
pass positions parameter
This commit is contained in:
@@ -112,7 +112,13 @@ mod tests {
|
||||
let column = get_u128_column_from_data(&data);
|
||||
|
||||
b.iter(|| {
|
||||
column.get_positions_for_value_range(major_item..=major_item, 0..data.len() as u32)
|
||||
let mut positions = Vec::new();
|
||||
column.get_positions_for_value_range(
|
||||
major_item..=major_item,
|
||||
0..data.len() as u32,
|
||||
&mut positions,
|
||||
);
|
||||
positions
|
||||
});
|
||||
}
|
||||
|
||||
@@ -122,7 +128,13 @@ mod tests {
|
||||
let column = get_u128_column_from_data(&data);
|
||||
|
||||
b.iter(|| {
|
||||
column.get_positions_for_value_range(minor_item..=minor_item, 0..data.len() as u32)
|
||||
let mut positions = Vec::new();
|
||||
column.get_positions_for_value_range(
|
||||
minor_item..=minor_item,
|
||||
0..data.len() as u32,
|
||||
&mut positions,
|
||||
);
|
||||
positions
|
||||
});
|
||||
}
|
||||
|
||||
@@ -131,7 +143,15 @@ mod tests {
|
||||
let (_major_item, _minor_item, data) = get_data_50percent_item();
|
||||
let column = get_u128_column_from_data(&data);
|
||||
|
||||
b.iter(|| column.get_positions_for_value_range(0..=u128::MAX, 0..data.len() as u32));
|
||||
b.iter(|| {
|
||||
let mut positions = Vec::new();
|
||||
column.get_positions_for_value_range(
|
||||
0..=u128::MAX,
|
||||
0..data.len() as u32,
|
||||
&mut positions,
|
||||
);
|
||||
positions
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
|
||||
@@ -39,17 +39,16 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
|
||||
&self,
|
||||
value_range: RangeInclusive<T>,
|
||||
doc_id_range: Range<u32>,
|
||||
) -> Vec<u32> {
|
||||
let mut vals = Vec::new();
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
|
||||
|
||||
for idx in doc_id_range.start..doc_id_range.end {
|
||||
let val = self.get_val(idx);
|
||||
if value_range.contains(&val) {
|
||||
vals.push(idx);
|
||||
positions.push(idx);
|
||||
}
|
||||
}
|
||||
vals
|
||||
}
|
||||
|
||||
/// Returns the minimum value for this fast field.
|
||||
@@ -227,11 +226,13 @@ where
|
||||
&self,
|
||||
range: RangeInclusive<Output>,
|
||||
doc_id_range: Range<u32>,
|
||||
) -> Vec<u32> {
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
self.from_column.get_positions_for_value_range(
|
||||
self.monotonic_mapping.inverse(range.start().clone())
|
||||
..=self.monotonic_mapping.inverse(range.end().clone()),
|
||||
doc_id_range,
|
||||
positions,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -310,8 +310,9 @@ impl Column<u128> for CompactSpaceDecompressor {
|
||||
&self,
|
||||
value_range: RangeInclusive<u128>,
|
||||
doc_id_range: Range<u32>,
|
||||
) -> Vec<u32> {
|
||||
self.get_positions_for_value_range(value_range, doc_id_range)
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
self.get_positions_for_value_range(value_range, doc_id_range, positions)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -351,9 +352,10 @@ impl CompactSpaceDecompressor {
|
||||
&self,
|
||||
value_range: RangeInclusive<u128>,
|
||||
doc_id_range: Range<u32>,
|
||||
) -> Vec<u32> {
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
if value_range.start() > value_range.end() {
|
||||
return Vec::new();
|
||||
return;
|
||||
}
|
||||
let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
|
||||
let from_value = *value_range.start();
|
||||
@@ -365,7 +367,7 @@ impl CompactSpaceDecompressor {
|
||||
// Quick return, if both ranges fall into the same non-mapped space, the range can't cover
|
||||
// any values, so we can early exit
|
||||
match (compact_to, compact_from) {
|
||||
(Err(pos1), Err(pos2)) if pos1 == pos2 => return Vec::new(),
|
||||
(Err(pos1), Err(pos2)) if pos1 == pos2 => return,
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -387,7 +389,6 @@ impl CompactSpaceDecompressor {
|
||||
});
|
||||
|
||||
let range = compact_from..=compact_to;
|
||||
let mut positions = Vec::new();
|
||||
|
||||
let scan_num_docs = doc_id_range.end - doc_id_range.start;
|
||||
|
||||
@@ -420,8 +421,6 @@ impl CompactSpaceDecompressor {
|
||||
for idx in cutoff..doc_id_range.end {
|
||||
push_if_in_range(idx, get_val(idx as u32));
|
||||
}
|
||||
|
||||
positions
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -514,8 +513,12 @@ mod tests {
|
||||
.positions(|val| range.contains(val))
|
||||
.map(|pos| pos as u32)
|
||||
.collect::<Vec<_>>();
|
||||
let positions =
|
||||
decompressor.get_positions_for_value_range(range, 0..decompressor.num_vals());
|
||||
let mut positions = Vec::new();
|
||||
decompressor.get_positions_for_value_range(
|
||||
range,
|
||||
0..decompressor.num_vals(),
|
||||
&mut positions,
|
||||
);
|
||||
assert_eq!(positions, expected_positions);
|
||||
};
|
||||
|
||||
@@ -559,59 +562,100 @@ mod tests {
|
||||
for (pos, val) in vals.iter().enumerate() {
|
||||
let val = *val as u128;
|
||||
let pos = pos as u32;
|
||||
let positions = decomp.get_positions_for_value_range(val..=val, pos..pos + 1);
|
||||
let mut positions = Vec::new();
|
||||
decomp.get_positions_for_value_range(val..=val, pos..pos + 1, &mut positions);
|
||||
assert_eq!(positions, vec![pos]);
|
||||
}
|
||||
|
||||
// handle docid range out of bounds
|
||||
let positions = decomp.get_positions_for_value_range(0..=1, 1..u32::MAX);
|
||||
let positions = get_positions_for_value_range_helper(&decomp, 0..=1, 1..u32::MAX);
|
||||
assert_eq!(positions, vec![]);
|
||||
|
||||
let positions = decomp.get_positions_for_value_range(0..=1, complete_range.clone());
|
||||
let positions =
|
||||
get_positions_for_value_range_helper(&decomp, 0..=1, complete_range.clone());
|
||||
assert_eq!(positions, vec![0]);
|
||||
let positions = decomp.get_positions_for_value_range(0..=2, complete_range.clone());
|
||||
let positions =
|
||||
get_positions_for_value_range_helper(&decomp, 0..=2, complete_range.clone());
|
||||
assert_eq!(positions, vec![0]);
|
||||
let positions = decomp.get_positions_for_value_range(0..=3, complete_range.clone());
|
||||
let positions =
|
||||
get_positions_for_value_range_helper(&decomp, 0..=3, complete_range.clone());
|
||||
assert_eq!(positions, vec![0, 2]);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(99999u128..=99999u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99999u128..=99999u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![3]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(99999u128..=100000u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99999u128..=100000u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![3, 4]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(99998u128..=100000u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99998u128..=100000u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![3, 4]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(99998u128..=99999u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99998u128..=99999u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![3]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(99998u128..=99998u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
99998u128..=99998u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(333u128..=333u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
333u128..=333u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![8]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(332u128..=333u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
332u128..=333u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![8]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(332u128..=334u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
332u128..=334u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![8]
|
||||
);
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(333u128..=334u128, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
333u128..=334u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![8]
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(
|
||||
get_positions_for_value_range_helper(
|
||||
&decomp,
|
||||
4_000_211_221u128..=5_000_000_000u128,
|
||||
complete_range.clone()
|
||||
),
|
||||
@@ -640,12 +684,28 @@ mod tests {
|
||||
let data = test_aux_vals(vals);
|
||||
let decomp = CompactSpaceDecompressor::open(data).unwrap();
|
||||
let complete_range = 0..vals.len() as u32;
|
||||
let positions = decomp.get_positions_for_value_range(0..=5, complete_range.clone());
|
||||
assert_eq!(positions, vec![]);
|
||||
let positions = decomp.get_positions_for_value_range(0..=100, complete_range.clone());
|
||||
assert_eq!(positions, vec![0]);
|
||||
let positions = decomp.get_positions_for_value_range(0..=105, complete_range.clone());
|
||||
assert_eq!(positions, vec![0]);
|
||||
assert_eq!(
|
||||
get_positions_for_value_range_helper(&decomp, 0..=5, complete_range.clone()),
|
||||
vec![]
|
||||
);
|
||||
assert_eq!(
|
||||
get_positions_for_value_range_helper(&decomp, 0..=100, complete_range.clone()),
|
||||
vec![0]
|
||||
);
|
||||
assert_eq!(
|
||||
get_positions_for_value_range_helper(&decomp, 0..=105, complete_range.clone()),
|
||||
vec![0]
|
||||
);
|
||||
}
|
||||
|
||||
/// Test helper that calls `get_positions_for_value_range` with a freshly created buffer and
/// returns that buffer.
///
/// The column method takes a caller-supplied `&mut Vec<u32>` and has no return value; this
/// wrapper creates an empty `Vec`, passes it in together with `value_range` and `doc_id_range`,
/// and hands the buffer back so a test can compare it directly against a `vec![..]` literal.
///
/// `?Sized` relaxes the implicit `Sized` bound on `C`, so `column` may refer to an unsized
/// column type (presumably needed for the `&*decomp` call sites; confirm against the tests).
fn get_positions_for_value_range_helper<C: Column<T> + ?Sized, T: PartialOrd>(
|
||||
column: &C,
|
||||
value_range: RangeInclusive<T>,
|
||||
doc_id_range: Range<u32>,
|
||||
) -> Vec<u32> {
|
||||
let mut positions = Vec::new();
|
||||
column.get_positions_for_value_range(value_range, doc_id_range, &mut positions);
|
||||
positions
|
||||
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -671,19 +731,26 @@ mod tests {
|
||||
let complete_range = 0..vals.len() as u32;
|
||||
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(199..=200, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(&*decomp, 199..=200, complete_range.clone()),
|
||||
vec![0]
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(199..=201, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(&*decomp, 199..=201, complete_range.clone()),
|
||||
vec![0, 1]
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(200..=200, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(&*decomp, 200..=200, complete_range.clone()),
|
||||
vec![0]
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
decomp.get_positions_for_value_range(1_000_000..=1_000_000, complete_range.clone()),
|
||||
get_positions_for_value_range_helper(
|
||||
&*decomp,
|
||||
1_000_000..=1_000_000,
|
||||
complete_range.clone()
|
||||
),
|
||||
vec![11]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -217,9 +217,11 @@ mod tests {
|
||||
.filter(|(_, el)| **el == data[test_rand_idx])
|
||||
.map(|(pos, _)| pos as u32)
|
||||
.collect();
|
||||
let positions = reader.get_positions_for_value_range(
|
||||
let mut positions = Vec::new();
|
||||
reader.get_positions_for_value_range(
|
||||
data[test_rand_idx]..=data[test_rand_idx],
|
||||
0..data.len() as u32,
|
||||
&mut positions,
|
||||
);
|
||||
assert_eq!(expected_positions, positions);
|
||||
}
|
||||
|
||||
@@ -115,10 +115,15 @@ fn bench_ip() {
|
||||
|
||||
let decompressor = open_u128::<u128>(OwnedBytes::new(data)).unwrap();
|
||||
// Sample some ranges
|
||||
let mut doc_values = Vec::new();
|
||||
for value in dataset.iter().take(1110).skip(1100).cloned() {
|
||||
doc_values.clear();
|
||||
print_time!("get range");
|
||||
let doc_values =
|
||||
decompressor.get_positions_for_value_range(value..=value, 0..decompressor.num_vals());
|
||||
decompressor.get_positions_for_value_range(
|
||||
value..=value,
|
||||
0..decompressor.num_vals(),
|
||||
&mut doc_values,
|
||||
);
|
||||
println!("{:?}", doc_values.len());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -165,9 +165,9 @@ impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
|
||||
value_range: RangeInclusive<T>,
|
||||
doc_id_range: Range<u32>,
|
||||
) -> Vec<DocId> {
|
||||
let positions = self
|
||||
.vals_reader
|
||||
.get_positions_for_value_range(value_range, doc_id_range);
|
||||
let mut positions = Vec::new(); // TODO replace
|
||||
self.vals_reader
|
||||
.get_positions_for_value_range(value_range, doc_id_range, &mut positions);
|
||||
|
||||
positions_to_docids(&positions, self.idx_reader.as_ref())
|
||||
}
|
||||
|
||||
@@ -110,10 +110,12 @@ impl VecCursor {
|
||||
self.docs.get(self.current_pos).map(|el| *el as u32)
|
||||
}
|
||||
|
||||
fn set_data(&mut self, data: Vec<u32>) {
|
||||
self.docs = data;
|
||||
/// Empties the buffered docs, resets the cursor position to 0, and returns a mutable
/// reference to the now-empty buffer so the caller can fill it with the next batch.
///
/// `Vec::clear` removes the elements but keeps the allocated capacity, so handing the same
/// buffer out again avoids allocating a new `Vec` for each batch.
fn get_cleared_data(&mut self) -> &mut Vec<u32> {
|
||||
self.docs.clear();
|
||||
self.current_pos = 0;
|
||||
&mut self.docs
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.current_pos >= self.docs.len()
|
||||
}
|
||||
@@ -131,7 +133,8 @@ struct IpRangeDocSet {
|
||||
/// - We do a full scan. => We can load large chunks. We don't know in advance whether a seek
|
||||
/// call will come, so we start with small chunks.
|
||||
/// - We load docs, interspersed with seek calls. When there are big jumps in the seek, we
|
||||
/// should load small chunks.
|
||||
/// should load small chunks. When the seeks are small, we can employ the same strategy as on a
|
||||
/// full scan.
|
||||
fetch_horizon: u32,
|
||||
/// Current batch of loaded docs.
|
||||
loaded_docs: VecCursor,
|
||||
@@ -194,10 +197,12 @@ impl IpRangeDocSet {
|
||||
finished_to_end = true;
|
||||
}
|
||||
|
||||
let data = self
|
||||
.ip_addr_fast_field
|
||||
.get_positions_for_value_range(self.value_range.clone(), self.next_fetch_start..end);
|
||||
self.loaded_docs.set_data(data);
|
||||
let data = self.loaded_docs.get_cleared_data();
|
||||
self.ip_addr_fast_field.get_positions_for_value_range(
|
||||
self.value_range.clone(),
|
||||
self.next_fetch_start..end,
|
||||
data,
|
||||
);
|
||||
self.next_fetch_start = end;
|
||||
finished_to_end
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user