mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-04 09:30:42 +00:00
fix multivalue ff index creation regression (#1543)
fixes multivalue ff regression by avoiding using `get_val`. Line::train calls repeatedly get_val, but get_val implementation on Column for multivalues is very slow. The fix is to use the iterator instead. Longterm fix should be to remove get_val access in serialization. Old Code test fastfield::bench::bench_multi_value_ff_merge_few_segments ... bench: 46,103,960 ns/iter (+/- 2,066,083) test fastfield::bench::bench_multi_value_ff_merge_many_segments ... bench: 83,073,036 ns/iter (+/- 4,373,615) est fastfield::bench::bench_multi_value_ff_merge_many_segments_log_merge ... bench: 64,178,576 ns/iter (+/- 1,466,700) Current running 3 tests test fastfield::multivalued::bench::bench_multi_value_ff_merge_few_segments ... bench: 57,379,523 ns/iter (+/- 3,220,787) test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments ... bench: 90,831,688 ns/iter (+/- 1,445,486) test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments_log_merge ... bench: 158,313,264 ns/iter (+/- 28,823,250) With Fix running 3 tests test fastfield::multivalued::bench::bench_multi_value_ff_merge_few_segments ... bench: 57,635,671 ns/iter (+/- 2,707,361) test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments ... bench: 91,468,712 ns/iter (+/- 11,393,581) test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments_log_merge ... bench: 73,909,138 ns/iter (+/- 15,846,097)
This commit is contained in:
@@ -69,11 +69,17 @@ impl Line {
|
||||
|
||||
// Same as train, but the intercept is only estimated from provided sample positions
|
||||
pub fn estimate(ys: &dyn Column, sample_positions: &[u64]) -> Self {
|
||||
Self::train_from(ys, sample_positions.iter().cloned())
|
||||
Self::train_from(
|
||||
ys,
|
||||
sample_positions
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|pos| (pos, ys.get_val(pos))),
|
||||
)
|
||||
}
|
||||
|
||||
// Intercept is only computed from provided positions
|
||||
fn train_from(ys: &dyn Column, positions: impl Iterator<Item = u64>) -> Self {
|
||||
fn train_from(ys: &dyn Column, positions_and_values: impl Iterator<Item = (u64, u64)>) -> Self {
|
||||
let num_vals = if let Some(num_vals) = NonZeroU64::new(ys.num_vals() - 1) {
|
||||
num_vals
|
||||
} else {
|
||||
@@ -114,11 +120,8 @@ impl Line {
|
||||
intercept: 0,
|
||||
};
|
||||
let heuristic_shift = y0.wrapping_sub(MID_POINT);
|
||||
line.intercept = positions
|
||||
.map(|pos| {
|
||||
let y = ys.get_val(pos);
|
||||
y.wrapping_sub(line.eval(pos))
|
||||
})
|
||||
line.intercept = positions_and_values
|
||||
.map(|(pos, y)| y.wrapping_sub(line.eval(pos)))
|
||||
.min_by_key(|&val| val.wrapping_sub(heuristic_shift))
|
||||
.unwrap_or(0u64); //< Never happens.
|
||||
line
|
||||
@@ -135,7 +138,10 @@ impl Line {
|
||||
/// This function is only invariable by translation if all of the
|
||||
/// `ys` are packaged into half of the space. (See heuristic below)
|
||||
pub fn train(ys: &dyn Column) -> Self {
|
||||
Self::train_from(ys, 0..ys.num_vals())
|
||||
Self::train_from(
|
||||
ys,
|
||||
ys.iter().enumerate().map(|(pos, val)| (pos as u64, val)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user