diff --git a/columnar/benches/bench_access.rs b/columnar/benches/bench_access.rs index 1c8830893..38b095b71 100644 --- a/columnar/benches/bench_access.rs +++ b/columnar/benches/bench_access.rs @@ -57,7 +57,7 @@ fn bench_group(mut runner: InputGroup) { column.first_vals_in_value_range(&docs, &mut buffer, ValueRange::All); for val in buffer.iter() { - let Some(val) = val else { continue }; + let Some(Some(val)) = val else { continue }; sum += *val; } } diff --git a/columnar/src/column/mod.rs b/columnar/src/column/mod.rs index 43f496b2d..dab7e6e24 100644 --- a/columnar/src/column/mod.rs +++ b/columnar/src/column/mod.rs @@ -94,7 +94,7 @@ impl Column { pub fn first_vals_in_value_range( &self, docids: &[DocId], - output: &mut [Option], + output: &mut [Option>], value_range: ValueRange, ) { match (&self.index, value_range) { @@ -105,33 +105,74 @@ impl Column { } (ColumnIndex::Optional(optional_index), ValueRange::All) => { for (i, docid) in docids.iter().enumerate() { - output[i] = optional_index - .rank_if_exists(*docid) - .map(|rowid| self.values.get_val(rowid)); + output[i] = Some( + optional_index + .rank_if_exists(*docid) + .map(|rowid| self.values.get_val(rowid)), + ); } } (ColumnIndex::Optional(optional_index), ValueRange::Inclusive(range)) => { for (i, docid) in docids.iter().enumerate() { - output[i] = optional_index - .rank_if_exists(*docid) - .map(|rowid| self.values.get_val(rowid)) - .filter(|val| range.contains(val)); + output[i] = match optional_index.rank_if_exists(*docid) { + Some(rowid) => { + let val = self.values.get_val(rowid); + if range.contains(&val) { + Some(Some(val)) + } else { + None + } + } + None => None, // range does not include NULL + }; } } - (ColumnIndex::Optional(optional_index), ValueRange::GreaterThan(threshold, _)) => { + ( + ColumnIndex::Optional(optional_index), + ValueRange::GreaterThan(threshold, nulls_match), + ) => { for (i, docid) in docids.iter().enumerate() { - output[i] = optional_index - .rank_if_exists(*docid) - .map(|rowid| self.values.get_val(rowid)) - .filter(|val| *val > threshold); + output[i] = match optional_index.rank_if_exists(*docid) { + Some(rowid) => { + let val = self.values.get_val(rowid); + if val > threshold { + Some(Some(val)) + } else { + None + } + } + None => { + if nulls_match { + Some(None) + } else { + None + } + } + }; } } - (ColumnIndex::Optional(optional_index), ValueRange::LessThan(threshold, _)) => { + ( + ColumnIndex::Optional(optional_index), + ValueRange::LessThan(threshold, nulls_match), + ) => { for (i, docid) in docids.iter().enumerate() { - output[i] = optional_index - .rank_if_exists(*docid) - .map(|rowid| self.values.get_val(rowid)) - .filter(|val| *val < threshold); + output[i] = match optional_index.rank_if_exists(*docid) { + Some(rowid) => { + let val = self.values.get_val(rowid); + if val < threshold { + Some(Some(val)) + } else { + None + } + } + None => { + if nulls_match { + Some(None) + } else { + None + } + } + }; } } (ColumnIndex::Multivalued(multivalued_index), ValueRange::All) => { @@ -139,9 +180,9 @@ impl Column { let range = multivalued_index.range(*docid); let is_empty = range.start == range.end; if !is_empty { - output[i] = Some(self.values.get_val(range.start)); + output[i] = Some(Some(self.values.get_val(range.start))); } else { - output[i] = None; + output[i] = Some(None); } } } @@ -152,7 +193,7 @@ impl Column { if !is_empty { let val = self.values.get_val(row_range.start); if range.contains(&val) { - output[i] = Some(val); + output[i] = Some(Some(val)); } else { output[i] = None; } @@ -163,7 +204,7 @@ impl Column { } ( ColumnIndex::Multivalued(multivalued_index), - ValueRange::GreaterThan(threshold, _), + ValueRange::GreaterThan(threshold, nulls_match), ) => { for (i, docid) in docids.iter().enumerate() { let row_range = multivalued_index.range(*docid); @@ -171,28 +212,39 @@ impl Column { if !is_empty { let val = self.values.get_val(row_range.start); if val > threshold { - output[i] = Some(val); + output[i] = Some(Some(val)); } else { output[i] = None; } } else { - output[i] = None; + if nulls_match { + output[i] = Some(None); + } else { + output[i] = None; + } } } } - (ColumnIndex::Multivalued(multivalued_index), ValueRange::LessThan(threshold, _)) => { + ( + ColumnIndex::Multivalued(multivalued_index), + ValueRange::LessThan(threshold, nulls_match), + ) => { for (i, docid) in docids.iter().enumerate() { let row_range = multivalued_index.range(*docid); let is_empty = row_range.start == row_range.end; if !is_empty { let val = self.values.get_val(row_range.start); if val < threshold { - output[i] = Some(val); + output[i] = Some(Some(val)); } else { output[i] = None; } } else { - output[i] = None; + if nulls_match { + output[i] = Some(None); + } else { + output[i] = None; + } } } } diff --git a/columnar/src/column_values/mod.rs b/columnar/src/column_values/mod.rs index 7fe21d46e..7561ca4f1 100644 --- a/columnar/src/column_values/mod.rs +++ b/columnar/src/column_values/mod.rs @@ -116,23 +116,43 @@ pub trait ColumnValues: Send + Sync + DowncastSync { fn get_vals_in_value_range( &self, indexes: &[u32], - output: &mut [Option], + output: &mut [Option>], value_range: ValueRange, ) { assert!(indexes.len() == output.len()); match value_range { - ValueRange::All => self.get_vals_opt(indexes, output), + ValueRange::All => { + for (out, idx) in output.iter_mut().zip(indexes) { + *out = Some(Some(self.get_val(*idx))); + } + } ValueRange::Inclusive(range) => { let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4)); for (out_x4, idx_x4) in out_and_idx_chunks { let v0 = self.get_val(idx_x4[0]); - out_x4[0] = if range.contains(&v0) { Some(v0) } else { None }; + out_x4[0] = if range.contains(&v0) { + Some(Some(v0)) + } else { + None + }; let v1 = self.get_val(idx_x4[1]); - out_x4[1] = if range.contains(&v1) { Some(v1) } else { None }; + out_x4[1] = if range.contains(&v1) { + Some(Some(v1)) + } else { + None + }; let v2 = self.get_val(idx_x4[2]); - out_x4[2] = if range.contains(&v2) { Some(v2) } else { None }; + out_x4[2] = if range.contains(&v2) { + Some(Some(v2)) + } else { + None + }; let v3 = self.get_val(idx_x4[3]); - out_x4[3] = if range.contains(&v3) { Some(v3) } else { None }; + out_x4[3] = if range.contains(&v3) { + Some(Some(v3)) + } else { + None + }; } let out_and_idx_chunks = output .chunks_exact_mut(4) @@ -141,20 +161,24 @@ pub trait ColumnValues: Send + Sync + DowncastSync { .zip(indexes.chunks_exact(4).remainder()); for (out, idx) in out_and_idx_chunks { let v = self.get_val(*idx); - *out = if range.contains(&v) { Some(v) } else { None }; + *out = if range.contains(&v) { + Some(Some(v)) + } else { + None + }; } } ValueRange::GreaterThan(threshold, _) => { let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4)); for (out_x4, idx_x4) in out_and_idx_chunks { let v0 = self.get_val(idx_x4[0]); - out_x4[0] = if v0 > threshold { Some(v0) } else { None }; + out_x4[0] = if v0 > threshold { Some(Some(v0)) } else { None }; let v1 = self.get_val(idx_x4[1]); - out_x4[1] = if v1 > threshold { Some(v1) } else { None }; + out_x4[1] = if v1 > threshold { Some(Some(v1)) } else { None }; let v2 = self.get_val(idx_x4[2]); - out_x4[2] = if v2 > threshold { Some(v2) } else { None }; + out_x4[2] = if v2 > threshold { Some(Some(v2)) } else { None }; let v3 = self.get_val(idx_x4[3]); - out_x4[3] = if v3 > threshold { Some(v3) } else { None }; + out_x4[3] = if v3 > threshold { Some(Some(v3)) } else { None }; } let out_and_idx_chunks = output .chunks_exact_mut(4) @@ -163,20 +187,20 @@ pub trait ColumnValues: Send + Sync + DowncastSync { .zip(indexes.chunks_exact(4).remainder()); for (out, idx) in out_and_idx_chunks { let v = self.get_val(*idx); - *out = if v > threshold { Some(v) } else { None }; + *out = if v > threshold { Some(Some(v)) } else { None }; } } ValueRange::LessThan(threshold, _) => { let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4)); for (out_x4, idx_x4) in out_and_idx_chunks { let v0 = self.get_val(idx_x4[0]); - out_x4[0] = if v0 < threshold { Some(v0) } else { None }; + out_x4[0] = if v0 < threshold { Some(Some(v0)) } else { None }; let v1 = self.get_val(idx_x4[1]); - out_x4[1] = if v1 < threshold { Some(v1) } else { None }; + out_x4[1] = if v1 < threshold { Some(Some(v1)) } else { None }; let v2 = self.get_val(idx_x4[2]); - out_x4[2] = if v2 < threshold { Some(v2) } else { None }; + out_x4[2] = if v2 < threshold { Some(Some(v2)) } else { None }; let v3 = self.get_val(idx_x4[3]); - out_x4[3] = if v3 < threshold { Some(v3) } else { None }; + out_x4[3] = if v3 < threshold { Some(Some(v3)) } else { None }; } let out_and_idx_chunks = output .chunks_exact_mut(4) @@ -185,7 +209,7 @@ pub trait ColumnValues: Send + Sync + DowncastSync { .zip(indexes.chunks_exact(4).remainder()); for (out, idx) in out_and_idx_chunks { let v = self.get_val(*idx); - *out = if v < threshold { Some(v) } else { None }; + *out = if v < threshold { Some(Some(v)) } else { None }; } } } @@ -298,6 +322,16 @@ impl ColumnValues for EmptyColumnValues { fn num_vals(&self) -> u32 { 0 } + + fn get_vals_in_value_range( + &self, + indexes: &[u32], + output: &mut [Option>], + value_range: ValueRange, + ) { + let _ = (indexes, output, value_range); + panic!("Internal Error: Called get_vals_in_value_range of empty column.") + } } impl ColumnValues for Arc> { @@ -315,7 +349,7 @@ impl ColumnValues for Arc], + output: &mut [Option>], value_range: ValueRange, ) { self.as_ref() diff --git a/columnar/src/column_values/u64_based/bitpacked.rs b/columnar/src/column_values/u64_based/bitpacked.rs index 089cae152..c34f1cddc 100644 --- a/columnar/src/column_values/u64_based/bitpacked.rs +++ b/columnar/src/column_values/u64_based/bitpacked.rs @@ -110,12 +110,14 @@ impl ColumnValues for BitpackedReader { fn get_vals_in_value_range( &self, indexes: &[u32], - output: &mut [Option], + output: &mut [Option>], value_range: ValueRange, ) { match value_range { ValueRange::All => { - self.get_vals_opt(indexes, output); + for (out, idx) in output.iter_mut().zip(indexes) { + *out = Some(Some(self.get_val(*idx))); + } } ValueRange::Inclusive(range) => { if let Some(transformed_range) = @@ -124,7 +126,8 @@ impl ColumnValues for BitpackedReader { for (i, doc) in indexes.iter().enumerate() { let raw_val = self.unpack_val(*doc); if transformed_range.contains(&raw_val) { - output[i] = Some(self.stats.min_value + self.stats.gcd.get() * raw_val); + output[i] = + Some(Some(self.stats.min_value + self.stats.gcd.get() * raw_val)); } else { output[i] = None; } @@ -137,7 +140,9 @@ impl ColumnValues for BitpackedReader { } ValueRange::GreaterThan(threshold, _) => { if threshold < self.stats.min_value { - self.get_vals_opt(indexes, output); + for (out, idx) in output.iter_mut().zip(indexes) { + *out = Some(Some(self.get_val(*idx))); + } } else if threshold >= self.stats.max_value { for out in output.iter_mut() { *out = None; @@ -147,7 +152,8 @@ impl ColumnValues for BitpackedReader { for (i, doc) in indexes.iter().enumerate() { let raw_val = self.unpack_val(*doc); if raw_val > raw_threshold { - output[i] = Some(self.stats.min_value + self.stats.gcd.get() * raw_val); + output[i] = + Some(Some(self.stats.min_value + self.stats.gcd.get() * raw_val)); } else { output[i] = None; } @@ -156,7 +162,9 @@ impl ColumnValues for BitpackedReader { } ValueRange::LessThan(threshold, _) => { if threshold > self.stats.max_value { - self.get_vals_opt(indexes, output); + for (out, idx) in output.iter_mut().zip(indexes) { + *out = Some(Some(self.get_val(*idx))); + } } else if threshold <= self.stats.min_value { for out in output.iter_mut() { *out = None; @@ -188,7 +196,8 @@ impl ColumnValues for BitpackedReader { for (i, doc) in indexes.iter().enumerate() { let raw_val = self.unpack_val(*doc); if raw_val < raw_threshold { - output[i] = Some(self.stats.min_value + self.stats.gcd.get() * raw_val); + output[i] = + Some(Some(self.stats.min_value + self.stats.gcd.get() * raw_val)); } else { output[i] = None; } @@ -197,7 +206,6 @@ impl ColumnValues for BitpackedReader { } } } - fn get_row_ids_for_value_range( &self, range: ValueRange, diff --git a/src/collector/sort_key/sort_by_static_fast_value.rs b/src/collector/sort_key/sort_by_static_fast_value.rs index d333aad53..2f43b3a8f 100644 --- a/src/collector/sort_key/sort_by_static_fast_value.rs +++ b/src/collector/sort_key/sort_by_static_fast_value.rs @@ -72,6 +72,7 @@ impl SortKeyComputer for SortByStaticFastValue { sort_column, typ: PhantomData, buffer: Vec::new(), + fetch_buffer: Vec::new(), }) } } @@ -80,6 +81,7 @@ pub struct SortByFastValueSegmentSortKeyComputer { sort_column: Column, typ: PhantomData, buffer: Vec>, + fetch_buffer: Vec>>, } impl SegmentSortKeyComputer for SortByFastValueSegmentSortKeyComputer { @@ -93,9 +95,13 @@ impl SegmentSortKeyComputer for SortByFastValueSegmentSortKeyCompu } fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec { - self.buffer.resize(docs.len(), None); + self.fetch_buffer.resize(docs.len(), None); self.sort_column - .first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All); + .first_vals_in_value_range(docs, &mut self.fetch_buffer, ValueRange::All); + + self.buffer.clear(); + self.buffer + .extend(self.fetch_buffer.iter().map(|val| val.flatten())); &mut self.buffer } diff --git a/src/collector/sort_key/sort_by_string.rs b/src/collector/sort_key/sort_by_string.rs index 78f05549e..c1e28bcb1 100644 --- a/src/collector/sort_key/sort_by_string.rs +++ b/src/collector/sort_key/sort_by_string.rs @@ -41,6 +41,7 @@ impl SortKeyComputer for SortByString { Ok(ByStringColumnSegmentSortKeyComputer { str_column_opt, buffer: Vec::new(), + fetch_buffer: Vec::new(), }) } } @@ -48,6 +49,7 @@ impl SortKeyComputer for SortByString { pub struct ByStringColumnSegmentSortKeyComputer { str_column_opt: Option, buffer: Vec>, + fetch_buffer: Vec>>, } impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer { @@ -62,12 +64,17 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer { } fn segment_sort_keys(&mut self, docs: &[DocId]) -> &mut Vec { - self.buffer.resize(docs.len(), None); + self.fetch_buffer.resize(docs.len(), None); if let Some(str_column) = &self.str_column_opt { - str_column - .ords() - .first_vals_in_value_range(docs, &mut self.buffer, ValueRange::All); + str_column.ords().first_vals_in_value_range( + docs, + &mut self.fetch_buffer, + ValueRange::All, + ); } + self.buffer.clear(); + self.buffer + .extend(self.fetch_buffer.iter().map(|val| val.flatten())); &mut self.buffer }