mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 23:20:40 +00:00
Squash with Optional. WIP: Still needs work: we are allocating.
This commit is contained in:
@@ -46,18 +46,18 @@ fn bench_group(mut runner: InputGroup<Column>) {
|
||||
runner.register("access_first_vals", |column| {
|
||||
let mut sum = 0;
|
||||
const BLOCK_SIZE: usize = 32;
|
||||
let mut docs = vec![0; BLOCK_SIZE];
|
||||
let mut buffer = vec![None; BLOCK_SIZE];
|
||||
let mut docs = Vec::with_capacity(BLOCK_SIZE);
|
||||
let mut buffer = Vec::with_capacity(BLOCK_SIZE);
|
||||
for i in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
|
||||
// fill docs
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
docs.clear();
|
||||
for idx in 0..BLOCK_SIZE {
|
||||
docs[idx] = idx as u32 + i;
|
||||
docs.push(idx as u32 + i);
|
||||
}
|
||||
|
||||
column.first_vals_in_value_range(&docs, &mut buffer, ValueRange::All);
|
||||
buffer.clear();
|
||||
column.first_vals_in_value_range(&mut docs, &mut buffer, ValueRange::All);
|
||||
for val in buffer.iter() {
|
||||
let Some(Some(val)) = val else { continue };
|
||||
let Some(val) = val else { continue };
|
||||
sum += *val;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,15 +93,29 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
#[inline]
|
||||
pub fn first_vals_in_value_range(
|
||||
&self,
|
||||
docids: &[DocId],
|
||||
output: &mut [Option<Option<T>>],
|
||||
docids: &mut Vec<DocId>,
|
||||
values: &mut Vec<Option<T>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
match (&self.index, value_range) {
|
||||
(ColumnIndex::Empty { .. }, _) => {}
|
||||
(ColumnIndex::Empty { .. }, value_range) => {
|
||||
let nulls_match = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(_) => false,
|
||||
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThan(_, nulls_match) => *nulls_match,
|
||||
};
|
||||
if nulls_match {
|
||||
for _ in 0..docids.len() {
|
||||
values.push(None);
|
||||
}
|
||||
} else {
|
||||
docids.clear();
|
||||
}
|
||||
}
|
||||
(ColumnIndex::Full, value_range) => {
|
||||
self.values
|
||||
.get_vals_in_value_range(docids, output, value_range);
|
||||
.get_vals_in_value_range(docids, values, value_range);
|
||||
}
|
||||
(ColumnIndex::Optional(optional_index), value_range) => {
|
||||
let nulls_match = match &value_range {
|
||||
@@ -111,100 +125,85 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
ValueRange::LessThan(_, nulls_match) => *nulls_match,
|
||||
};
|
||||
|
||||
let mut row_ids = Vec::with_capacity(docids.len());
|
||||
let mut output_indices = Vec::with_capacity(docids.len());
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let original_input_docids = std::mem::take(docids); // Take ownership to iterate and rebuild
|
||||
let mut temp_present_doc_ids = Vec::with_capacity(original_input_docids.len());
|
||||
let mut temp_row_ids = Vec::with_capacity(original_input_docids.len());
|
||||
|
||||
// Collect docids that have values and their corresponding row_ids
|
||||
for docid in original_input_docids.iter() {
|
||||
if let Some(row_id) = optional_index.rank_if_exists(*docid) {
|
||||
row_ids.push(row_id);
|
||||
output_indices.push(i);
|
||||
} else if nulls_match {
|
||||
output[i] = Some(None);
|
||||
} else {
|
||||
output[i] = None;
|
||||
temp_present_doc_ids.push(*docid);
|
||||
temp_row_ids.push(row_id);
|
||||
}
|
||||
}
|
||||
|
||||
if !row_ids.is_empty() {
|
||||
let mut values = vec![None; row_ids.len()];
|
||||
self.values
|
||||
.get_vals_in_value_range(&row_ids, &mut values, value_range);
|
||||
for (val, output_idx) in values.into_iter().zip(output_indices) {
|
||||
output[output_idx] = val;
|
||||
let mut temp_values_for_present_docs = Vec::with_capacity(temp_row_ids.len());
|
||||
// Batch process present values
|
||||
self.values.get_vals_in_value_range(
|
||||
&mut temp_row_ids,
|
||||
&mut temp_values_for_present_docs,
|
||||
value_range,
|
||||
);
|
||||
|
||||
// Now, rebuild the docids and values vectors, merging nulls_match
|
||||
let mut present_iter = temp_present_doc_ids
|
||||
.into_iter()
|
||||
.zip(temp_values_for_present_docs.into_iter())
|
||||
.peekable();
|
||||
|
||||
docids.clear();
|
||||
values.clear();
|
||||
|
||||
for docid_orig in original_input_docids.into_iter() {
|
||||
let mut is_present = false;
|
||||
if let Some((present_docid, _)) = present_iter.peek() {
|
||||
if docid_orig == present_docid {
|
||||
is_present = true;
|
||||
}
|
||||
}
|
||||
|
||||
if is_present {
|
||||
let (present_docid, present_value) = present_iter.next().unwrap();
|
||||
docids.push(present_docid);
|
||||
values.push(present_value);
|
||||
} else if nulls_match {
|
||||
docids.push(docid_orig);
|
||||
values.push(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
(ColumnIndex::Multivalued(multivalued_index), ValueRange::All) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let range = multivalued_index.range(*docid);
|
||||
let is_empty = range.start == range.end;
|
||||
if !is_empty {
|
||||
output[i] = Some(Some(self.values.get_val(range.start)));
|
||||
} else {
|
||||
output[i] = Some(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
(ColumnIndex::Multivalued(multivalued_index), ValueRange::Inclusive(range)) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let row_range = multivalued_index.range(*docid);
|
||||
(ColumnIndex::Multivalued(multivalued_index), value_range) => {
|
||||
let nulls_match = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(_) => false,
|
||||
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThan(_, nulls_match) => *nulls_match,
|
||||
};
|
||||
let mut write_head = 0;
|
||||
for i in 0..docids.len() {
|
||||
let docid = docids[i];
|
||||
let row_range = multivalued_index.range(docid);
|
||||
let is_empty = row_range.start == row_range.end;
|
||||
if !is_empty {
|
||||
let val = self.values.get_val(row_range.start);
|
||||
if range.contains(&val) {
|
||||
output[i] = Some(Some(val));
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
(
|
||||
ColumnIndex::Multivalued(multivalued_index),
|
||||
ValueRange::GreaterThan(threshold, nulls_match),
|
||||
) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let row_range = multivalued_index.range(*docid);
|
||||
let is_empty = row_range.start == row_range.end;
|
||||
if !is_empty {
|
||||
let val = self.values.get_val(row_range.start);
|
||||
if val > threshold {
|
||||
output[i] = Some(Some(val));
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
} else {
|
||||
if nulls_match {
|
||||
output[i] = Some(None);
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
(
|
||||
ColumnIndex::Multivalued(multivalued_index),
|
||||
ValueRange::LessThan(threshold, nulls_match),
|
||||
) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let row_range = multivalued_index.range(*docid);
|
||||
let is_empty = row_range.start == row_range.end;
|
||||
if !is_empty {
|
||||
let val = self.values.get_val(row_range.start);
|
||||
if val < threshold {
|
||||
output[i] = Some(Some(val));
|
||||
} else {
|
||||
output[i] = None;
|
||||
}
|
||||
} else {
|
||||
if nulls_match {
|
||||
output[i] = Some(None);
|
||||
} else {
|
||||
output[i] = None;
|
||||
let matches = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(&val),
|
||||
ValueRange::GreaterThan(t, _) => val > *t,
|
||||
ValueRange::LessThan(t, _) => val < *t,
|
||||
};
|
||||
if matches {
|
||||
docids[write_head] = docid;
|
||||
values.push(Some(val));
|
||||
write_head += 1;
|
||||
}
|
||||
} else if nulls_match {
|
||||
docids[write_head] = docid;
|
||||
values.push(None);
|
||||
write_head += 1;
|
||||
}
|
||||
}
|
||||
docids.truncate(write_head);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -115,104 +115,171 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
|
||||
/// The values are filtered by the provided value range.
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
indexes: &[u32],
|
||||
output: &mut [Option<Option<T>>],
|
||||
indexes: &mut Vec<u32>,
|
||||
output: &mut Vec<Option<T>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
assert!(indexes.len() == output.len());
|
||||
let mut write_head = 0;
|
||||
let mut read_head = 0;
|
||||
let len = indexes.len();
|
||||
|
||||
match value_range {
|
||||
ValueRange::All => {
|
||||
for (out, idx) in output.iter_mut().zip(indexes) {
|
||||
*out = Some(Some(self.get_val(*idx)));
|
||||
while read_head + 3 < len {
|
||||
let idx0 = indexes[read_head];
|
||||
let idx1 = indexes[read_head + 1];
|
||||
let idx2 = indexes[read_head + 2];
|
||||
let idx3 = indexes[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
indexes[write_head] = idx0;
|
||||
output.push(Some(val0));
|
||||
write_head += 1;
|
||||
indexes[write_head] = idx1;
|
||||
output.push(Some(val1));
|
||||
write_head += 1;
|
||||
indexes[write_head] = idx2;
|
||||
output.push(Some(val2));
|
||||
write_head += 1;
|
||||
indexes[write_head] = idx3;
|
||||
output.push(Some(val3));
|
||||
write_head += 1;
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::Inclusive(range) => {
|
||||
let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
|
||||
for (out_x4, idx_x4) in out_and_idx_chunks {
|
||||
let v0 = self.get_val(idx_x4[0]);
|
||||
out_x4[0] = if range.contains(&v0) {
|
||||
Some(Some(v0))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let v1 = self.get_val(idx_x4[1]);
|
||||
out_x4[1] = if range.contains(&v1) {
|
||||
Some(Some(v1))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let v2 = self.get_val(idx_x4[2]);
|
||||
out_x4[2] = if range.contains(&v2) {
|
||||
Some(Some(v2))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let v3 = self.get_val(idx_x4[3]);
|
||||
out_x4[3] = if range.contains(&v3) {
|
||||
Some(Some(v3))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
let out_and_idx_chunks = output
|
||||
.chunks_exact_mut(4)
|
||||
.into_remainder()
|
||||
.iter_mut()
|
||||
.zip(indexes.chunks_exact(4).remainder());
|
||||
for (out, idx) in out_and_idx_chunks {
|
||||
let v = self.get_val(*idx);
|
||||
*out = if range.contains(&v) {
|
||||
Some(Some(v))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
ValueRange::Inclusive(ref range) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = indexes[read_head];
|
||||
let idx1 = indexes[read_head + 1];
|
||||
let idx2 = indexes[read_head + 2];
|
||||
let idx3 = indexes[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if range.contains(&val0) {
|
||||
indexes[write_head] = idx0;
|
||||
output.push(Some(val0));
|
||||
write_head += 1;
|
||||
}
|
||||
if range.contains(&val1) {
|
||||
indexes[write_head] = idx1;
|
||||
output.push(Some(val1));
|
||||
write_head += 1;
|
||||
}
|
||||
if range.contains(&val2) {
|
||||
indexes[write_head] = idx2;
|
||||
output.push(Some(val2));
|
||||
write_head += 1;
|
||||
}
|
||||
if range.contains(&val3) {
|
||||
indexes[write_head] = idx3;
|
||||
output.push(Some(val3));
|
||||
write_head += 1;
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThan(threshold, _) => {
|
||||
let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
|
||||
for (out_x4, idx_x4) in out_and_idx_chunks {
|
||||
let v0 = self.get_val(idx_x4[0]);
|
||||
out_x4[0] = if v0 > threshold { Some(Some(v0)) } else { None };
|
||||
let v1 = self.get_val(idx_x4[1]);
|
||||
out_x4[1] = if v1 > threshold { Some(Some(v1)) } else { None };
|
||||
let v2 = self.get_val(idx_x4[2]);
|
||||
out_x4[2] = if v2 > threshold { Some(Some(v2)) } else { None };
|
||||
let v3 = self.get_val(idx_x4[3]);
|
||||
out_x4[3] = if v3 > threshold { Some(Some(v3)) } else { None };
|
||||
}
|
||||
let out_and_idx_chunks = output
|
||||
.chunks_exact_mut(4)
|
||||
.into_remainder()
|
||||
.iter_mut()
|
||||
.zip(indexes.chunks_exact(4).remainder());
|
||||
for (out, idx) in out_and_idx_chunks {
|
||||
let v = self.get_val(*idx);
|
||||
*out = if v > threshold { Some(Some(v)) } else { None };
|
||||
ValueRange::GreaterThan(ref threshold, _) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = indexes[read_head];
|
||||
let idx1 = indexes[read_head + 1];
|
||||
let idx2 = indexes[read_head + 2];
|
||||
let idx3 = indexes[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if val0 > *threshold {
|
||||
indexes[write_head] = idx0;
|
||||
output.push(Some(val0));
|
||||
write_head += 1;
|
||||
}
|
||||
if val1 > *threshold {
|
||||
indexes[write_head] = idx1;
|
||||
output.push(Some(val1));
|
||||
write_head += 1;
|
||||
}
|
||||
if val2 > *threshold {
|
||||
indexes[write_head] = idx2;
|
||||
output.push(Some(val2));
|
||||
write_head += 1;
|
||||
}
|
||||
if val3 > *threshold {
|
||||
indexes[write_head] = idx3;
|
||||
output.push(Some(val3));
|
||||
write_head += 1;
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => {
|
||||
let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
|
||||
for (out_x4, idx_x4) in out_and_idx_chunks {
|
||||
let v0 = self.get_val(idx_x4[0]);
|
||||
out_x4[0] = if v0 < threshold { Some(Some(v0)) } else { None };
|
||||
let v1 = self.get_val(idx_x4[1]);
|
||||
out_x4[1] = if v1 < threshold { Some(Some(v1)) } else { None };
|
||||
let v2 = self.get_val(idx_x4[2]);
|
||||
out_x4[2] = if v2 < threshold { Some(Some(v2)) } else { None };
|
||||
let v3 = self.get_val(idx_x4[3]);
|
||||
out_x4[3] = if v3 < threshold { Some(Some(v3)) } else { None };
|
||||
}
|
||||
let out_and_idx_chunks = output
|
||||
.chunks_exact_mut(4)
|
||||
.into_remainder()
|
||||
.iter_mut()
|
||||
.zip(indexes.chunks_exact(4).remainder());
|
||||
for (out, idx) in out_and_idx_chunks {
|
||||
let v = self.get_val(*idx);
|
||||
*out = if v < threshold { Some(Some(v)) } else { None };
|
||||
ValueRange::LessThan(ref threshold, _) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = indexes[read_head];
|
||||
let idx1 = indexes[read_head + 1];
|
||||
let idx2 = indexes[read_head + 2];
|
||||
let idx3 = indexes[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if val0 < *threshold {
|
||||
indexes[write_head] = idx0;
|
||||
output.push(Some(val0));
|
||||
write_head += 1;
|
||||
}
|
||||
if val1 < *threshold {
|
||||
indexes[write_head] = idx1;
|
||||
output.push(Some(val1));
|
||||
write_head += 1;
|
||||
}
|
||||
if val2 < *threshold {
|
||||
indexes[write_head] = idx2;
|
||||
output.push(Some(val2));
|
||||
write_head += 1;
|
||||
}
|
||||
if val3 < *threshold {
|
||||
indexes[write_head] = idx3;
|
||||
output.push(Some(val3));
|
||||
write_head += 1;
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Process remaining elements (0 to 3)
|
||||
while read_head < len {
|
||||
let idx = indexes[read_head];
|
||||
let val = self.get_val(idx);
|
||||
let matches = match value_range {
|
||||
// `value_range` is only borrowed by the `ref` patterns (here and in the unrolled loops above), so it is not moved and can be matched again for every remaining element.
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(ref r) => r.contains(&val),
|
||||
ValueRange::GreaterThan(ref t, _) => val > *t,
|
||||
ValueRange::LessThan(ref t, _) => val < *t,
|
||||
};
|
||||
if matches {
|
||||
indexes[write_head] = idx;
|
||||
output.push(Some(val));
|
||||
write_head += 1;
|
||||
}
|
||||
read_head += 1;
|
||||
}
|
||||
indexes.truncate(write_head);
|
||||
}
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
@@ -325,8 +392,8 @@ impl<T: PartialOrd + Default> ColumnValues<T> for EmptyColumnValues {
|
||||
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
indexes: &[u32],
|
||||
output: &mut [Option<Option<T>>],
|
||||
indexes: &mut Vec<u32>,
|
||||
output: &mut Vec<Option<T>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
let _ = (indexes, output, value_range);
|
||||
@@ -348,8 +415,8 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
|
||||
#[inline(always)]
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
indexes: &[u32],
|
||||
output: &mut [Option<Option<T>>],
|
||||
indexes: &mut Vec<u32>,
|
||||
output: &mut Vec<Option<T>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
self.as_ref()
|
||||
|
||||
@@ -109,82 +109,71 @@ impl ColumnValues for BitpackedReader {
|
||||
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
indexes: &[u32],
|
||||
output: &mut [Option<Option<u64>>],
|
||||
indexes: &mut Vec<u32>,
|
||||
output: &mut Vec<Option<u64>>,
|
||||
value_range: ValueRange<u64>,
|
||||
) {
|
||||
let mut write_head = 0;
|
||||
match value_range {
|
||||
ValueRange::All => {
|
||||
for (out, idx) in output.iter_mut().zip(indexes) {
|
||||
*out = Some(Some(self.get_val(*idx)));
|
||||
for i in 0..indexes.len() {
|
||||
let idx = indexes[i];
|
||||
indexes[write_head] = idx;
|
||||
output.push(Some(self.get_val(idx)));
|
||||
write_head += 1;
|
||||
}
|
||||
}
|
||||
ValueRange::Inclusive(range) => {
|
||||
if let Some(transformed_range) =
|
||||
transform_range_before_linear_transformation(&self.stats, range)
|
||||
{
|
||||
for (i, doc) in indexes.iter().enumerate() {
|
||||
let raw_val = self.unpack_val(*doc);
|
||||
for i in 0..indexes.len() {
|
||||
let doc = indexes[i];
|
||||
let raw_val = self.unpack_val(doc);
|
||||
if transformed_range.contains(&raw_val) {
|
||||
output[i] =
|
||||
Some(Some(self.stats.min_value + self.stats.gcd.get() * raw_val));
|
||||
} else {
|
||||
output[i] = None;
|
||||
indexes[write_head] = doc;
|
||||
output
|
||||
.push(Some(self.stats.min_value + self.stats.gcd.get() * raw_val));
|
||||
write_head += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for out in output.iter_mut() {
|
||||
*out = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThan(threshold, _) => {
|
||||
if threshold < self.stats.min_value {
|
||||
for (out, idx) in output.iter_mut().zip(indexes) {
|
||||
*out = Some(Some(self.get_val(*idx)));
|
||||
for i in 0..indexes.len() {
|
||||
let idx = indexes[i];
|
||||
indexes[write_head] = idx;
|
||||
output.push(Some(self.get_val(idx)));
|
||||
write_head += 1;
|
||||
}
|
||||
} else if threshold >= self.stats.max_value {
|
||||
for out in output.iter_mut() {
|
||||
*out = None;
|
||||
}
|
||||
// All filtered out
|
||||
} else {
|
||||
let raw_threshold = (threshold - self.stats.min_value) / self.stats.gcd.get();
|
||||
for (i, doc) in indexes.iter().enumerate() {
|
||||
let raw_val = self.unpack_val(*doc);
|
||||
for i in 0..indexes.len() {
|
||||
let doc = indexes[i];
|
||||
let raw_val = self.unpack_val(doc);
|
||||
if raw_val > raw_threshold {
|
||||
output[i] =
|
||||
Some(Some(self.stats.min_value + self.stats.gcd.get() * raw_val));
|
||||
} else {
|
||||
output[i] = None;
|
||||
indexes[write_head] = doc;
|
||||
output
|
||||
.push(Some(self.stats.min_value + self.stats.gcd.get() * raw_val));
|
||||
write_head += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => {
|
||||
if threshold > self.stats.max_value {
|
||||
for (out, idx) in output.iter_mut().zip(indexes) {
|
||||
*out = Some(Some(self.get_val(*idx)));
|
||||
for i in 0..indexes.len() {
|
||||
let idx = indexes[i];
|
||||
indexes[write_head] = idx;
|
||||
output.push(Some(self.get_val(idx)));
|
||||
write_head += 1;
|
||||
}
|
||||
} else if threshold <= self.stats.min_value {
|
||||
for out in output.iter_mut() {
|
||||
*out = None;
|
||||
}
|
||||
// All filtered out
|
||||
} else {
|
||||
// Goal: keep values with `val < threshold`, where `val = min + gcd * raw`.
//   min + gcd * raw < threshold
//   gcd * raw < threshold - min
//   raw < (threshold - min) / gcd            (exact, real-valued division)
// Over the integers this is `raw < ceil((threshold - min) / gcd)`,
// i.e. `raw < (threshold - min + gcd - 1) / gcd` with truncating division.
// Plain truncating division is only right when gcd divides (threshold - min):
//   gcd=10, min=0, threshold=10: raw < 1, so raw=0 (val 0) matches and raw=1 (val 10) does not.
// Otherwise truncation would make the bound one too small:
//   gcd=10, min=0, threshold=15: 15 / 10 = 1, but raw=1 (val 10 < 15) must match,
//   so the bound has to be 2.
|
||||
let diff = threshold - self.stats.min_value;
|
||||
let gcd = self.stats.gcd.get();
|
||||
let raw_threshold = if diff % gcd == 0 {
|
||||
@@ -193,18 +182,20 @@ impl ColumnValues for BitpackedReader {
|
||||
diff / gcd + 1
|
||||
};
|
||||
|
||||
for (i, doc) in indexes.iter().enumerate() {
|
||||
let raw_val = self.unpack_val(*doc);
|
||||
for i in 0..indexes.len() {
|
||||
let doc = indexes[i];
|
||||
let raw_val = self.unpack_val(doc);
|
||||
if raw_val < raw_threshold {
|
||||
output[i] =
|
||||
Some(Some(self.stats.min_value + self.stats.gcd.get() * raw_val));
|
||||
} else {
|
||||
output[i] = None;
|
||||
indexes[write_head] = doc;
|
||||
output
|
||||
.push(Some(self.stats.min_value + self.stats.gcd.get() * raw_val));
|
||||
write_head += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
indexes.truncate(write_head);
|
||||
}
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
|
||||
@@ -74,6 +74,7 @@ impl<T: FastValue> SortKeyComputer for SortByStaticFastValue<T> {
|
||||
typ: PhantomData,
|
||||
buffer: Vec::new(),
|
||||
fetch_buffer: Vec::new(),
|
||||
doc_buffer: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -82,7 +83,8 @@ pub struct SortByFastValueSegmentSortKeyComputer<T> {
|
||||
sort_column: Column<u64>,
|
||||
typ: PhantomData<T>,
|
||||
buffer: Vec<(DocId, Option<u64>)>,
|
||||
fetch_buffer: Vec<Option<Option<u64>>>,
|
||||
fetch_buffer: Vec<Option<u64>>,
|
||||
doc_buffer: Vec<DocId>,
|
||||
}
|
||||
|
||||
impl<T: FastValue> SegmentSortKeyComputer for SortByFastValueSegmentSortKeyComputer<T> {
|
||||
@@ -100,16 +102,19 @@ impl<T: FastValue> SegmentSortKeyComputer for SortByFastValueSegmentSortKeyCompu
|
||||
docs: &[DocId],
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) -> &mut Vec<(DocId, Self::SegmentSortKey)> {
|
||||
self.fetch_buffer.resize(docs.len(), None);
|
||||
self.doc_buffer.clear();
|
||||
self.doc_buffer.extend_from_slice(docs);
|
||||
self.fetch_buffer.clear();
|
||||
let u64_filter = convert_optional_u64_range_to_u64_range(filter);
|
||||
self.sort_column
|
||||
.first_vals_in_value_range(docs, &mut self.fetch_buffer, u64_filter);
|
||||
self.sort_column.first_vals_in_value_range(
|
||||
&mut self.doc_buffer,
|
||||
&mut self.fetch_buffer,
|
||||
u64_filter,
|
||||
);
|
||||
|
||||
self.buffer.clear();
|
||||
for (&doc, val) in docs.iter().zip(self.fetch_buffer.iter()) {
|
||||
if let Some(val) = val {
|
||||
self.buffer.push((doc, *val));
|
||||
}
|
||||
for (&doc, &val) in self.doc_buffer.iter().zip(self.fetch_buffer.iter()) {
|
||||
self.buffer.push((doc, val));
|
||||
}
|
||||
&mut self.buffer
|
||||
}
|
||||
|
||||
@@ -45,6 +45,7 @@ impl SortKeyComputer for SortByString {
|
||||
str_column_opt,
|
||||
buffer: Vec::new(),
|
||||
fetch_buffer: Vec::new(),
|
||||
doc_buffer: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -52,7 +53,8 @@ impl SortKeyComputer for SortByString {
|
||||
pub struct ByStringColumnSegmentSortKeyComputer {
|
||||
str_column_opt: Option<StrColumn>,
|
||||
buffer: Vec<(DocId, Option<TermOrdinal>)>,
|
||||
fetch_buffer: Vec<Option<Option<TermOrdinal>>>,
|
||||
fetch_buffer: Vec<Option<TermOrdinal>>,
|
||||
doc_buffer: Vec<DocId>,
|
||||
}
|
||||
|
||||
impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
|
||||
@@ -71,23 +73,28 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
|
||||
docs: &[DocId],
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) -> &mut Vec<(DocId, Self::SegmentSortKey)> {
|
||||
self.fetch_buffer.resize(docs.len(), None);
|
||||
self.doc_buffer.clear();
|
||||
self.doc_buffer.extend_from_slice(docs);
|
||||
self.fetch_buffer.clear();
|
||||
|
||||
if let Some(str_column) = &self.str_column_opt {
|
||||
let u64_filter = convert_optional_u64_range_to_u64_range(filter);
|
||||
str_column
|
||||
.ords()
|
||||
.first_vals_in_value_range(docs, &mut self.fetch_buffer, u64_filter);
|
||||
str_column.ords().first_vals_in_value_range(
|
||||
&mut self.doc_buffer,
|
||||
&mut self.fetch_buffer,
|
||||
u64_filter,
|
||||
);
|
||||
} else if range_contains_none(&filter) {
|
||||
self.fetch_buffer.fill(Some(None));
|
||||
for _ in 0..docs.len() {
|
||||
self.fetch_buffer.push(None);
|
||||
}
|
||||
} else {
|
||||
self.fetch_buffer.fill(None);
|
||||
self.doc_buffer.clear();
|
||||
}
|
||||
|
||||
self.buffer.clear();
|
||||
for (&doc, val) in docs.iter().zip(self.fetch_buffer.iter()) {
|
||||
if let Some(val) = val {
|
||||
self.buffer.push((doc, *val));
|
||||
}
|
||||
for (&doc, &val) in self.doc_buffer.iter().zip(self.fetch_buffer.iter()) {
|
||||
self.buffer.push((doc, val));
|
||||
}
|
||||
&mut self.buffer
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user