mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-08 18:12:55 +00:00
Compare commits
5 Commits
main
...
stuhood.la
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
db9e35e7ee | ||
|
|
7f39d5eab9 | ||
|
|
af53ffe5df | ||
|
|
041c6f01a3 | ||
|
|
9615eb73b8 |
@@ -16,7 +16,7 @@ stacker = { version= "0.6", path = "../stacker", package="tantivy-stacker"}
|
||||
sstable = { version= "0.6", path = "../sstable", package = "tantivy-sstable" }
|
||||
common = { version= "0.10", path = "../common", package = "tantivy-common" }
|
||||
tantivy-bitpacker = { version= "0.9", path = "../bitpacker/" }
|
||||
serde = "1.0.152"
|
||||
serde = { version = "1.0.152", features = ["derive"] }
|
||||
downcast-rs = "2.0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use binggan::{InputGroup, black_box};
|
||||
use common::*;
|
||||
use tantivy_columnar::Column;
|
||||
use tantivy_columnar::{Column, ValueRange};
|
||||
|
||||
pub mod common;
|
||||
|
||||
@@ -46,16 +46,16 @@ fn bench_group(mut runner: InputGroup<Column>) {
|
||||
runner.register("access_first_vals", |column| {
|
||||
let mut sum = 0;
|
||||
const BLOCK_SIZE: usize = 32;
|
||||
let mut docs = vec![0; BLOCK_SIZE];
|
||||
let mut buffer = vec![None; BLOCK_SIZE];
|
||||
let mut docs = Vec::with_capacity(BLOCK_SIZE);
|
||||
let mut buffer = Vec::with_capacity(BLOCK_SIZE);
|
||||
for i in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
|
||||
// fill docs
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
docs.clear();
|
||||
for idx in 0..BLOCK_SIZE {
|
||||
docs[idx] = idx as u32 + i;
|
||||
docs.push(idx as u32 + i);
|
||||
}
|
||||
|
||||
column.first_vals(&docs, &mut buffer);
|
||||
buffer.clear();
|
||||
column.first_vals_in_value_range(&mut docs, &mut buffer, ValueRange::All);
|
||||
for val in buffer.iter() {
|
||||
let Some(val) = val else { continue };
|
||||
sum += *val;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
mod dictionary_encoded;
|
||||
mod serialize;
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::fmt::{self, Debug};
|
||||
use std::io::Write;
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
@@ -19,6 +20,11 @@ use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
|
||||
use crate::column_values::{ColumnValues, monotonic_map_column};
|
||||
use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
|
||||
|
||||
thread_local! {
|
||||
static ROWS: RefCell<Vec<RowId>> = const { RefCell::new(Vec::new()) };
|
||||
static DOCS: RefCell<Vec<DocId>> = const { RefCell::new(Vec::new()) };
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Column<T = u64> {
|
||||
pub index: ColumnIndex,
|
||||
@@ -89,31 +95,6 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
self.values_for_doc(row_id).next()
|
||||
}
|
||||
|
||||
/// Load the first value for each docid in the provided slice.
|
||||
#[inline]
|
||||
pub fn first_vals(&self, docids: &[DocId], output: &mut [Option<T>]) {
|
||||
match &self.index {
|
||||
ColumnIndex::Empty { .. } => {}
|
||||
ColumnIndex::Full => self.values.get_vals_opt(docids, output),
|
||||
ColumnIndex::Optional(optional_index) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
output[i] = optional_index
|
||||
.rank_if_exists(*docid)
|
||||
.map(|rowid| self.values.get_val(rowid));
|
||||
}
|
||||
}
|
||||
ColumnIndex::Multivalued(multivalued_index) => {
|
||||
for (i, docid) in docids.iter().enumerate() {
|
||||
let range = multivalued_index.range(*docid);
|
||||
let is_empty = range.start == range.end;
|
||||
if !is_empty {
|
||||
output[i] = Some(self.values.get_val(range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates a block of docids to row_ids.
|
||||
///
|
||||
/// returns the row_ids and the matching docids on the same index
|
||||
@@ -143,7 +124,7 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
#[inline]
|
||||
pub fn get_docids_for_value_range(
|
||||
&self,
|
||||
value_range: RangeInclusive<T>,
|
||||
value_range: ValueRange<T>,
|
||||
selected_docid_range: Range<u32>,
|
||||
doc_ids: &mut Vec<u32>,
|
||||
) {
|
||||
@@ -168,6 +149,194 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
|
||||
}
|
||||
}
|
||||
|
||||
// Separate impl block for methods requiring `Default` for `T`.
|
||||
impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static + Default> Column<T> {
|
||||
/// Load the first value for each docid in the provided slice.
|
||||
///
|
||||
/// The `docids` vector is mutated: documents that do not match the `value_range` are removed.
|
||||
/// The `values` vector is populated with the values of the remaining documents.
|
||||
#[inline]
|
||||
pub fn first_vals_in_value_range(
|
||||
&self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<crate::ComparableDoc<Option<T>, DocId>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
match (&self.index, value_range) {
|
||||
(ColumnIndex::Empty { .. }, value_range) => {
|
||||
let nulls_match = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(_) => false,
|
||||
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::GreaterThanOrEqual(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThanOrEqual(_, nulls_match) => *nulls_match,
|
||||
};
|
||||
if nulls_match {
|
||||
for &doc in input_docs {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
(ColumnIndex::Full, value_range) => {
|
||||
self.values
|
||||
.get_vals_in_value_range(input_docs, input_docs, output, value_range);
|
||||
}
|
||||
(ColumnIndex::Optional(optional_index), value_range) => {
|
||||
let nulls_match = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(_) => false,
|
||||
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::GreaterThanOrEqual(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThanOrEqual(_, nulls_match) => *nulls_match,
|
||||
};
|
||||
|
||||
let fallback_needed = ROWS.with(|rows_cell| {
|
||||
DOCS.with(|docs_cell| {
|
||||
let mut rows = rows_cell.borrow_mut();
|
||||
let mut docs = docs_cell.borrow_mut();
|
||||
rows.clear();
|
||||
docs.clear();
|
||||
|
||||
let mut has_nulls = false;
|
||||
|
||||
for &doc_id in input_docs {
|
||||
if let Some(row_id) = optional_index.rank_if_exists(doc_id) {
|
||||
rows.push(row_id);
|
||||
docs.push(doc_id);
|
||||
} else {
|
||||
has_nulls = true;
|
||||
if nulls_match {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !has_nulls || !nulls_match {
|
||||
self.values.get_vals_in_value_range(
|
||||
&rows,
|
||||
&docs,
|
||||
output,
|
||||
value_range.clone(),
|
||||
);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
})
|
||||
});
|
||||
|
||||
if fallback_needed {
|
||||
for &doc_id in input_docs {
|
||||
if let Some(row_id) = optional_index.rank_if_exists(doc_id) {
|
||||
let val = self.values.get_val(row_id);
|
||||
let value_matches = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(&val),
|
||||
ValueRange::GreaterThan(t, _) => val > *t,
|
||||
ValueRange::GreaterThanOrEqual(t, _) => val >= *t,
|
||||
ValueRange::LessThan(t, _) => val < *t,
|
||||
ValueRange::LessThanOrEqual(t, _) => val <= *t,
|
||||
};
|
||||
|
||||
if value_matches {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc_id,
|
||||
sort_key: Some(val),
|
||||
});
|
||||
}
|
||||
} else if nulls_match {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc_id,
|
||||
sort_key: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
(ColumnIndex::Multivalued(multivalued_index), value_range) => {
|
||||
let nulls_match = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(_) => false,
|
||||
ValueRange::GreaterThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::GreaterThanOrEqual(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThan(_, nulls_match) => *nulls_match,
|
||||
ValueRange::LessThanOrEqual(_, nulls_match) => *nulls_match,
|
||||
};
|
||||
for i in 0..input_docs.len() {
|
||||
let docid = input_docs[i];
|
||||
let row_range = multivalued_index.range(docid);
|
||||
let is_empty = row_range.start == row_range.end;
|
||||
if !is_empty {
|
||||
let val = self.values.get_val(row_range.start);
|
||||
let matches = match &value_range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(&val),
|
||||
ValueRange::GreaterThan(t, _) => val > *t,
|
||||
ValueRange::GreaterThanOrEqual(t, _) => val >= *t,
|
||||
ValueRange::LessThan(t, _) => val < *t,
|
||||
ValueRange::LessThanOrEqual(t, _) => val <= *t,
|
||||
};
|
||||
if matches {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: docid,
|
||||
sort_key: Some(val),
|
||||
});
|
||||
}
|
||||
} else if nulls_match {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: docid,
|
||||
sort_key: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A range of values.
|
||||
///
|
||||
/// This type is intended to be used in batch APIs, where the cost of unpacking the enum
|
||||
/// is outweighed by the time spent processing a batch.
|
||||
///
|
||||
/// Implementers should pattern match on the variants to use optimized loops for each case.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ValueRange<T> {
|
||||
/// A range that includes both start and end.
|
||||
Inclusive(RangeInclusive<T>),
|
||||
/// A range that matches all values.
|
||||
All,
|
||||
/// A range that matches all values greater than the threshold.
|
||||
/// The boolean flag indicates if null values should be included.
|
||||
GreaterThan(T, bool),
|
||||
/// A range that matches all values greater than or equal to the threshold.
|
||||
/// The boolean flag indicates if null values should be included.
|
||||
GreaterThanOrEqual(T, bool),
|
||||
/// A range that matches all values less than the threshold.
|
||||
/// The boolean flag indicates if null values should be included.
|
||||
LessThan(T, bool),
|
||||
/// A range that matches all values less than or equal to the threshold.
|
||||
/// The boolean flag indicates if null values should be included.
|
||||
LessThanOrEqual(T, bool),
|
||||
}
|
||||
|
||||
impl<T: PartialOrd> ValueRange<T> {
|
||||
pub fn intersects(&self, min: T, max: T) -> bool {
|
||||
match self {
|
||||
ValueRange::Inclusive(range) => *range.start() <= max && *range.end() >= min,
|
||||
ValueRange::All => true,
|
||||
ValueRange::GreaterThan(val, _) => max > *val,
|
||||
ValueRange::GreaterThanOrEqual(val, _) => max >= *val,
|
||||
ValueRange::LessThan(val, _) => min < *val,
|
||||
ValueRange::LessThanOrEqual(val, _) => min <= *val,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BinarySerializable for Cardinality {
|
||||
fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
|
||||
self.to_code().serialize(writer)
|
||||
|
||||
@@ -333,7 +333,7 @@ mod tests {
|
||||
use std::ops::Range;
|
||||
|
||||
use super::MultiValueIndex;
|
||||
use crate::{ColumnarReader, DynamicColumn};
|
||||
use crate::{ColumnarReader, DynamicColumn, ValueRange};
|
||||
|
||||
fn index_to_pos_helper(
|
||||
index: &MultiValueIndex,
|
||||
@@ -413,7 +413,7 @@ mod tests {
|
||||
assert_eq!(row_id_range, 0..4);
|
||||
|
||||
let check = |range, expected| {
|
||||
let full_range = 0..=u64::MAX;
|
||||
let full_range = ValueRange::All;
|
||||
let mut docids = Vec::new();
|
||||
column.get_docids_for_value_range(full_range, range, &mut docids);
|
||||
assert_eq!(docids, expected);
|
||||
|
||||
@@ -7,13 +7,15 @@
|
||||
//! - Monotonically map values to u64/u128
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use downcast_rs::DowncastSync;
|
||||
pub use monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn};
|
||||
pub use monotonic_mapping_u128::MonotonicallyMappableToU128;
|
||||
|
||||
use crate::column::ValueRange;
|
||||
|
||||
mod merge;
|
||||
pub(crate) mod monotonic_mapping;
|
||||
pub(crate) mod monotonic_mapping_u128;
|
||||
@@ -109,6 +111,307 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
|
||||
}
|
||||
}
|
||||
|
||||
/// Load the values for the provided docids.
|
||||
///
|
||||
/// The values are filtered by the provided value range.
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
input_indexes: &[u32],
|
||||
input_doc_ids: &[u32],
|
||||
output: &mut Vec<crate::ComparableDoc<Option<T>, crate::DocId>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
let len = input_indexes.len();
|
||||
let mut read_head = 0;
|
||||
|
||||
match value_range {
|
||||
ValueRange::All => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = input_indexes[read_head];
|
||||
let idx1 = input_indexes[read_head + 1];
|
||||
let idx2 = input_indexes[read_head + 2];
|
||||
let idx3 = input_indexes[read_head + 3];
|
||||
|
||||
let doc0 = input_doc_ids[read_head];
|
||||
let doc1 = input_doc_ids[read_head + 1];
|
||||
let doc2 = input_doc_ids[read_head + 2];
|
||||
let doc3 = input_doc_ids[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc0,
|
||||
sort_key: Some(val0),
|
||||
});
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc1,
|
||||
sort_key: Some(val1),
|
||||
});
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc2,
|
||||
sort_key: Some(val2),
|
||||
});
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc3,
|
||||
sort_key: Some(val3),
|
||||
});
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::Inclusive(ref range) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = input_indexes[read_head];
|
||||
let idx1 = input_indexes[read_head + 1];
|
||||
let idx2 = input_indexes[read_head + 2];
|
||||
let idx3 = input_indexes[read_head + 3];
|
||||
|
||||
let doc0 = input_doc_ids[read_head];
|
||||
let doc1 = input_doc_ids[read_head + 1];
|
||||
let doc2 = input_doc_ids[read_head + 2];
|
||||
let doc3 = input_doc_ids[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if range.contains(&val0) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc0,
|
||||
sort_key: Some(val0),
|
||||
});
|
||||
}
|
||||
if range.contains(&val1) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc1,
|
||||
sort_key: Some(val1),
|
||||
});
|
||||
}
|
||||
if range.contains(&val2) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc2,
|
||||
sort_key: Some(val2),
|
||||
});
|
||||
}
|
||||
if range.contains(&val3) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc3,
|
||||
sort_key: Some(val3),
|
||||
});
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThan(ref threshold, _) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = input_indexes[read_head];
|
||||
let idx1 = input_indexes[read_head + 1];
|
||||
let idx2 = input_indexes[read_head + 2];
|
||||
let idx3 = input_indexes[read_head + 3];
|
||||
|
||||
let doc0 = input_doc_ids[read_head];
|
||||
let doc1 = input_doc_ids[read_head + 1];
|
||||
let doc2 = input_doc_ids[read_head + 2];
|
||||
let doc3 = input_doc_ids[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if val0 > *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc0,
|
||||
sort_key: Some(val0),
|
||||
});
|
||||
}
|
||||
if val1 > *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc1,
|
||||
sort_key: Some(val1),
|
||||
});
|
||||
}
|
||||
if val2 > *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc2,
|
||||
sort_key: Some(val2),
|
||||
});
|
||||
}
|
||||
if val3 > *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc3,
|
||||
sort_key: Some(val3),
|
||||
});
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(ref threshold, _) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = input_indexes[read_head];
|
||||
let idx1 = input_indexes[read_head + 1];
|
||||
let idx2 = input_indexes[read_head + 2];
|
||||
let idx3 = input_indexes[read_head + 3];
|
||||
|
||||
let doc0 = input_doc_ids[read_head];
|
||||
let doc1 = input_doc_ids[read_head + 1];
|
||||
let doc2 = input_doc_ids[read_head + 2];
|
||||
let doc3 = input_doc_ids[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if val0 >= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc0,
|
||||
sort_key: Some(val0),
|
||||
});
|
||||
}
|
||||
if val1 >= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc1,
|
||||
sort_key: Some(val1),
|
||||
});
|
||||
}
|
||||
if val2 >= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc2,
|
||||
sort_key: Some(val2),
|
||||
});
|
||||
}
|
||||
if val3 >= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc3,
|
||||
sort_key: Some(val3),
|
||||
});
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(ref threshold, _) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = input_indexes[read_head];
|
||||
let idx1 = input_indexes[read_head + 1];
|
||||
let idx2 = input_indexes[read_head + 2];
|
||||
let idx3 = input_indexes[read_head + 3];
|
||||
|
||||
let doc0 = input_doc_ids[read_head];
|
||||
let doc1 = input_doc_ids[read_head + 1];
|
||||
let doc2 = input_doc_ids[read_head + 2];
|
||||
let doc3 = input_doc_ids[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if val0 < *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc0,
|
||||
sort_key: Some(val0),
|
||||
});
|
||||
}
|
||||
if val1 < *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc1,
|
||||
sort_key: Some(val1),
|
||||
});
|
||||
}
|
||||
if val2 < *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc2,
|
||||
sort_key: Some(val2),
|
||||
});
|
||||
}
|
||||
if val3 < *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc3,
|
||||
sort_key: Some(val3),
|
||||
});
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
ValueRange::LessThanOrEqual(ref threshold, _) => {
|
||||
while read_head + 3 < len {
|
||||
let idx0 = input_indexes[read_head];
|
||||
let idx1 = input_indexes[read_head + 1];
|
||||
let idx2 = input_indexes[read_head + 2];
|
||||
let idx3 = input_indexes[read_head + 3];
|
||||
|
||||
let doc0 = input_doc_ids[read_head];
|
||||
let doc1 = input_doc_ids[read_head + 1];
|
||||
let doc2 = input_doc_ids[read_head + 2];
|
||||
let doc3 = input_doc_ids[read_head + 3];
|
||||
|
||||
let val0 = self.get_val(idx0);
|
||||
let val1 = self.get_val(idx1);
|
||||
let val2 = self.get_val(idx2);
|
||||
let val3 = self.get_val(idx3);
|
||||
|
||||
if val0 <= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc0,
|
||||
sort_key: Some(val0),
|
||||
});
|
||||
}
|
||||
if val1 <= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc1,
|
||||
sort_key: Some(val1),
|
||||
});
|
||||
}
|
||||
if val2 <= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc2,
|
||||
sort_key: Some(val2),
|
||||
});
|
||||
}
|
||||
if val3 <= *threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc: doc3,
|
||||
sort_key: Some(val3),
|
||||
});
|
||||
}
|
||||
|
||||
read_head += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Process remaining elements (0 to 3)
|
||||
while read_head < len {
|
||||
let idx = input_indexes[read_head];
|
||||
let doc = input_doc_ids[read_head];
|
||||
let val = self.get_val(idx);
|
||||
let matches = match value_range {
|
||||
// 'value_range' is still moved here. This is the outer `value_range`
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(ref r) => r.contains(&val),
|
||||
ValueRange::GreaterThan(ref t, _) => val > *t,
|
||||
ValueRange::GreaterThanOrEqual(ref t, _) => val >= *t,
|
||||
ValueRange::LessThan(ref t, _) => val < *t,
|
||||
ValueRange::LessThanOrEqual(ref t, _) => val <= *t,
|
||||
};
|
||||
if matches {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(val),
|
||||
});
|
||||
}
|
||||
read_head += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Fills an output buffer with the fast field values
|
||||
/// associated with the `DocId` going from
|
||||
/// `start` to `start + output.len()`.
|
||||
@@ -129,15 +432,54 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
|
||||
/// Note that position == docid for single value fast fields
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
value_range: RangeInclusive<T>,
|
||||
value_range: ValueRange<T>,
|
||||
row_id_range: Range<RowId>,
|
||||
row_id_hits: &mut Vec<RowId>,
|
||||
) {
|
||||
let row_id_range = row_id_range.start..row_id_range.end.min(self.num_vals());
|
||||
for idx in row_id_range {
|
||||
let val = self.get_val(idx);
|
||||
if value_range.contains(&val) {
|
||||
row_id_hits.push(idx);
|
||||
match value_range {
|
||||
ValueRange::Inclusive(range) => {
|
||||
for idx in row_id_range {
|
||||
let val = self.get_val(idx);
|
||||
if range.contains(&val) {
|
||||
row_id_hits.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThan(threshold, _) => {
|
||||
for idx in row_id_range {
|
||||
let val = self.get_val(idx);
|
||||
if val > threshold {
|
||||
row_id_hits.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(threshold, _) => {
|
||||
for idx in row_id_range {
|
||||
let val = self.get_val(idx);
|
||||
if val >= threshold {
|
||||
row_id_hits.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => {
|
||||
for idx in row_id_range {
|
||||
let val = self.get_val(idx);
|
||||
if val < threshold {
|
||||
row_id_hits.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::LessThanOrEqual(threshold, _) => {
|
||||
for idx in row_id_range {
|
||||
let val = self.get_val(idx);
|
||||
if val <= threshold {
|
||||
row_id_hits.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::All => {
|
||||
row_id_hits.extend(row_id_range);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -193,6 +535,17 @@ impl<T: PartialOrd + Default> ColumnValues<T> for EmptyColumnValues {
|
||||
fn num_vals(&self) -> u32 {
|
||||
0
|
||||
}
|
||||
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
input_indexes: &[u32],
|
||||
input_doc_ids: &[u32],
|
||||
output: &mut Vec<crate::ComparableDoc<Option<T>, crate::DocId>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
let _ = (input_indexes, input_doc_ids, output, value_range);
|
||||
panic!("Internal Error: Called get_vals_in_value_range of empty column.")
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnValues<T>> {
|
||||
@@ -206,6 +559,18 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
|
||||
self.as_ref().get_vals_opt(indexes, output)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
input_indexes: &[u32],
|
||||
input_doc_ids: &[u32],
|
||||
output: &mut Vec<crate::ComparableDoc<Option<T>, crate::DocId>>,
|
||||
value_range: ValueRange<T>,
|
||||
) {
|
||||
self.as_ref()
|
||||
.get_vals_in_value_range(input_indexes, input_doc_ids, output, value_range)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn min_value(&self) -> T {
|
||||
self.as_ref().min_value()
|
||||
@@ -234,7 +599,7 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
|
||||
#[inline(always)]
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
range: RangeInclusive<T>,
|
||||
range: ValueRange<T>,
|
||||
doc_id_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::ColumnValues;
|
||||
use crate::column::ValueRange;
|
||||
use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
|
||||
|
||||
struct MonotonicMappingColumn<C, T, Input> {
|
||||
@@ -80,16 +81,52 @@ where
|
||||
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
range: RangeInclusive<Output>,
|
||||
range: ValueRange<Output>,
|
||||
doc_id_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
self.from_column.get_row_ids_for_value_range(
|
||||
self.monotonic_mapping.inverse(range.start().clone())
|
||||
..=self.monotonic_mapping.inverse(range.end().clone()),
|
||||
doc_id_range,
|
||||
positions,
|
||||
)
|
||||
match range {
|
||||
ValueRange::Inclusive(range) => self.from_column.get_row_ids_for_value_range(
|
||||
ValueRange::Inclusive(
|
||||
self.monotonic_mapping.inverse(range.start().clone())
|
||||
..=self.monotonic_mapping.inverse(range.end().clone()),
|
||||
),
|
||||
doc_id_range,
|
||||
positions,
|
||||
),
|
||||
ValueRange::All => self.from_column.get_row_ids_for_value_range(
|
||||
ValueRange::All,
|
||||
doc_id_range,
|
||||
positions,
|
||||
),
|
||||
ValueRange::GreaterThan(threshold, _) => self.from_column.get_row_ids_for_value_range(
|
||||
ValueRange::GreaterThan(self.monotonic_mapping.inverse(threshold), false),
|
||||
doc_id_range,
|
||||
positions,
|
||||
),
|
||||
ValueRange::GreaterThanOrEqual(threshold, _) => {
|
||||
self.from_column.get_row_ids_for_value_range(
|
||||
ValueRange::GreaterThanOrEqual(
|
||||
self.monotonic_mapping.inverse(threshold),
|
||||
false,
|
||||
),
|
||||
doc_id_range,
|
||||
positions,
|
||||
)
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => self.from_column.get_row_ids_for_value_range(
|
||||
ValueRange::LessThan(self.monotonic_mapping.inverse(threshold), false),
|
||||
doc_id_range,
|
||||
positions,
|
||||
),
|
||||
ValueRange::LessThanOrEqual(threshold, _) => {
|
||||
self.from_column.get_row_ids_for_value_range(
|
||||
ValueRange::LessThanOrEqual(self.monotonic_mapping.inverse(threshold), false),
|
||||
doc_id_range,
|
||||
positions,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We voluntarily do not implement get_range as it yields a regression,
|
||||
|
||||
@@ -25,6 +25,7 @@ use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker};
|
||||
|
||||
use crate::RowId;
|
||||
use crate::column::ValueRange;
|
||||
use crate::column_values::ColumnValues;
|
||||
|
||||
/// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
|
||||
@@ -338,14 +339,48 @@ impl ColumnValues<u64> for CompactSpaceU64Accessor {
|
||||
#[inline]
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
value_range: RangeInclusive<u64>,
|
||||
value_range: ValueRange<u64>,
|
||||
position_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let value_range = self.0.compact_to_u128(*value_range.start() as u32)
|
||||
..=self.0.compact_to_u128(*value_range.end() as u32);
|
||||
self.0
|
||||
.get_row_ids_for_value_range(value_range, position_range, positions)
|
||||
match value_range {
|
||||
ValueRange::Inclusive(value_range) => {
|
||||
let value_range = ValueRange::Inclusive(
|
||||
self.0.compact_to_u128(*value_range.start() as u32)
|
||||
..=self.0.compact_to_u128(*value_range.end() as u32),
|
||||
);
|
||||
self.0
|
||||
.get_row_ids_for_value_range(value_range, position_range, positions)
|
||||
}
|
||||
ValueRange::All => {
|
||||
let position_range = position_range.start..position_range.end.min(self.num_vals());
|
||||
positions.extend(position_range);
|
||||
}
|
||||
ValueRange::GreaterThan(threshold, _) => {
|
||||
let value_range =
|
||||
ValueRange::GreaterThan(self.0.compact_to_u128(threshold as u32), false);
|
||||
self.0
|
||||
.get_row_ids_for_value_range(value_range, position_range, positions)
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(threshold, _) => {
|
||||
let value_range =
|
||||
ValueRange::GreaterThanOrEqual(self.0.compact_to_u128(threshold as u32), false);
|
||||
self.0
|
||||
.get_row_ids_for_value_range(value_range, position_range, positions)
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => {
|
||||
let value_range =
|
||||
ValueRange::LessThan(self.0.compact_to_u128(threshold as u32), false);
|
||||
self.0
|
||||
.get_row_ids_for_value_range(value_range, position_range, positions)
|
||||
}
|
||||
ValueRange::LessThanOrEqual(threshold, _) => {
|
||||
let value_range =
|
||||
ValueRange::LessThanOrEqual(self.0.compact_to_u128(threshold as u32), false);
|
||||
self.0
|
||||
.get_row_ids_for_value_range(value_range, position_range, positions)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -375,10 +410,47 @@ impl ColumnValues<u128> for CompactSpaceDecompressor {
|
||||
#[inline]
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
value_range: RangeInclusive<u128>,
|
||||
value_range: ValueRange<u128>,
|
||||
position_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let value_range = match value_range {
|
||||
ValueRange::Inclusive(value_range) => value_range,
|
||||
ValueRange::All => {
|
||||
let position_range = position_range.start..position_range.end.min(self.num_vals());
|
||||
positions.extend(position_range);
|
||||
return;
|
||||
}
|
||||
ValueRange::GreaterThan(threshold, _) => {
|
||||
let max = self.max_value();
|
||||
if threshold >= max {
|
||||
return;
|
||||
}
|
||||
(threshold + 1)..=max
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(threshold, _) => {
|
||||
let max = self.max_value();
|
||||
if threshold > max {
|
||||
return;
|
||||
}
|
||||
threshold..=max
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => {
|
||||
let min = self.min_value();
|
||||
if threshold <= min {
|
||||
return;
|
||||
}
|
||||
min..=(threshold - 1)
|
||||
}
|
||||
ValueRange::LessThanOrEqual(threshold, _) => {
|
||||
let min = self.min_value();
|
||||
if threshold < min {
|
||||
return;
|
||||
}
|
||||
min..=threshold
|
||||
}
|
||||
};
|
||||
|
||||
if value_range.start() > value_range.end() {
|
||||
return;
|
||||
}
|
||||
@@ -560,7 +632,7 @@ mod tests {
|
||||
.collect::<Vec<_>>();
|
||||
let mut positions = Vec::new();
|
||||
decompressor.get_row_ids_for_value_range(
|
||||
range,
|
||||
ValueRange::Inclusive(range),
|
||||
0..decompressor.num_vals(),
|
||||
&mut positions,
|
||||
);
|
||||
@@ -604,7 +676,11 @@ mod tests {
|
||||
let val = *val;
|
||||
let pos = pos as u32;
|
||||
let mut positions = Vec::new();
|
||||
decomp.get_row_ids_for_value_range(val..=val, pos..pos + 1, &mut positions);
|
||||
decomp.get_row_ids_for_value_range(
|
||||
ValueRange::Inclusive(val..=val),
|
||||
pos..pos + 1,
|
||||
&mut positions,
|
||||
);
|
||||
assert_eq!(positions, vec![pos]);
|
||||
}
|
||||
|
||||
@@ -746,7 +822,11 @@ mod tests {
|
||||
doc_id_range: Range<u32>,
|
||||
) -> Vec<u32> {
|
||||
let mut positions = Vec::new();
|
||||
column.get_row_ids_for_value_range(value_range, doc_id_range, &mut positions);
|
||||
column.get_row_ids_for_value_range(
|
||||
ValueRange::Inclusive(value_range),
|
||||
doc_id_range,
|
||||
&mut positions,
|
||||
);
|
||||
positions
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ use common::{BinarySerializable, OwnedBytes};
|
||||
use fastdivide::DividerU64;
|
||||
use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
|
||||
|
||||
use crate::column::ValueRange;
|
||||
use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
|
||||
use crate::{ColumnValues, RowId};
|
||||
|
||||
@@ -66,24 +67,273 @@ impl ColumnValues for BitpackedReader {
|
||||
self.stats.num_rows
|
||||
}
|
||||
|
||||
fn get_vals_in_value_range(
|
||||
&self,
|
||||
input_indexes: &[u32],
|
||||
input_doc_ids: &[u32],
|
||||
output: &mut Vec<crate::ComparableDoc<Option<u64>, crate::DocId>>,
|
||||
value_range: ValueRange<u64>,
|
||||
) {
|
||||
match value_range {
|
||||
ValueRange::All => {
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(self.get_val(idx)),
|
||||
});
|
||||
}
|
||||
}
|
||||
ValueRange::Inclusive(range) => {
|
||||
if let Some(transformed_range) =
|
||||
transform_range_before_linear_transformation(&self.stats, range)
|
||||
{
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
let raw_val = self.get_val(idx);
|
||||
if transformed_range.contains(&raw_val) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(
|
||||
self.stats.min_value + self.stats.gcd.get() * raw_val,
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThan(threshold, _) => {
|
||||
if threshold < self.stats.min_value {
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(self.get_val(idx)),
|
||||
});
|
||||
}
|
||||
} else if threshold >= self.stats.max_value {
|
||||
// All filtered out
|
||||
} else {
|
||||
let raw_threshold = (threshold - self.stats.min_value) / self.stats.gcd.get();
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
let raw_val = self.get_val(idx);
|
||||
if raw_val > raw_threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(
|
||||
self.stats.min_value + self.stats.gcd.get() * raw_val,
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(threshold, _) => {
|
||||
if threshold <= self.stats.min_value {
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(self.get_val(idx)),
|
||||
});
|
||||
}
|
||||
} else if threshold > self.stats.max_value {
|
||||
// All filtered out
|
||||
} else {
|
||||
let diff = threshold - self.stats.min_value;
|
||||
let gcd = self.stats.gcd.get();
|
||||
let raw_threshold = (diff + gcd - 1) / gcd;
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
let raw_val = self.get_val(idx);
|
||||
if raw_val >= raw_threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(
|
||||
self.stats.min_value + self.stats.gcd.get() * raw_val,
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => {
|
||||
if threshold > self.stats.max_value {
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(self.get_val(idx)),
|
||||
});
|
||||
}
|
||||
} else if threshold <= self.stats.min_value {
|
||||
// All filtered out
|
||||
} else {
|
||||
let diff = threshold - self.stats.min_value;
|
||||
let gcd = self.stats.gcd.get();
|
||||
let raw_threshold = if diff % gcd == 0 {
|
||||
diff / gcd
|
||||
} else {
|
||||
diff / gcd + 1
|
||||
};
|
||||
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
let raw_val = self.get_val(idx);
|
||||
if raw_val < raw_threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(
|
||||
self.stats.min_value + self.stats.gcd.get() * raw_val,
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueRange::LessThanOrEqual(threshold, _) => {
|
||||
if threshold >= self.stats.max_value {
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(self.get_val(idx)),
|
||||
});
|
||||
}
|
||||
} else if threshold < self.stats.min_value {
|
||||
// All filtered out
|
||||
} else {
|
||||
let diff = threshold - self.stats.min_value;
|
||||
let gcd = self.stats.gcd.get();
|
||||
let raw_threshold = diff / gcd;
|
||||
|
||||
for (&idx, &doc) in input_indexes.iter().zip(input_doc_ids.iter()) {
|
||||
let raw_val = self.get_val(idx);
|
||||
if raw_val <= raw_threshold {
|
||||
output.push(crate::ComparableDoc {
|
||||
doc,
|
||||
sort_key: Some(
|
||||
self.stats.min_value + self.stats.gcd.get() * raw_val,
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fn get_row_ids_for_value_range(
|
||||
&self,
|
||||
range: RangeInclusive<u64>,
|
||||
range: ValueRange<u64>,
|
||||
doc_id_range: Range<u32>,
|
||||
positions: &mut Vec<u32>,
|
||||
) {
|
||||
let Some(transformed_range) =
|
||||
transform_range_before_linear_transformation(&self.stats, range)
|
||||
else {
|
||||
positions.clear();
|
||||
return;
|
||||
};
|
||||
self.bit_unpacker.get_ids_for_value_range(
|
||||
transformed_range,
|
||||
doc_id_range,
|
||||
&self.data,
|
||||
positions,
|
||||
);
|
||||
match range {
|
||||
ValueRange::All => {
|
||||
positions.extend(doc_id_range);
|
||||
return;
|
||||
}
|
||||
ValueRange::Inclusive(range) => {
|
||||
let Some(transformed_range) =
|
||||
transform_range_before_linear_transformation(&self.stats, range)
|
||||
else {
|
||||
positions.clear();
|
||||
return;
|
||||
};
|
||||
|
||||
self.bit_unpacker.get_ids_for_value_range(
|
||||
transformed_range,
|
||||
doc_id_range,
|
||||
&self.data,
|
||||
positions,
|
||||
);
|
||||
}
|
||||
ValueRange::GreaterThan(threshold, _) => {
|
||||
if threshold < self.stats.min_value {
|
||||
positions.extend(doc_id_range);
|
||||
return;
|
||||
}
|
||||
if threshold >= self.stats.max_value {
|
||||
return;
|
||||
}
|
||||
let raw_threshold = (threshold - self.stats.min_value) / self.stats.gcd.get();
|
||||
let max_raw = (self.stats.max_value - self.stats.min_value) / self.stats.gcd.get();
|
||||
let transformed_range = (raw_threshold + 1)..=max_raw;
|
||||
|
||||
self.bit_unpacker.get_ids_for_value_range(
|
||||
transformed_range,
|
||||
doc_id_range,
|
||||
&self.data,
|
||||
positions,
|
||||
);
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(threshold, _) => {
|
||||
if threshold <= self.stats.min_value {
|
||||
positions.extend(doc_id_range);
|
||||
return;
|
||||
}
|
||||
if threshold > self.stats.max_value {
|
||||
return;
|
||||
}
|
||||
let diff = threshold - self.stats.min_value;
|
||||
let gcd = self.stats.gcd.get();
|
||||
let raw_threshold = (diff + gcd - 1) / gcd;
|
||||
// We want raw >= raw_threshold.
|
||||
let max_raw = (self.stats.max_value - self.stats.min_value) / self.stats.gcd.get();
|
||||
let transformed_range = raw_threshold..=max_raw;
|
||||
|
||||
self.bit_unpacker.get_ids_for_value_range(
|
||||
transformed_range,
|
||||
doc_id_range,
|
||||
&self.data,
|
||||
positions,
|
||||
);
|
||||
}
|
||||
ValueRange::LessThan(threshold, _) => {
|
||||
if threshold > self.stats.max_value {
|
||||
positions.extend(doc_id_range);
|
||||
return;
|
||||
}
|
||||
if threshold <= self.stats.min_value {
|
||||
return;
|
||||
}
|
||||
|
||||
let diff = threshold - self.stats.min_value;
|
||||
let gcd = self.stats.gcd.get();
|
||||
// We want raw < raw_threshold_limit
|
||||
// raw <= raw_threshold_limit - 1
|
||||
let raw_threshold_limit = if diff % gcd == 0 {
|
||||
diff / gcd
|
||||
} else {
|
||||
diff / gcd + 1
|
||||
};
|
||||
|
||||
if raw_threshold_limit == 0 {
|
||||
return;
|
||||
}
|
||||
let transformed_range = 0..=(raw_threshold_limit - 1);
|
||||
|
||||
self.bit_unpacker.get_ids_for_value_range(
|
||||
transformed_range,
|
||||
doc_id_range,
|
||||
&self.data,
|
||||
positions,
|
||||
);
|
||||
}
|
||||
ValueRange::LessThanOrEqual(threshold, _) => {
|
||||
if threshold >= self.stats.max_value {
|
||||
positions.extend(doc_id_range);
|
||||
return;
|
||||
}
|
||||
if threshold < self.stats.min_value {
|
||||
return;
|
||||
}
|
||||
let diff = threshold - self.stats.min_value;
|
||||
let gcd = self.stats.gcd.get();
|
||||
// We want raw <= raw_threshold.
|
||||
let raw_threshold = diff / gcd;
|
||||
let transformed_range = 0..=raw_threshold;
|
||||
|
||||
self.bit_unpacker.get_ids_for_value_range(
|
||||
transformed_range,
|
||||
doc_id_range,
|
||||
&self.data,
|
||||
positions,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ pub(crate) fn create_and_validate<TColumnCodec: ColumnCodec>(
|
||||
.collect();
|
||||
let mut positions = Vec::new();
|
||||
reader.get_row_ids_for_value_range(
|
||||
vals[test_rand_idx]..=vals[test_rand_idx],
|
||||
crate::column::ValueRange::Inclusive(vals[test_rand_idx]..=vals[test_rand_idx]),
|
||||
0..vals.len() as u32,
|
||||
&mut positions,
|
||||
);
|
||||
|
||||
@@ -29,6 +29,7 @@ mod column;
|
||||
pub mod column_index;
|
||||
pub mod column_values;
|
||||
mod columnar;
|
||||
mod comparable_doc;
|
||||
mod dictionary;
|
||||
mod dynamic_column;
|
||||
mod iterable;
|
||||
@@ -36,7 +37,7 @@ pub(crate) mod utils;
|
||||
mod value;
|
||||
|
||||
pub use block_accessor::ColumnBlockAccessor;
|
||||
pub use column::{BytesColumn, Column, StrColumn};
|
||||
pub use column::{BytesColumn, Column, StrColumn, ValueRange};
|
||||
pub use column_index::ColumnIndex;
|
||||
pub use column_values::{
|
||||
ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
|
||||
@@ -45,6 +46,7 @@ pub use columnar::{
|
||||
CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
|
||||
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
|
||||
};
|
||||
pub use comparable_doc::ComparableDoc;
|
||||
use sstable::VoidSSTable;
|
||||
pub use value::{NumericalType, NumericalValue};
|
||||
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
use columnar::{Column, ColumnType, ColumnarReader, DynamicColumn};
|
||||
use columnar::{Column, ColumnType, ColumnarReader, DynamicColumn, ValueRange};
|
||||
use common::json_path_writer::JSON_PATH_SEGMENT_SEP_STR;
|
||||
use common::DateTime;
|
||||
use regex::Regex;
|
||||
@@ -16,7 +17,7 @@ use crate::aggregation::intermediate_agg_result::{
|
||||
};
|
||||
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
|
||||
use crate::aggregation::AggregationError;
|
||||
use crate::collector::sort_key::ReverseComparator;
|
||||
use crate::collector::sort_key::{Comparator, ReverseComparator};
|
||||
use crate::collector::TopNComputer;
|
||||
use crate::schema::OwnedValue;
|
||||
use crate::{DocAddress, DocId, SegmentOrdinal};
|
||||
@@ -383,7 +384,7 @@ impl From<FastFieldValue> for OwnedValue {
|
||||
|
||||
/// Holds a fast field value in its u64 representation, and the order in which it should be sorted.
|
||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||
struct DocValueAndOrder {
|
||||
pub(crate) struct DocValueAndOrder {
|
||||
/// A fast field value in its u64 representation.
|
||||
value: Option<u64>,
|
||||
/// Sort order for the value
|
||||
@@ -455,6 +456,37 @@ impl PartialEq for DocSortValuesAndFields {
|
||||
|
||||
impl Eq for DocSortValuesAndFields {}
|
||||
|
||||
impl Comparator<DocSortValuesAndFields> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &DocSortValuesAndFields, rhs: &DocSortValuesAndFields) -> Ordering {
|
||||
rhs.cmp(lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: DocSortValuesAndFields,
|
||||
) -> ValueRange<DocSortValuesAndFields> {
|
||||
ValueRange::LessThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct TopHitsSegmentSortKey(pub Vec<DocValueAndOrder>);
|
||||
|
||||
impl Comparator<TopHitsSegmentSortKey> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &TopHitsSegmentSortKey, rhs: &TopHitsSegmentSortKey) -> Ordering {
|
||||
rhs.cmp(lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: TopHitsSegmentSortKey,
|
||||
) -> ValueRange<TopHitsSegmentSortKey> {
|
||||
ValueRange::LessThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
/// The TopHitsCollector used for collecting over segments and merging results.
|
||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||
pub struct TopHitsTopNComputer {
|
||||
@@ -518,7 +550,7 @@ impl TopHitsTopNComputer {
|
||||
pub(crate) struct TopHitsSegmentCollector {
|
||||
segment_ordinal: SegmentOrdinal,
|
||||
accessor_idx: usize,
|
||||
top_n: TopNComputer<Vec<DocValueAndOrder>, DocAddress, ReverseComparator>,
|
||||
top_n: TopNComputer<TopHitsSegmentSortKey, DocAddress, ReverseComparator>,
|
||||
}
|
||||
|
||||
impl TopHitsSegmentCollector {
|
||||
@@ -539,13 +571,15 @@ impl TopHitsSegmentCollector {
|
||||
req: &TopHitsAggregationReq,
|
||||
) -> TopHitsTopNComputer {
|
||||
let mut top_hits_computer = TopHitsTopNComputer::new(req);
|
||||
// Map TopHitsSegmentSortKey back to Vec<DocValueAndOrder> if needed or use directly
|
||||
// The TopNComputer here stores TopHitsSegmentSortKey.
|
||||
let top_results = self.top_n.into_vec();
|
||||
|
||||
for res in top_results {
|
||||
let doc_value_fields = req.get_document_field_data(value_accessors, res.doc.doc_id);
|
||||
top_hits_computer.collect(
|
||||
DocSortValuesAndFields {
|
||||
sorts: res.sort_key,
|
||||
sorts: res.sort_key.0,
|
||||
doc_value_fields,
|
||||
},
|
||||
res.doc,
|
||||
@@ -579,7 +613,7 @@ impl TopHitsSegmentCollector {
|
||||
.collect();
|
||||
|
||||
self.top_n.push(
|
||||
sorts,
|
||||
TopHitsSegmentSortKey(sorts),
|
||||
DocAddress {
|
||||
segment_ord: self.segment_ordinal,
|
||||
doc_id,
|
||||
|
||||
@@ -96,10 +96,9 @@ mod histogram_collector;
|
||||
pub use histogram_collector::HistogramCollector;
|
||||
|
||||
mod multi_collector;
|
||||
pub use self::multi_collector::{FruitHandle, MultiCollector, MultiFruit};
|
||||
pub use columnar::ComparableDoc;
|
||||
|
||||
mod top_collector;
|
||||
pub use self::top_collector::ComparableDoc;
|
||||
pub use self::multi_collector::{FruitHandle, MultiCollector, MultiFruit};
|
||||
|
||||
mod top_score_collector;
|
||||
pub use self::top_score_collector::{TopDocs, TopNComputer};
|
||||
|
||||
@@ -39,6 +39,7 @@ pub(crate) mod tests {
|
||||
use crate::collector::sort_key::{
|
||||
SortByErasedType, SortBySimilarityScore, SortByStaticFastValue, SortByString,
|
||||
};
|
||||
use crate::collector::top_score_collector::compare_for_top_k;
|
||||
use crate::collector::{ComparableDoc, DocSetCollector, TopDocs};
|
||||
use crate::indexer::NoMergePolicy;
|
||||
use crate::query::{AllQuery, QueryParser};
|
||||
@@ -389,6 +390,52 @@ pub(crate) mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_order_by_compound_fast_fields() -> crate::Result<()> {
|
||||
let index = make_index()?;
|
||||
|
||||
type CompoundSortKey = (Option<String>, Option<f64>);
|
||||
|
||||
fn assert_query(
|
||||
index: &Index,
|
||||
city_order: Order,
|
||||
altitude_order: Order,
|
||||
expected: Vec<(CompoundSortKey, u64)>,
|
||||
) -> crate::Result<()> {
|
||||
let searcher = index.reader()?.searcher();
|
||||
let ids = id_mapping(&searcher);
|
||||
|
||||
let top_collector = TopDocs::with_limit(4).order_by((
|
||||
(SortByString::for_field("city"), city_order),
|
||||
(
|
||||
SortByStaticFastValue::<f64>::for_field("altitude"),
|
||||
altitude_order,
|
||||
),
|
||||
));
|
||||
let actual = searcher
|
||||
.search(&AllQuery, &top_collector)?
|
||||
.into_iter()
|
||||
.map(|(key, doc)| (key, ids[&doc]))
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(actual, expected);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
assert_query(
|
||||
&index,
|
||||
Order::Asc,
|
||||
Order::Desc,
|
||||
vec![
|
||||
((Some("austin".to_owned()), Some(149.0)), 0),
|
||||
((Some("greenville".to_owned()), Some(27.0)), 1),
|
||||
((Some("tokyo".to_owned()), Some(40.0)), 2),
|
||||
((None, Some(0.0)), 3),
|
||||
],
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
use proptest::prelude::*;
|
||||
|
||||
proptest! {
|
||||
@@ -451,4 +498,197 @@ pub(crate) mod tests {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_order_by_compound_prop(
|
||||
city_order in prop_oneof!(Just(Order::Desc), Just(Order::Asc)),
|
||||
altitude_order in prop_oneof!(Just(Order::Desc), Just(Order::Asc)),
|
||||
limit in 1..20_usize,
|
||||
offset in 0..20_usize,
|
||||
segments_data in proptest::collection::vec(
|
||||
proptest::collection::vec(
|
||||
(proptest::option::of("[a-c]"), proptest::option::of(0..50u64)),
|
||||
1..10_usize // segment size
|
||||
),
|
||||
1..4_usize // num segments
|
||||
)
|
||||
) {
|
||||
use crate::collector::sort_key::ComparatorEnum;
|
||||
use crate::TantivyDocument;
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let city = schema_builder.add_text_field("city", TEXT | FAST);
|
||||
let altitude = schema_builder.add_u64_field("altitude", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
|
||||
for segment_data in segments_data.into_iter() {
|
||||
for (city_val, altitude_val) in segment_data.into_iter() {
|
||||
let mut doc = TantivyDocument::default();
|
||||
if let Some(c) = city_val {
|
||||
doc.add_text(city, c);
|
||||
}
|
||||
if let Some(a) = altitude_val {
|
||||
doc.add_u64(altitude, a);
|
||||
}
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
|
||||
let top_collector = TopDocs::with_limit(limit)
|
||||
.and_offset(offset)
|
||||
.order_by((
|
||||
(SortByString::for_field("city"), city_order),
|
||||
(
|
||||
SortByStaticFastValue::<u64>::for_field("altitude"),
|
||||
altitude_order,
|
||||
),
|
||||
));
|
||||
|
||||
let actual_results = searcher.search(&AllQuery, &top_collector).unwrap();
|
||||
let actual_doc_ids: Vec<DocAddress> =
|
||||
actual_results.into_iter().map(|(_, doc)| doc).collect();
|
||||
|
||||
// Verification logic
|
||||
let all_docs_collector = DocSetCollector;
|
||||
let all_docs = searcher.search(&AllQuery, &all_docs_collector).unwrap();
|
||||
|
||||
let docs_with_keys: Vec<((Option<String>, Option<u64>), DocAddress)> = all_docs
|
||||
.into_iter()
|
||||
.map(|doc_addr| {
|
||||
let reader = searcher.segment_reader(doc_addr.segment_ord);
|
||||
|
||||
let city_val = if let Some(col) = reader.fast_fields().str("city").unwrap() {
|
||||
let ord = col.ords().first(doc_addr.doc_id);
|
||||
if let Some(ord) = ord {
|
||||
let mut out = Vec::new();
|
||||
col.dictionary().ord_to_term(ord, &mut out).unwrap();
|
||||
String::from_utf8(out).ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let alt_val = if let Some((col, _)) = reader.fast_fields().u64_lenient("altitude").unwrap() {
|
||||
col.first(doc_addr.doc_id)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
((city_val, alt_val), doc_addr)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let city_comparator = ComparatorEnum::from(city_order);
|
||||
let alt_comparator = ComparatorEnum::from(altitude_order);
|
||||
let comparator = (city_comparator, alt_comparator);
|
||||
|
||||
let mut comparable_docs: Vec<ComparableDoc<_, _>> = docs_with_keys
|
||||
.into_iter()
|
||||
.map(|(sort_key, doc)| ComparableDoc { sort_key, doc })
|
||||
.collect();
|
||||
|
||||
comparable_docs.sort_by(|l, r| compare_for_top_k(&comparator, l, r));
|
||||
|
||||
let expected_results = comparable_docs
|
||||
.into_iter()
|
||||
.skip(offset)
|
||||
.take(limit)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expected_doc_ids: Vec<DocAddress> =
|
||||
expected_results.into_iter().map(|cd| cd.doc).collect();
|
||||
|
||||
prop_assert_eq!(actual_doc_ids, expected_doc_ids);
|
||||
}
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_order_by_u64_prop(
|
||||
order in prop_oneof!(Just(Order::Desc), Just(Order::Asc)),
|
||||
limit in 1..20_usize,
|
||||
offset in 0..20_usize,
|
||||
segments_data in proptest::collection::vec(
|
||||
proptest::collection::vec(
|
||||
proptest::option::of(0..100u64),
|
||||
1..1000_usize // segment size
|
||||
),
|
||||
1..4_usize // num segments
|
||||
)
|
||||
) {
|
||||
use crate::collector::sort_key::ComparatorEnum;
|
||||
use crate::TantivyDocument;
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
|
||||
for segment_data in segments_data.into_iter() {
|
||||
for val in segment_data.into_iter() {
|
||||
let mut doc = TantivyDocument::default();
|
||||
if let Some(v) = val {
|
||||
doc.add_u64(field, v);
|
||||
}
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
|
||||
let top_collector = TopDocs::with_limit(limit)
|
||||
.and_offset(offset)
|
||||
.order_by((SortByStaticFastValue::<u64>::for_field("field"), order));
|
||||
|
||||
let actual_results = searcher.search(&AllQuery, &top_collector).unwrap();
|
||||
let actual_doc_ids: Vec<DocAddress> =
|
||||
actual_results.into_iter().map(|(_, doc)| doc).collect();
|
||||
|
||||
// Verification logic
|
||||
let all_docs_collector = DocSetCollector;
|
||||
let all_docs = searcher.search(&AllQuery, &all_docs_collector).unwrap();
|
||||
|
||||
let docs_with_keys: Vec<(Option<u64>, DocAddress)> = all_docs
|
||||
.into_iter()
|
||||
.map(|doc_addr| {
|
||||
let reader = searcher.segment_reader(doc_addr.segment_ord);
|
||||
let val = if let Some((col, _)) = reader.fast_fields().u64_lenient("field").unwrap() {
|
||||
col.first(doc_addr.doc_id)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
(val, doc_addr)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let comparator = ComparatorEnum::from(order);
|
||||
let mut comparable_docs: Vec<ComparableDoc<_, _>> = docs_with_keys
|
||||
.into_iter()
|
||||
.map(|(sort_key, doc)| ComparableDoc { sort_key, doc })
|
||||
.collect();
|
||||
|
||||
comparable_docs.sort_by(|l, r| compare_for_top_k(&comparator, l, r));
|
||||
|
||||
let expected_results = comparable_docs
|
||||
.into_iter()
|
||||
.skip(offset)
|
||||
.take(limit)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expected_doc_ids: Vec<DocAddress> =
|
||||
expected_results.into_iter().map(|cd| cd.doc).collect();
|
||||
|
||||
prop_assert_eq!(actual_doc_ids, expected_doc_ids);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use columnar::MonotonicallyMappableToU64;
|
||||
use columnar::{ComparableDoc, MonotonicallyMappableToU64, ValueRange};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
@@ -69,6 +69,9 @@ fn compare_owned_value<const NULLS_FIRST: bool>(lhs: &OwnedValue, rhs: &OwnedVal
|
||||
pub trait Comparator<T>: Send + Sync + std::fmt::Debug + Default {
|
||||
/// Return the order between two values.
|
||||
fn compare(&self, lhs: &T, rhs: &T) -> Ordering;
|
||||
|
||||
/// Return a `ValueRange` that matches all values that are greater than the provided threshold.
|
||||
fn threshold_to_valuerange(&self, threshold: T) -> ValueRange<T>;
|
||||
}
|
||||
|
||||
/// Compare values naturally (e.g. 1 < 2).
|
||||
@@ -86,6 +89,10 @@ impl<T: PartialOrd> Comparator<T> for NaturalComparator {
|
||||
fn compare(&self, lhs: &T, rhs: &T) -> Ordering {
|
||||
lhs.partial_cmp(rhs).unwrap_or(Ordering::Equal)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: T) -> ValueRange<T> {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
/// A (partial) implementation of comparison for OwnedValue.
|
||||
@@ -97,6 +104,10 @@ impl Comparator<OwnedValue> for NaturalComparator {
|
||||
fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
|
||||
compare_owned_value::</* NULLS_FIRST= */ true>(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: OwnedValue) -> ValueRange<OwnedValue> {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare values in reverse (e.g. 2 < 1).
|
||||
@@ -114,13 +125,69 @@ impl Comparator<OwnedValue> for NaturalComparator {
|
||||
#[derive(Debug, Copy, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct ReverseComparator;
|
||||
|
||||
impl<T> Comparator<T> for ReverseComparator
|
||||
where NaturalComparator: Comparator<T>
|
||||
macro_rules! impl_reverse_comparator_primitive {
|
||||
($($t:ty),*) => {
|
||||
$(
|
||||
impl Comparator<$t> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &$t, rhs: &$t) -> Ordering {
|
||||
NaturalComparator.compare(rhs, lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: $t) -> ValueRange<$t> {
|
||||
ValueRange::LessThan(threshold, true)
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_reverse_comparator_primitive!(
|
||||
bool,
|
||||
u8,
|
||||
u16,
|
||||
u32,
|
||||
u64,
|
||||
u128,
|
||||
usize,
|
||||
i8,
|
||||
i16,
|
||||
i32,
|
||||
i64,
|
||||
i128,
|
||||
isize,
|
||||
f32,
|
||||
f64,
|
||||
String,
|
||||
crate::DateTime,
|
||||
Vec<u8>,
|
||||
crate::schema::Facet
|
||||
);
|
||||
|
||||
impl<T: PartialOrd + Send + Sync + std::fmt::Debug + Clone + 'static> Comparator<Option<T>>
|
||||
for ReverseComparator
|
||||
{
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &T, rhs: &T) -> Ordering {
|
||||
fn compare(&self, lhs: &Option<T>, rhs: &Option<T>) -> Ordering {
|
||||
NaturalComparator.compare(rhs, lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: Option<T>) -> ValueRange<Option<T>> {
|
||||
let is_some = threshold.is_some();
|
||||
ValueRange::LessThan(threshold, is_some)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<OwnedValue> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
|
||||
NaturalComparator.compare(rhs, lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: OwnedValue) -> ValueRange<OwnedValue> {
|
||||
let is_not_null = !matches!(threshold, OwnedValue::Null);
|
||||
ValueRange::LessThan(threshold, is_not_null)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare values in reverse, but treating `None` as lower than `Some`.
|
||||
@@ -147,6 +214,14 @@ where ReverseComparator: Comparator<T>
|
||||
(Some(lhs), Some(rhs)) => ReverseComparator.compare(lhs, rhs),
|
||||
}
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: Option<T>) -> ValueRange<Option<T>> {
|
||||
if threshold.is_some() {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
} else {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<u32> for ReverseNoneIsLowerComparator {
|
||||
@@ -154,6 +229,10 @@ impl Comparator<u32> for ReverseNoneIsLowerComparator {
|
||||
fn compare(&self, lhs: &u32, rhs: &u32) -> Ordering {
|
||||
ReverseComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: u32) -> ValueRange<u32> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<u64> for ReverseNoneIsLowerComparator {
|
||||
@@ -161,6 +240,10 @@ impl Comparator<u64> for ReverseNoneIsLowerComparator {
|
||||
fn compare(&self, lhs: &u64, rhs: &u64) -> Ordering {
|
||||
ReverseComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: u64) -> ValueRange<u64> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<f64> for ReverseNoneIsLowerComparator {
|
||||
@@ -168,6 +251,10 @@ impl Comparator<f64> for ReverseNoneIsLowerComparator {
|
||||
fn compare(&self, lhs: &f64, rhs: &f64) -> Ordering {
|
||||
ReverseComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: f64) -> ValueRange<f64> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<f32> for ReverseNoneIsLowerComparator {
|
||||
@@ -175,6 +262,10 @@ impl Comparator<f32> for ReverseNoneIsLowerComparator {
|
||||
fn compare(&self, lhs: &f32, rhs: &f32) -> Ordering {
|
||||
ReverseComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: f32) -> ValueRange<f32> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<i64> for ReverseNoneIsLowerComparator {
|
||||
@@ -182,6 +273,10 @@ impl Comparator<i64> for ReverseNoneIsLowerComparator {
|
||||
fn compare(&self, lhs: &i64, rhs: &i64) -> Ordering {
|
||||
ReverseComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: i64) -> ValueRange<i64> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<String> for ReverseNoneIsLowerComparator {
|
||||
@@ -189,6 +284,10 @@ impl Comparator<String> for ReverseNoneIsLowerComparator {
|
||||
fn compare(&self, lhs: &String, rhs: &String) -> Ordering {
|
||||
ReverseComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: String) -> ValueRange<String> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<OwnedValue> for ReverseNoneIsLowerComparator {
|
||||
@@ -196,6 +295,10 @@ impl Comparator<OwnedValue> for ReverseNoneIsLowerComparator {
|
||||
fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
|
||||
compare_owned_value::</* NULLS_FIRST= */ false>(rhs, lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: OwnedValue) -> ValueRange<OwnedValue> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare values naturally, but treating `None` as higher than `Some`.
|
||||
@@ -218,6 +321,15 @@ where NaturalComparator: Comparator<T>
|
||||
(Some(lhs), Some(rhs)) => NaturalComparator.compare(lhs, rhs),
|
||||
}
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: Option<T>) -> ValueRange<Option<T>> {
|
||||
if threshold.is_some() {
|
||||
let is_some = threshold.is_some();
|
||||
ValueRange::GreaterThan(threshold, is_some)
|
||||
} else {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<u32> for NaturalNoneIsHigherComparator {
|
||||
@@ -225,6 +337,10 @@ impl Comparator<u32> for NaturalNoneIsHigherComparator {
|
||||
fn compare(&self, lhs: &u32, rhs: &u32) -> Ordering {
|
||||
NaturalComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: u32) -> ValueRange<u32> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<u64> for NaturalNoneIsHigherComparator {
|
||||
@@ -232,6 +348,10 @@ impl Comparator<u64> for NaturalNoneIsHigherComparator {
|
||||
fn compare(&self, lhs: &u64, rhs: &u64) -> Ordering {
|
||||
NaturalComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: u64) -> ValueRange<u64> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<f64> for NaturalNoneIsHigherComparator {
|
||||
@@ -239,6 +359,10 @@ impl Comparator<f64> for NaturalNoneIsHigherComparator {
|
||||
fn compare(&self, lhs: &f64, rhs: &f64) -> Ordering {
|
||||
NaturalComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: f64) -> ValueRange<f64> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<f32> for NaturalNoneIsHigherComparator {
|
||||
@@ -246,6 +370,10 @@ impl Comparator<f32> for NaturalNoneIsHigherComparator {
|
||||
fn compare(&self, lhs: &f32, rhs: &f32) -> Ordering {
|
||||
NaturalComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: f32) -> ValueRange<f32> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<i64> for NaturalNoneIsHigherComparator {
|
||||
@@ -253,6 +381,10 @@ impl Comparator<i64> for NaturalNoneIsHigherComparator {
|
||||
fn compare(&self, lhs: &i64, rhs: &i64) -> Ordering {
|
||||
NaturalComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: i64) -> ValueRange<i64> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<String> for NaturalNoneIsHigherComparator {
|
||||
@@ -260,6 +392,10 @@ impl Comparator<String> for NaturalNoneIsHigherComparator {
|
||||
fn compare(&self, lhs: &String, rhs: &String) -> Ordering {
|
||||
NaturalComparator.compare(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: String) -> ValueRange<String> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<OwnedValue> for NaturalNoneIsHigherComparator {
|
||||
@@ -267,6 +403,10 @@ impl Comparator<OwnedValue> for NaturalNoneIsHigherComparator {
|
||||
fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
|
||||
compare_owned_value::</* NULLS_FIRST= */ false>(lhs, rhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: OwnedValue) -> ValueRange<OwnedValue> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
/// An enum representing the different sort orders.
|
||||
@@ -308,6 +448,19 @@ where
|
||||
ComparatorEnum::NaturalNoneHigher => NaturalNoneIsHigherComparator.compare(lhs, rhs),
|
||||
}
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: T) -> ValueRange<T> {
|
||||
match self {
|
||||
ComparatorEnum::Natural => NaturalComparator.threshold_to_valuerange(threshold),
|
||||
ComparatorEnum::Reverse => ReverseComparator.threshold_to_valuerange(threshold),
|
||||
ComparatorEnum::ReverseNoneLower => {
|
||||
ReverseNoneIsLowerComparator.threshold_to_valuerange(threshold)
|
||||
}
|
||||
ComparatorEnum::NaturalNoneHigher => {
|
||||
NaturalNoneIsHigherComparator.threshold_to_valuerange(threshold)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Head, Tail, LeftComparator, RightComparator> Comparator<(Head, Tail)>
|
||||
@@ -322,6 +475,10 @@ where
|
||||
.compare(&lhs.0, &rhs.0)
|
||||
.then_with(|| self.1.compare(&lhs.1, &rhs.1))
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: (Head, Tail)) -> ValueRange<(Head, Tail)> {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Type1, Type2, Type3, Comparator1, Comparator2, Comparator3> Comparator<(Type1, (Type2, Type3))>
|
||||
@@ -338,6 +495,13 @@ where
|
||||
.then_with(|| self.1.compare(&lhs.1 .0, &rhs.1 .0))
|
||||
.then_with(|| self.2.compare(&lhs.1 .1, &rhs.1 .1))
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: (Type1, (Type2, Type3)),
|
||||
) -> ValueRange<(Type1, (Type2, Type3))> {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Type1, Type2, Type3, Comparator1, Comparator2, Comparator3> Comparator<(Type1, Type2, Type3)>
|
||||
@@ -354,6 +518,13 @@ where
|
||||
.then_with(|| self.1.compare(&lhs.1, &rhs.1))
|
||||
.then_with(|| self.2.compare(&lhs.2, &rhs.2))
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: (Type1, Type2, Type3),
|
||||
) -> ValueRange<(Type1, Type2, Type3)> {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Type1, Type2, Type3, Type4, Comparator1, Comparator2, Comparator3, Comparator4>
|
||||
@@ -377,6 +548,13 @@ where
|
||||
.then_with(|| self.2.compare(&lhs.1 .1 .0, &rhs.1 .1 .0))
|
||||
.then_with(|| self.3.compare(&lhs.1 .1 .1, &rhs.1 .1 .1))
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: (Type1, (Type2, (Type3, Type4))),
|
||||
) -> ValueRange<(Type1, (Type2, (Type3, Type4)))> {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Type1, Type2, Type3, Type4, Comparator1, Comparator2, Comparator3, Comparator4>
|
||||
@@ -400,6 +578,13 @@ where
|
||||
.then_with(|| self.2.compare(&lhs.2, &rhs.2))
|
||||
.then_with(|| self.3.compare(&lhs.3, &rhs.3))
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: (Type1, Type2, Type3, Type4),
|
||||
) -> ValueRange<(Type1, Type2, Type3, Type4)> {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl<TSortKeyComputer> SortKeyComputer for (TSortKeyComputer, ComparatorEnum)
|
||||
@@ -489,16 +674,32 @@ impl<TSegmentSortKeyComputer, TSegmentSortKey, TComparator> SegmentSortKeyComput
|
||||
where
|
||||
TSegmentSortKeyComputer: SegmentSortKeyComputer<SegmentSortKey = TSegmentSortKey>,
|
||||
TSegmentSortKey: Clone + 'static + Sync + Send,
|
||||
TComparator: Comparator<TSegmentSortKey> + 'static + Sync + Send,
|
||||
TComparator: Comparator<TSegmentSortKey> + Clone + 'static + Sync + Send,
|
||||
{
|
||||
type SortKey = TSegmentSortKeyComputer::SortKey;
|
||||
type SegmentSortKey = TSegmentSortKey;
|
||||
type SegmentComparator = TComparator;
|
||||
type Buffer = TSegmentSortKeyComputer::Buffer;
|
||||
|
||||
fn segment_comparator(&self) -> Self::SegmentComparator {
|
||||
self.comparator.clone()
|
||||
}
|
||||
|
||||
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey {
|
||||
self.segment_sort_key_computer.segment_sort_key(doc, score)
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
buffer: &mut Self::Buffer,
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
self.segment_sort_key_computer
|
||||
.segment_sort_keys(input_docs, output, buffer, filter)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn compare_segment_sort_key(
|
||||
&self,
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
use columnar::{ColumnType, MonotonicallyMappableToU64};
|
||||
use columnar::{ColumnType, MonotonicallyMappableToU64, ValueRange};
|
||||
|
||||
use crate::collector::sort_key::sort_by_score::SortBySimilarityScoreSegmentComputer;
|
||||
use crate::collector::sort_key::{
|
||||
NaturalComparator, SortBySimilarityScore, SortByStaticFastValue, SortByString,
|
||||
};
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::collector::{ComparableDoc, SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::fastfield::FastFieldNotAvailableError;
|
||||
use crate::schema::OwnedValue;
|
||||
use crate::{DateTime, DocId, Score};
|
||||
@@ -36,12 +37,23 @@ impl SortByErasedType {
|
||||
|
||||
trait ErasedSegmentSortKeyComputer: Send + Sync {
|
||||
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Option<u64>;
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Option<u64>, DocId>>,
|
||||
filter: ValueRange<Option<u64>>,
|
||||
);
|
||||
fn convert_segment_sort_key(&self, sort_key: Option<u64>) -> OwnedValue;
|
||||
}
|
||||
|
||||
struct ErasedSegmentSortKeyComputerWrapper<C, F> {
|
||||
struct ErasedSegmentSortKeyComputerWrapper<C, F>
|
||||
where
|
||||
C: SegmentSortKeyComputer<SegmentSortKey = Option<u64>> + Send + Sync,
|
||||
F: Fn(C::SortKey) -> OwnedValue + Send + Sync + 'static,
|
||||
{
|
||||
inner: C,
|
||||
converter: F,
|
||||
buffer: C::Buffer,
|
||||
}
|
||||
|
||||
impl<C, F> ErasedSegmentSortKeyComputer for ErasedSegmentSortKeyComputerWrapper<C, F>
|
||||
@@ -53,6 +65,16 @@ where
|
||||
self.inner.segment_sort_key(doc, score)
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Option<u64>, DocId>>,
|
||||
filter: ValueRange<Option<u64>>,
|
||||
) {
|
||||
self.inner
|
||||
.segment_sort_keys(input_docs, output, &mut self.buffer, filter)
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, sort_key: Option<u64>) -> OwnedValue {
|
||||
let val = self.inner.convert_segment_sort_key(sort_key);
|
||||
(self.converter)(val)
|
||||
@@ -60,7 +82,7 @@ where
|
||||
}
|
||||
|
||||
struct ScoreSegmentSortKeyComputer {
|
||||
segment_computer: SortBySimilarityScore,
|
||||
segment_computer: SortBySimilarityScoreSegmentComputer,
|
||||
}
|
||||
|
||||
impl ErasedSegmentSortKeyComputer for ScoreSegmentSortKeyComputer {
|
||||
@@ -69,6 +91,15 @@ impl ErasedSegmentSortKeyComputer for ScoreSegmentSortKeyComputer {
|
||||
Some(score_value.to_u64())
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
_input_docs: &[DocId],
|
||||
_output: &mut Vec<ComparableDoc<Option<u64>, DocId>>,
|
||||
_filter: ValueRange<Option<u64>>,
|
||||
) {
|
||||
unimplemented!("Batch computation not supported for score sorting")
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, sort_key: Option<u64>) -> OwnedValue {
|
||||
let score_value: u64 = sort_key.expect("This implementation always produces a score.");
|
||||
OwnedValue::F64(f64::from_u64(score_value))
|
||||
@@ -112,6 +143,7 @@ impl SortKeyComputer for SortByErasedType {
|
||||
converter: |val: Option<String>| {
|
||||
val.map(OwnedValue::Str).unwrap_or(OwnedValue::Null)
|
||||
},
|
||||
buffer: Default::default(),
|
||||
})
|
||||
}
|
||||
ColumnType::U64 => {
|
||||
@@ -122,6 +154,7 @@ impl SortKeyComputer for SortByErasedType {
|
||||
converter: |val: Option<u64>| {
|
||||
val.map(OwnedValue::U64).unwrap_or(OwnedValue::Null)
|
||||
},
|
||||
buffer: Default::default(),
|
||||
})
|
||||
}
|
||||
ColumnType::I64 => {
|
||||
@@ -132,6 +165,7 @@ impl SortKeyComputer for SortByErasedType {
|
||||
converter: |val: Option<i64>| {
|
||||
val.map(OwnedValue::I64).unwrap_or(OwnedValue::Null)
|
||||
},
|
||||
buffer: Default::default(),
|
||||
})
|
||||
}
|
||||
ColumnType::F64 => {
|
||||
@@ -142,6 +176,7 @@ impl SortKeyComputer for SortByErasedType {
|
||||
converter: |val: Option<f64>| {
|
||||
val.map(OwnedValue::F64).unwrap_or(OwnedValue::Null)
|
||||
},
|
||||
buffer: Default::default(),
|
||||
})
|
||||
}
|
||||
ColumnType::Bool => {
|
||||
@@ -152,6 +187,7 @@ impl SortKeyComputer for SortByErasedType {
|
||||
converter: |val: Option<bool>| {
|
||||
val.map(OwnedValue::Bool).unwrap_or(OwnedValue::Null)
|
||||
},
|
||||
buffer: Default::default(),
|
||||
})
|
||||
}
|
||||
ColumnType::DateTime => {
|
||||
@@ -162,6 +198,7 @@ impl SortKeyComputer for SortByErasedType {
|
||||
converter: |val: Option<DateTime>| {
|
||||
val.map(OwnedValue::Date).unwrap_or(OwnedValue::Null)
|
||||
},
|
||||
buffer: Default::default(),
|
||||
})
|
||||
}
|
||||
column_type => {
|
||||
@@ -174,7 +211,8 @@ impl SortKeyComputer for SortByErasedType {
|
||||
}
|
||||
}
|
||||
Self::Score => Box::new(ScoreSegmentSortKeyComputer {
|
||||
segment_computer: SortBySimilarityScore,
|
||||
segment_computer: SortBySimilarityScore
|
||||
.segment_sort_key_computer(segment_reader)?,
|
||||
}),
|
||||
};
|
||||
Ok(ErasedColumnSegmentSortKeyComputer { inner })
|
||||
@@ -189,12 +227,23 @@ impl SegmentSortKeyComputer for ErasedColumnSegmentSortKeyComputer {
|
||||
type SortKey = OwnedValue;
|
||||
type SegmentSortKey = Option<u64>;
|
||||
type SegmentComparator = NaturalComparator;
|
||||
type Buffer = ();
|
||||
|
||||
#[inline(always)]
|
||||
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Option<u64> {
|
||||
self.inner.segment_sort_key(doc, score)
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
_buffer: &mut Self::Buffer,
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
self.inner.segment_sort_keys(input_docs, output, filter)
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, segment_sort_key: Self::SegmentSortKey) -> OwnedValue {
|
||||
self.inner.convert_segment_sort_key(segment_sort_key)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use columnar::ValueRange;
|
||||
|
||||
use crate::collector::sort_key::NaturalComparator;
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer, TopNComputer};
|
||||
use crate::collector::{ComparableDoc, SegmentSortKeyComputer, SortKeyComputer, TopNComputer};
|
||||
use crate::{DocAddress, DocId, Score};
|
||||
|
||||
/// Sort by similarity score.
|
||||
@@ -9,7 +11,7 @@ pub struct SortBySimilarityScore;
|
||||
impl SortKeyComputer for SortBySimilarityScore {
|
||||
type SortKey = Score;
|
||||
|
||||
type Child = SortBySimilarityScore;
|
||||
type Child = SortBySimilarityScoreSegmentComputer;
|
||||
|
||||
type Comparator = NaturalComparator;
|
||||
|
||||
@@ -21,7 +23,7 @@ impl SortKeyComputer for SortBySimilarityScore {
|
||||
&self,
|
||||
_segment_reader: &crate::SegmentReader,
|
||||
) -> crate::Result<Self::Child> {
|
||||
Ok(SortBySimilarityScore)
|
||||
Ok(SortBySimilarityScoreSegmentComputer)
|
||||
}
|
||||
|
||||
// Sorting by score is special in that it allows for the Block-Wand optimization.
|
||||
@@ -61,16 +63,29 @@ impl SortKeyComputer for SortBySimilarityScore {
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentSortKeyComputer for SortBySimilarityScore {
|
||||
pub struct SortBySimilarityScoreSegmentComputer;
|
||||
|
||||
impl SegmentSortKeyComputer for SortBySimilarityScoreSegmentComputer {
|
||||
type SortKey = Score;
|
||||
type SegmentSortKey = Score;
|
||||
type SegmentComparator = NaturalComparator;
|
||||
type Buffer = ();
|
||||
|
||||
#[inline(always)]
|
||||
fn segment_sort_key(&mut self, _doc: DocId, score: Score) -> Score {
|
||||
score
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
_input_docs: &[DocId],
|
||||
_output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
_buffer: &mut Self::Buffer,
|
||||
_filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
unimplemented!("Batch computation not supported for score sorting")
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, score: Score) -> Score {
|
||||
score
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use columnar::Column;
|
||||
use columnar::{Column, ValueRange};
|
||||
|
||||
use crate::collector::sort_key::sort_key_computer::convert_optional_u64_range_to_u64_range;
|
||||
use crate::collector::sort_key::NaturalComparator;
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::collector::{ComparableDoc, SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::fastfield::{FastFieldNotAvailableError, FastValue};
|
||||
use crate::{DocId, Score, SegmentReader};
|
||||
|
||||
@@ -84,13 +85,110 @@ impl<T: FastValue> SegmentSortKeyComputer for SortByFastValueSegmentSortKeyCompu
|
||||
type SortKey = Option<T>;
|
||||
type SegmentSortKey = Option<u64>;
|
||||
type SegmentComparator = NaturalComparator;
|
||||
type Buffer = ();
|
||||
|
||||
#[inline(always)]
|
||||
fn segment_sort_key(&mut self, doc: DocId, _score: Score) -> Self::SegmentSortKey {
|
||||
self.sort_column.first(doc)
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
_buffer: &mut Self::Buffer,
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
let u64_filter = convert_optional_u64_range_to_u64_range(filter);
|
||||
self.sort_column
|
||||
.first_vals_in_value_range(input_docs, output, u64_filter);
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, sort_key: Self::SegmentSortKey) -> Self::SortKey {
|
||||
sort_key.map(T::from_u64)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::schema::{Schema, FAST};
|
||||
use crate::Index;
|
||||
|
||||
#[test]
|
||||
fn test_sort_by_fast_value_batch() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field_col = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => 10u64))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => 20u64))
|
||||
.unwrap();
|
||||
index_writer.add_document(crate::doc!()).unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
|
||||
let sorter = SortByStaticFastValue::<u64>::for_field("field");
|
||||
let mut computer = sorter.segment_sort_key_computer(segment_reader).unwrap();
|
||||
|
||||
let mut docs = vec![0, 1, 2];
|
||||
let mut output = Vec::new();
|
||||
let mut buffer = ();
|
||||
computer.segment_sort_keys(&mut docs, &mut output, &mut buffer, ValueRange::All);
|
||||
|
||||
assert_eq!(
|
||||
output.iter().map(|c| c.sort_key).collect::<Vec<_>>(),
|
||||
&[Some(10), Some(20), None]
|
||||
);
|
||||
assert_eq!(output.iter().map(|c| c.doc).collect::<Vec<_>>(), &[0, 1, 2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sort_by_fast_value_batch_with_filter() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field_col = schema_builder.add_u64_field("field", FAST);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => 10u64))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => 20u64))
|
||||
.unwrap();
|
||||
index_writer.add_document(crate::doc!()).unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
|
||||
let sorter = SortByStaticFastValue::<u64>::for_field("field");
|
||||
let mut computer = sorter.segment_sort_key_computer(segment_reader).unwrap();
|
||||
|
||||
let mut docs = vec![0, 1, 2];
|
||||
let mut output = Vec::new();
|
||||
let mut buffer = ();
|
||||
computer.segment_sort_keys(
|
||||
&mut docs,
|
||||
&mut output,
|
||||
&mut buffer,
|
||||
ValueRange::GreaterThan(Some(15u64), false /* inclusive */),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
output.iter().map(|c| c.sort_key).collect::<Vec<_>>(),
|
||||
&[Some(20)]
|
||||
);
|
||||
assert_eq!(output.iter().map(|c| c.doc).collect::<Vec<_>>(), &[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use columnar::StrColumn;
|
||||
use columnar::{StrColumn, ValueRange};
|
||||
|
||||
use crate::collector::sort_key::sort_key_computer::{
|
||||
convert_optional_u64_range_to_u64_range, range_contains_none,
|
||||
};
|
||||
use crate::collector::sort_key::NaturalComparator;
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::collector::{ComparableDoc, SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::termdict::TermOrdinal;
|
||||
use crate::{DocId, Score};
|
||||
|
||||
@@ -50,6 +53,7 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
|
||||
type SortKey = Option<String>;
|
||||
type SegmentSortKey = Option<TermOrdinal>;
|
||||
type SegmentComparator = NaturalComparator;
|
||||
type Buffer = ();
|
||||
|
||||
#[inline(always)]
|
||||
fn segment_sort_key(&mut self, doc: DocId, _score: Score) -> Option<TermOrdinal> {
|
||||
@@ -57,6 +61,28 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
|
||||
str_column.ords().first(doc)
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
_buffer: &mut Self::Buffer,
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
if let Some(str_column) = &self.str_column_opt {
|
||||
let u64_filter = convert_optional_u64_range_to_u64_range(filter);
|
||||
str_column
|
||||
.ords()
|
||||
.first_vals_in_value_range(input_docs, output, u64_filter);
|
||||
} else if range_contains_none(&filter) {
|
||||
for &doc in input_docs {
|
||||
output.push(ComparableDoc {
|
||||
doc,
|
||||
sort_key: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, term_ord_opt: Option<TermOrdinal>) -> Option<String> {
|
||||
// TODO: Individual lookups to the dictionary like this are very likely to repeatedly
|
||||
// decompress the same blocks. See https://github.com/quickwit-oss/tantivy/issues/2776
|
||||
@@ -70,3 +96,90 @@ impl SegmentSortKeyComputer for ByStringColumnSegmentSortKeyComputer {
|
||||
String::try_from(bytes).ok()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::schema::{Schema, FAST, TEXT};
|
||||
use crate::Index;
|
||||
|
||||
#[test]
|
||||
fn test_sort_by_string_batch() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field_col = schema_builder.add_text_field("field", FAST | TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => "a"))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => "c"))
|
||||
.unwrap();
|
||||
index_writer.add_document(crate::doc!()).unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
|
||||
let sorter = SortByString::for_field("field");
|
||||
let mut computer = sorter.segment_sort_key_computer(segment_reader).unwrap();
|
||||
|
||||
let mut docs = vec![0, 1, 2];
|
||||
let mut output = Vec::new();
|
||||
let mut buffer = ();
|
||||
computer.segment_sort_keys(&mut docs, &mut output, &mut buffer, ValueRange::All);
|
||||
|
||||
assert_eq!(
|
||||
output.iter().map(|c| c.sort_key).collect::<Vec<_>>(),
|
||||
&[Some(0), Some(1), None]
|
||||
);
|
||||
assert_eq!(output.iter().map(|c| c.doc).collect::<Vec<_>>(), &[0, 1, 2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sort_by_string_batch_with_filter() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field_col = schema_builder.add_text_field("field", FAST | TEXT);
|
||||
let schema = schema_builder.build();
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => "a"))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(crate::doc!(field_col => "c"))
|
||||
.unwrap();
|
||||
index_writer.add_document(crate::doc!()).unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0);
|
||||
|
||||
let sorter = SortByString::for_field("field");
|
||||
let mut computer = sorter.segment_sort_key_computer(segment_reader).unwrap();
|
||||
|
||||
let mut docs = vec![0, 1, 2];
|
||||
let mut output = Vec::new();
|
||||
// Filter: > "b". "a" is 0, "c" is 1.
|
||||
// We want > "a" (ord 0). So we filter > ord 0.
|
||||
// 0 is "a", 1 is "c".
|
||||
let mut buffer = ();
|
||||
computer.segment_sort_keys(
|
||||
&mut docs,
|
||||
&mut output,
|
||||
&mut buffer,
|
||||
ValueRange::GreaterThan(Some(0), false /* inclusive */),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
output.iter().map(|c| c.sort_key).collect::<Vec<_>>(),
|
||||
&[Some(1)]
|
||||
);
|
||||
assert_eq!(output.iter().map(|c| c.doc).collect::<Vec<_>>(), &[1]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use columnar::ValueRange;
|
||||
|
||||
use crate::collector::sort_key::{Comparator, NaturalComparator};
|
||||
use crate::collector::sort_key_top_collector::TopBySortKeySegmentCollector;
|
||||
use crate::collector::{default_collect_segment_impl, SegmentCollector as _, TopNComputer};
|
||||
use crate::collector::{
|
||||
default_collect_segment_impl, ComparableDoc, SegmentCollector as _, TopNComputer,
|
||||
};
|
||||
use crate::schema::Schema;
|
||||
use crate::{DocAddress, DocId, Result, Score, SegmentReader};
|
||||
|
||||
@@ -21,7 +25,10 @@ pub trait SegmentSortKeyComputer: 'static {
|
||||
type SegmentSortKey: 'static + Clone + Send + Sync + Clone;
|
||||
|
||||
/// Comparator type.
|
||||
type SegmentComparator: Comparator<Self::SegmentSortKey> + 'static;
|
||||
type SegmentComparator: Comparator<Self::SegmentSortKey> + Clone + 'static;
|
||||
|
||||
/// Buffer type used for scratch space.
|
||||
type Buffer: Default + Send + Sync + 'static;
|
||||
|
||||
/// Returns the segment sort key comparator.
|
||||
fn segment_comparator(&self) -> Self::SegmentComparator {
|
||||
@@ -31,6 +38,18 @@ pub trait SegmentSortKeyComputer: 'static {
|
||||
/// Computes the sort key for the given document and score.
|
||||
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey;
|
||||
|
||||
/// Computes the sort keys for a batch of documents.
|
||||
///
|
||||
/// The computed sort keys and document IDs are pushed into the `output` vector.
|
||||
/// The `buffer` is used for scratch space.
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
buffer: &mut Self::Buffer,
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
);
|
||||
|
||||
/// Computes the sort key and pushes the document in a TopN Computer.
|
||||
///
|
||||
/// When using a tuple as the sorting key, the sort key is evaluated in a lazy manner.
|
||||
@@ -39,12 +58,32 @@ pub trait SegmentSortKeyComputer: 'static {
|
||||
&mut self,
|
||||
doc: DocId,
|
||||
score: Score,
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C>,
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C, Self::Buffer>,
|
||||
) {
|
||||
let sort_key = self.segment_sort_key(doc, score);
|
||||
top_n_computer.push(sort_key, doc);
|
||||
}
|
||||
|
||||
fn compute_sort_keys_and_collect<C: Comparator<Self::SegmentSortKey>>(
|
||||
&mut self,
|
||||
docs: &[DocId],
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C, Self::Buffer>,
|
||||
) {
|
||||
// The capacity of a TopNComputer is larger than 2*n + COLLECT_BLOCK_BUFFER_LEN, so we
|
||||
// should always be able to `reserve` space for the entire block.
|
||||
top_n_computer.reserve(docs.len());
|
||||
|
||||
let comparator = self.segment_comparator();
|
||||
let value_range = if let Some(threshold) = &top_n_computer.threshold {
|
||||
comparator.threshold_to_valuerange(threshold.clone())
|
||||
} else {
|
||||
ValueRange::All
|
||||
};
|
||||
|
||||
let (buffer, scratch) = top_n_computer.buffer_and_scratch();
|
||||
self.segment_sort_keys(docs, buffer, scratch, value_range);
|
||||
}
|
||||
|
||||
/// A SegmentSortKeyComputer maps to a SegmentSortKey, but it can also decide on
|
||||
/// its ordering.
|
||||
///
|
||||
@@ -58,26 +97,6 @@ pub trait SegmentSortKeyComputer: 'static {
|
||||
self.segment_comparator().compare(left, right)
|
||||
}
|
||||
|
||||
/// Implementing this method makes it possible to avoid computing
|
||||
/// a sort_key entirely if we can assess that it won't pass a threshold
|
||||
/// with a partial computation.
|
||||
///
|
||||
/// This is currently used for lexicographic sorting.
|
||||
fn accept_sort_key_lazy(
|
||||
&mut self,
|
||||
doc_id: DocId,
|
||||
score: Score,
|
||||
threshold: &Self::SegmentSortKey,
|
||||
) -> Option<(Ordering, Self::SegmentSortKey)> {
|
||||
let sort_key = self.segment_sort_key(doc_id, score);
|
||||
let cmp = self.compare_segment_sort_key(&sort_key, threshold);
|
||||
if cmp == Ordering::Less {
|
||||
None
|
||||
} else {
|
||||
Some((cmp, sort_key))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a segment level sort key into the global sort key.
|
||||
fn convert_segment_sort_key(&self, sort_key: Self::SegmentSortKey) -> Self::SortKey;
|
||||
}
|
||||
@@ -145,7 +164,8 @@ where
|
||||
TailSortKeyComputer: SortKeyComputer,
|
||||
{
|
||||
type SortKey = (HeadSortKeyComputer::SortKey, TailSortKeyComputer::SortKey);
|
||||
type Child = (HeadSortKeyComputer::Child, TailSortKeyComputer::Child);
|
||||
type Child =
|
||||
ChainSegmentSortKeyComputer<HeadSortKeyComputer::Child, TailSortKeyComputer::Child>;
|
||||
|
||||
type Comparator = (
|
||||
HeadSortKeyComputer::Comparator,
|
||||
@@ -157,10 +177,10 @@ where
|
||||
}
|
||||
|
||||
fn segment_sort_key_computer(&self, segment_reader: &SegmentReader) -> Result<Self::Child> {
|
||||
Ok((
|
||||
self.0.segment_sort_key_computer(segment_reader)?,
|
||||
self.1.segment_sort_key_computer(segment_reader)?,
|
||||
))
|
||||
Ok(ChainSegmentSortKeyComputer {
|
||||
head: self.0.segment_sort_key_computer(segment_reader)?,
|
||||
tail: self.1.segment_sort_key_computer(segment_reader)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Checks whether the schema is compatible with the sort key computer.
|
||||
@@ -178,25 +198,91 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<HeadSegmentSortKeyComputer, TailSegmentSortKeyComputer> SegmentSortKeyComputer
|
||||
for (HeadSegmentSortKeyComputer, TailSegmentSortKeyComputer)
|
||||
pub struct ChainSegmentSortKeyComputer<Head, Tail>
|
||||
where
|
||||
HeadSegmentSortKeyComputer: SegmentSortKeyComputer,
|
||||
TailSegmentSortKeyComputer: SegmentSortKeyComputer,
|
||||
Head: SegmentSortKeyComputer,
|
||||
Tail: SegmentSortKeyComputer,
|
||||
{
|
||||
type SortKey = (
|
||||
HeadSegmentSortKeyComputer::SortKey,
|
||||
TailSegmentSortKeyComputer::SortKey,
|
||||
);
|
||||
type SegmentSortKey = (
|
||||
HeadSegmentSortKeyComputer::SegmentSortKey,
|
||||
TailSegmentSortKeyComputer::SegmentSortKey,
|
||||
);
|
||||
head: Head,
|
||||
tail: Tail,
|
||||
}
|
||||
|
||||
type SegmentComparator = (
|
||||
HeadSegmentSortKeyComputer::SegmentComparator,
|
||||
TailSegmentSortKeyComputer::SegmentComparator,
|
||||
);
|
||||
pub struct ChainBuffer<HeadBuffer, TailBuffer, HeadKey, TailKey> {
|
||||
pub head: HeadBuffer,
|
||||
pub tail: TailBuffer,
|
||||
pub head_output: Vec<ComparableDoc<HeadKey, DocId>>,
|
||||
pub tail_output: Vec<ComparableDoc<TailKey, DocId>>,
|
||||
pub tail_input_docs: Vec<DocId>,
|
||||
}
|
||||
|
||||
impl<HeadBuffer: Default, TailBuffer: Default, HeadKey, TailKey> Default
|
||||
for ChainBuffer<HeadBuffer, TailBuffer, HeadKey, TailKey>
|
||||
{
|
||||
fn default() -> Self {
|
||||
ChainBuffer {
|
||||
head: HeadBuffer::default(),
|
||||
tail: TailBuffer::default(),
|
||||
head_output: Vec::new(),
|
||||
tail_output: Vec::new(),
|
||||
tail_input_docs: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Head, Tail> ChainSegmentSortKeyComputer<Head, Tail>
|
||||
where
|
||||
Head: SegmentSortKeyComputer,
|
||||
Tail: SegmentSortKeyComputer,
|
||||
{
|
||||
fn accept_sort_key_lazy(
|
||||
&mut self,
|
||||
doc_id: DocId,
|
||||
score: Score,
|
||||
threshold: &<Self as SegmentSortKeyComputer>::SegmentSortKey,
|
||||
) -> Option<(Ordering, <Self as SegmentSortKeyComputer>::SegmentSortKey)> {
|
||||
let (head_threshold, tail_threshold) = threshold;
|
||||
let head_sort_key = self.head.segment_sort_key(doc_id, score);
|
||||
let head_cmp = self
|
||||
.head
|
||||
.compare_segment_sort_key(&head_sort_key, head_threshold);
|
||||
if head_cmp == Ordering::Less {
|
||||
None
|
||||
} else if head_cmp == Ordering::Equal {
|
||||
let tail_sort_key = self.tail.segment_sort_key(doc_id, score);
|
||||
let tail_cmp = self
|
||||
.tail
|
||||
.compare_segment_sort_key(&tail_sort_key, tail_threshold);
|
||||
if tail_cmp == Ordering::Less {
|
||||
None
|
||||
} else {
|
||||
Some((tail_cmp, (head_sort_key, tail_sort_key)))
|
||||
}
|
||||
} else {
|
||||
let tail_sort_key = self.tail.segment_sort_key(doc_id, score);
|
||||
Some((head_cmp, (head_sort_key, tail_sort_key)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Head, Tail> SegmentSortKeyComputer for ChainSegmentSortKeyComputer<Head, Tail>
|
||||
where
|
||||
Head: SegmentSortKeyComputer,
|
||||
Tail: SegmentSortKeyComputer,
|
||||
{
|
||||
type SortKey = (Head::SortKey, Tail::SortKey);
|
||||
type SegmentSortKey = (Head::SegmentSortKey, Tail::SegmentSortKey);
|
||||
|
||||
type SegmentComparator = (Head::SegmentComparator, Tail::SegmentComparator);
|
||||
|
||||
type Buffer =
|
||||
ChainBuffer<Head::Buffer, Tail::Buffer, Head::SegmentSortKey, Tail::SegmentSortKey>;
|
||||
|
||||
fn segment_comparator(&self) -> Self::SegmentComparator {
|
||||
(
|
||||
self.head.segment_comparator(),
|
||||
self.tail.segment_comparator(),
|
||||
)
|
||||
}
|
||||
|
||||
/// A SegmentSortKeyComputer maps to a SegmentSortKey, but it can also decide on
|
||||
/// its ordering.
|
||||
@@ -208,9 +294,90 @@ where
|
||||
left: &Self::SegmentSortKey,
|
||||
right: &Self::SegmentSortKey,
|
||||
) -> Ordering {
|
||||
self.0
|
||||
self.head
|
||||
.compare_segment_sort_key(&left.0, &right.0)
|
||||
.then_with(|| self.1.compare_segment_sort_key(&left.1, &right.1))
|
||||
.then_with(|| self.tail.compare_segment_sort_key(&left.1, &right.1))
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
buffer: &mut Self::Buffer,
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
let (head_filter, threshold) = match filter {
|
||||
ValueRange::GreaterThan((head_threshold, tail_threshold), _)
|
||||
| ValueRange::LessThan((head_threshold, tail_threshold), _) => {
|
||||
let head_cmp = self.head.segment_comparator();
|
||||
let strict_head_filter = head_cmp.threshold_to_valuerange(head_threshold.clone());
|
||||
let head_filter = match strict_head_filter {
|
||||
ValueRange::GreaterThan(t, m) => ValueRange::GreaterThanOrEqual(t, m),
|
||||
ValueRange::LessThan(t, m) => ValueRange::LessThanOrEqual(t, m),
|
||||
other => other,
|
||||
};
|
||||
(head_filter, Some((head_threshold, tail_threshold)))
|
||||
}
|
||||
_ => (ValueRange::All, None),
|
||||
};
|
||||
|
||||
buffer.head_output.clear();
|
||||
self.head.segment_sort_keys(
|
||||
input_docs,
|
||||
&mut buffer.head_output,
|
||||
&mut buffer.head,
|
||||
head_filter,
|
||||
);
|
||||
|
||||
if buffer.head_output.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
buffer.tail_output.clear();
|
||||
buffer.tail_input_docs.clear();
|
||||
for cd in &buffer.head_output {
|
||||
buffer.tail_input_docs.push(cd.doc);
|
||||
}
|
||||
|
||||
self.tail.segment_sort_keys(
|
||||
&buffer.tail_input_docs,
|
||||
&mut buffer.tail_output,
|
||||
&mut buffer.tail,
|
||||
ValueRange::All,
|
||||
);
|
||||
|
||||
let head_cmp = self.head.segment_comparator();
|
||||
let tail_cmp = self.tail.segment_comparator();
|
||||
|
||||
for (head_doc, tail_doc) in buffer
|
||||
.head_output
|
||||
.drain(..)
|
||||
.zip(buffer.tail_output.drain(..))
|
||||
{
|
||||
debug_assert_eq!(head_doc.doc, tail_doc.doc);
|
||||
let doc = head_doc.doc;
|
||||
let head_key = head_doc.sort_key;
|
||||
let tail_key = tail_doc.sort_key;
|
||||
|
||||
let accept = if let Some((head_threshold, tail_threshold)) = &threshold {
|
||||
let head_ord = head_cmp.compare(&head_key, head_threshold);
|
||||
let ord = if head_ord == Ordering::Equal {
|
||||
tail_cmp.compare(&tail_key, tail_threshold)
|
||||
} else {
|
||||
head_ord
|
||||
};
|
||||
ord == Ordering::Greater
|
||||
} else {
|
||||
true
|
||||
};
|
||||
|
||||
if accept {
|
||||
output.push(ComparableDoc {
|
||||
sort_key: (head_key, tail_key),
|
||||
doc,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@@ -218,7 +385,7 @@ where
|
||||
&mut self,
|
||||
doc: DocId,
|
||||
score: Score,
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C>,
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C, Self::Buffer>,
|
||||
) {
|
||||
let sort_key: Self::SegmentSortKey;
|
||||
if let Some(threshold) = &top_n_computer.threshold {
|
||||
@@ -235,48 +402,29 @@ where
|
||||
|
||||
#[inline(always)]
|
||||
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey {
|
||||
let head_sort_key = self.0.segment_sort_key(doc, score);
|
||||
let tail_sort_key = self.1.segment_sort_key(doc, score);
|
||||
let head_sort_key = self.head.segment_sort_key(doc, score);
|
||||
let tail_sort_key = self.tail.segment_sort_key(doc, score);
|
||||
(head_sort_key, tail_sort_key)
|
||||
}
|
||||
|
||||
fn accept_sort_key_lazy(
|
||||
&mut self,
|
||||
doc_id: DocId,
|
||||
score: Score,
|
||||
threshold: &Self::SegmentSortKey,
|
||||
) -> Option<(Ordering, Self::SegmentSortKey)> {
|
||||
let (head_threshold, tail_threshold) = threshold;
|
||||
let (head_cmp, head_sort_key) =
|
||||
self.0.accept_sort_key_lazy(doc_id, score, head_threshold)?;
|
||||
if head_cmp == Ordering::Equal {
|
||||
let (tail_cmp, tail_sort_key) =
|
||||
self.1.accept_sort_key_lazy(doc_id, score, tail_threshold)?;
|
||||
Some((tail_cmp, (head_sort_key, tail_sort_key)))
|
||||
} else {
|
||||
let tail_sort_key = self.1.segment_sort_key(doc_id, score);
|
||||
Some((head_cmp, (head_sort_key, tail_sort_key)))
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, sort_key: Self::SegmentSortKey) -> Self::SortKey {
|
||||
let (head_sort_key, tail_sort_key) = sort_key;
|
||||
(
|
||||
self.0.convert_segment_sort_key(head_sort_key),
|
||||
self.1.convert_segment_sort_key(tail_sort_key),
|
||||
self.head.convert_segment_sort_key(head_sort_key),
|
||||
self.tail.convert_segment_sort_key(tail_sort_key),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// This struct is used as an adapter to take a sort key computer and map its score to another
|
||||
/// new sort key.
|
||||
pub struct MappedSegmentSortKeyComputer<T, PreviousSortKey, NewSortKey> {
|
||||
pub struct MappedSegmentSortKeyComputer<T: SegmentSortKeyComputer, NewSortKey> {
|
||||
sort_key_computer: T,
|
||||
map: fn(PreviousSortKey) -> NewSortKey,
|
||||
map: fn(T::SortKey) -> NewSortKey,
|
||||
}
|
||||
|
||||
impl<T, PreviousScore, NewScore> SegmentSortKeyComputer
|
||||
for MappedSegmentSortKeyComputer<T, PreviousScore, NewScore>
|
||||
for MappedSegmentSortKeyComputer<T, NewScore>
|
||||
where
|
||||
T: SegmentSortKeyComputer<SortKey = PreviousScore>,
|
||||
PreviousScore: 'static + Clone + Send + Sync,
|
||||
@@ -285,19 +433,25 @@ where
|
||||
type SortKey = NewScore;
|
||||
type SegmentSortKey = T::SegmentSortKey;
|
||||
type SegmentComparator = T::SegmentComparator;
|
||||
type Buffer = T::Buffer;
|
||||
|
||||
fn segment_comparator(&self) -> Self::SegmentComparator {
|
||||
self.sort_key_computer.segment_comparator()
|
||||
}
|
||||
|
||||
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> Self::SegmentSortKey {
|
||||
self.sort_key_computer.segment_sort_key(doc, score)
|
||||
}
|
||||
|
||||
fn accept_sort_key_lazy(
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
doc_id: DocId,
|
||||
score: Score,
|
||||
threshold: &Self::SegmentSortKey,
|
||||
) -> Option<(Ordering, Self::SegmentSortKey)> {
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
buffer: &mut Self::Buffer,
|
||||
filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
self.sort_key_computer
|
||||
.accept_sort_key_lazy(doc_id, score, threshold)
|
||||
.segment_sort_keys(input_docs, output, buffer, filter)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@@ -305,12 +459,21 @@ where
|
||||
&mut self,
|
||||
doc: DocId,
|
||||
score: Score,
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C>,
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C, Self::Buffer>,
|
||||
) {
|
||||
self.sort_key_computer
|
||||
.compute_sort_key_and_collect(doc, score, top_n_computer);
|
||||
}
|
||||
|
||||
fn compute_sort_keys_and_collect<C: Comparator<Self::SegmentSortKey>>(
|
||||
&mut self,
|
||||
docs: &[DocId],
|
||||
top_n_computer: &mut TopNComputer<Self::SegmentSortKey, DocId, C, Self::Buffer>,
|
||||
) {
|
||||
self.sort_key_computer
|
||||
.compute_sort_keys_and_collect(docs, top_n_computer);
|
||||
}
|
||||
|
||||
fn convert_segment_sort_key(&self, segment_sort_key: Self::SegmentSortKey) -> Self::SortKey {
|
||||
(self.map)(
|
||||
self.sort_key_computer
|
||||
@@ -336,10 +499,6 @@ where
|
||||
);
|
||||
type Child = MappedSegmentSortKeyComputer<
|
||||
<(SortKeyComputer1, (SortKeyComputer2, SortKeyComputer3)) as SortKeyComputer>::Child,
|
||||
(
|
||||
SortKeyComputer1::SortKey,
|
||||
(SortKeyComputer2::SortKey, SortKeyComputer3::SortKey),
|
||||
),
|
||||
Self::SortKey,
|
||||
>;
|
||||
|
||||
@@ -363,7 +522,13 @@ where
|
||||
let sort_key_computer3 = self.2.segment_sort_key_computer(segment_reader)?;
|
||||
let map = |(sort_key1, (sort_key2, sort_key3))| (sort_key1, sort_key2, sort_key3);
|
||||
Ok(MappedSegmentSortKeyComputer {
|
||||
sort_key_computer: (sort_key_computer1, (sort_key_computer2, sort_key_computer3)),
|
||||
sort_key_computer: ChainSegmentSortKeyComputer {
|
||||
head: sort_key_computer1,
|
||||
tail: ChainSegmentSortKeyComputer {
|
||||
head: sort_key_computer2,
|
||||
tail: sort_key_computer3,
|
||||
},
|
||||
},
|
||||
map,
|
||||
})
|
||||
}
|
||||
@@ -398,13 +563,6 @@ where
|
||||
SortKeyComputer1,
|
||||
(SortKeyComputer2, (SortKeyComputer3, SortKeyComputer4)),
|
||||
) as SortKeyComputer>::Child,
|
||||
(
|
||||
SortKeyComputer1::SortKey,
|
||||
(
|
||||
SortKeyComputer2::SortKey,
|
||||
(SortKeyComputer3::SortKey, SortKeyComputer4::SortKey),
|
||||
),
|
||||
),
|
||||
Self::SortKey,
|
||||
>;
|
||||
type SortKey = (
|
||||
@@ -426,10 +584,16 @@ where
|
||||
let sort_key_computer3 = self.2.segment_sort_key_computer(segment_reader)?;
|
||||
let sort_key_computer4 = self.3.segment_sort_key_computer(segment_reader)?;
|
||||
Ok(MappedSegmentSortKeyComputer {
|
||||
sort_key_computer: (
|
||||
sort_key_computer1,
|
||||
(sort_key_computer2, (sort_key_computer3, sort_key_computer4)),
|
||||
),
|
||||
sort_key_computer: ChainSegmentSortKeyComputer {
|
||||
head: sort_key_computer1,
|
||||
tail: ChainSegmentSortKeyComputer {
|
||||
head: sort_key_computer2,
|
||||
tail: ChainSegmentSortKeyComputer {
|
||||
head: sort_key_computer3,
|
||||
tail: sort_key_computer4,
|
||||
},
|
||||
},
|
||||
},
|
||||
map: |(sort_key1, (sort_key2, (sort_key3, sort_key4)))| {
|
||||
(sort_key1, sort_key2, sort_key3, sort_key4)
|
||||
},
|
||||
@@ -452,6 +616,13 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub struct FuncSegmentSortKeyComputer<F, TSortKey> {
|
||||
func: F,
|
||||
_phantom: PhantomData<TSortKey>,
|
||||
}
|
||||
|
||||
impl<F, SegmentF, TSortKey> SortKeyComputer for F
|
||||
where
|
||||
F: 'static + Send + Sync + Fn(&SegmentReader) -> SegmentF,
|
||||
@@ -459,15 +630,18 @@ where
|
||||
TSortKey: 'static + PartialOrd + Clone + Send + Sync + std::fmt::Debug,
|
||||
{
|
||||
type SortKey = TSortKey;
|
||||
type Child = SegmentF;
|
||||
type Child = FuncSegmentSortKeyComputer<SegmentF, TSortKey>;
|
||||
type Comparator = NaturalComparator;
|
||||
|
||||
fn segment_sort_key_computer(&self, segment_reader: &SegmentReader) -> Result<Self::Child> {
|
||||
Ok((self)(segment_reader))
|
||||
Ok(FuncSegmentSortKeyComputer {
|
||||
func: (self)(segment_reader),
|
||||
_phantom: PhantomData,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<F, TSortKey> SegmentSortKeyComputer for F
|
||||
impl<F, TSortKey> SegmentSortKeyComputer for FuncSegmentSortKeyComputer<F, TSortKey>
|
||||
where
|
||||
F: 'static + FnMut(DocId) -> TSortKey,
|
||||
TSortKey: 'static + PartialOrd + Clone + Send + Sync,
|
||||
@@ -475,9 +649,25 @@ where
|
||||
type SortKey = TSortKey;
|
||||
type SegmentSortKey = TSortKey;
|
||||
type SegmentComparator = NaturalComparator;
|
||||
type Buffer = ();
|
||||
|
||||
fn segment_sort_key(&mut self, doc: DocId, _score: Score) -> TSortKey {
|
||||
(self)(doc)
|
||||
(self.func)(doc)
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
input_docs: &[DocId],
|
||||
output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
_buffer: &mut Self::Buffer,
|
||||
_filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
for &doc in input_docs {
|
||||
output.push(ComparableDoc {
|
||||
sort_key: (self.func)(doc),
|
||||
doc,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a segment level score into the global level score.
|
||||
@@ -486,13 +676,75 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn range_contains_none(range: &ValueRange<Option<u64>>) -> bool {
|
||||
match range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(&None),
|
||||
ValueRange::GreaterThan(_threshold, match_nulls) => *match_nulls,
|
||||
ValueRange::GreaterThanOrEqual(_threshold, match_nulls) => *match_nulls,
|
||||
ValueRange::LessThan(_threshold, match_nulls) => *match_nulls,
|
||||
ValueRange::LessThanOrEqual(_threshold, match_nulls) => *match_nulls,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn convert_optional_u64_range_to_u64_range(
|
||||
range: ValueRange<Option<u64>>,
|
||||
) -> ValueRange<u64> {
|
||||
match range {
|
||||
ValueRange::Inclusive(r) => {
|
||||
let start = r.start().unwrap_or(0);
|
||||
let end = r.end().unwrap_or(u64::MAX);
|
||||
ValueRange::Inclusive(start..=end)
|
||||
}
|
||||
ValueRange::GreaterThan(Some(val), match_nulls) => {
|
||||
ValueRange::GreaterThan(val, match_nulls)
|
||||
}
|
||||
ValueRange::GreaterThan(None, match_nulls) => {
|
||||
if match_nulls {
|
||||
ValueRange::All
|
||||
} else {
|
||||
ValueRange::Inclusive(u64::MIN..=u64::MAX)
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(Some(val), match_nulls) => {
|
||||
ValueRange::GreaterThanOrEqual(val, match_nulls)
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(None, match_nulls) => {
|
||||
if match_nulls {
|
||||
ValueRange::All
|
||||
} else {
|
||||
ValueRange::Inclusive(u64::MIN..=u64::MAX)
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(None, match_nulls) => {
|
||||
if match_nulls {
|
||||
ValueRange::LessThan(u64::MIN, true)
|
||||
} else {
|
||||
ValueRange::Inclusive(1..=0)
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(Some(val), match_nulls) => ValueRange::LessThan(val, match_nulls),
|
||||
ValueRange::LessThanOrEqual(None, match_nulls) => {
|
||||
if match_nulls {
|
||||
ValueRange::LessThan(u64::MIN, true)
|
||||
} else {
|
||||
ValueRange::Inclusive(1..=0)
|
||||
}
|
||||
}
|
||||
ValueRange::LessThanOrEqual(Some(val), match_nulls) => {
|
||||
ValueRange::LessThanOrEqual(val, match_nulls)
|
||||
}
|
||||
ValueRange::All => ValueRange::All,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::cmp::Ordering;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer, TopNComputer};
|
||||
use crate::schema::Schema;
|
||||
use crate::{DocId, Index, Order, SegmentReader};
|
||||
|
||||
@@ -640,4 +892,178 @@ mod tests {
|
||||
(200u32, 2u32)
|
||||
);
|
||||
}
|
||||
#[test]
|
||||
fn test_batch_score_computer_edge_case() {
|
||||
let score_computer_primary = |_segment_reader: &SegmentReader| |_doc: DocId| 200u32;
|
||||
let score_computer_secondary = |_segment_reader: &SegmentReader| |_doc: DocId| "b";
|
||||
let lazy_score_computer = (score_computer_primary, score_computer_secondary);
|
||||
let index = build_test_index();
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
let mut segment_sort_key_computer = lazy_score_computer
|
||||
.segment_sort_key_computer(searcher.segment_reader(0))
|
||||
.unwrap();
|
||||
|
||||
let mut top_n_computer =
|
||||
TopNComputer::new_with_comparator(10, lazy_score_computer.comparator());
|
||||
// Threshold (200, "a"). Doc is (200, "b"). 200 == 200, "b" > "a". Should be accepted.
|
||||
top_n_computer.threshold = Some((200, "a"));
|
||||
|
||||
let docs = vec![0];
|
||||
segment_sort_key_computer.compute_sort_keys_and_collect(&docs, &mut top_n_computer);
|
||||
|
||||
let results = top_n_computer.into_sorted_vec();
|
||||
assert_eq!(results.len(), 1);
|
||||
let result = &results[0];
|
||||
assert_eq!(result.doc, 0);
|
||||
assert_eq!(result.sort_key, (200, "b"));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod proptest_tests {
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::*;
|
||||
use crate::collector::sort_key::order::*;
|
||||
|
||||
// Re-implement logic to interpret ValueRange<Option<u64>> manually to verify expectations
|
||||
fn range_contains_opt(range: &ValueRange<Option<u64>>, val: &Option<u64>) -> bool {
|
||||
match range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(val),
|
||||
ValueRange::GreaterThan(t, match_nulls) => {
|
||||
if val.is_none() {
|
||||
*match_nulls
|
||||
} else {
|
||||
val > t
|
||||
}
|
||||
}
|
||||
ValueRange::GreaterThanOrEqual(t, match_nulls) => {
|
||||
if val.is_none() {
|
||||
*match_nulls
|
||||
} else {
|
||||
val >= t
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(t, match_nulls) => {
|
||||
if val.is_none() {
|
||||
*match_nulls
|
||||
} else {
|
||||
val < t
|
||||
}
|
||||
}
|
||||
ValueRange::LessThanOrEqual(t, match_nulls) => {
|
||||
if val.is_none() {
|
||||
*match_nulls
|
||||
} else {
|
||||
val <= t
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn range_contains_u64(range: &ValueRange<u64>, val: &u64) -> bool {
|
||||
match range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(val),
|
||||
ValueRange::GreaterThan(t, _) => val > t,
|
||||
ValueRange::GreaterThanOrEqual(t, _) => val >= t,
|
||||
ValueRange::LessThan(t, _) => val < t,
|
||||
ValueRange::LessThanOrEqual(t, _) => val <= t,
|
||||
}
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_comparator_consistency_natural_none_is_lower(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<NaturalComparator>(threshold, val)?;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_comparator_consistency_reverse(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<ReverseComparator>(threshold, val)?;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_comparator_consistency_reverse_none_is_lower(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<ReverseNoneIsLowerComparator>(threshold, val)?;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_comparator_consistency_natural_none_is_higher(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<NaturalNoneIsHigherComparator>(threshold, val)?;
|
||||
}
|
||||
}
|
||||
|
||||
fn check_comparator<C: Comparator<Option<u64>>>(
|
||||
threshold: Option<u64>,
|
||||
val: Option<u64>,
|
||||
) -> std::result::Result<(), proptest::test_runner::TestCaseError> {
|
||||
let comparator = C::default();
|
||||
let range = comparator.threshold_to_valuerange(threshold);
|
||||
let ordering = comparator.compare(&val, &threshold);
|
||||
let should_be_in_range = ordering == Ordering::Greater;
|
||||
|
||||
let in_range_opt = range_contains_opt(&range, &val);
|
||||
|
||||
prop_assert_eq!(
|
||||
in_range_opt,
|
||||
should_be_in_range,
|
||||
"Comparator consistency failed for {:?}. Threshold: {:?}, Val: {:?}, Range: {:?}, \
|
||||
Ordering: {:?}. range_contains_opt says {}, but compare says {}",
|
||||
std::any::type_name::<C>(),
|
||||
threshold,
|
||||
val,
|
||||
range,
|
||||
ordering,
|
||||
in_range_opt,
|
||||
should_be_in_range
|
||||
);
|
||||
|
||||
// Check range_contains_none
|
||||
let expected_none_in_range = range_contains_opt(&range, &None);
|
||||
let actual_none_in_range = range_contains_none(&range);
|
||||
prop_assert_eq!(
|
||||
actual_none_in_range,
|
||||
expected_none_in_range,
|
||||
"range_contains_none failed for {:?}. Range: {:?}. Expected (from \
|
||||
range_contains_opt): {}, Actual: {}",
|
||||
std::any::type_name::<C>(),
|
||||
range,
|
||||
expected_none_in_range,
|
||||
actual_none_in_range
|
||||
);
|
||||
|
||||
// Check convert_optional_u64_range_to_u64_range
|
||||
let u64_range = convert_optional_u64_range_to_u64_range(range.clone());
|
||||
if let Some(v) = val {
|
||||
let in_u64_range = range_contains_u64(&u64_range, &v);
|
||||
let in_opt_range = range_contains_opt(&range, &Some(v));
|
||||
prop_assert_eq!(
|
||||
in_u64_range,
|
||||
in_opt_range,
|
||||
"convert_optional_u64_range_to_u64_range failed for {:?}. Val: {:?}, OptRange: \
|
||||
{:?}, U64Range: {:?}. Opt says {}, U64 says {}",
|
||||
std::any::type_name::<C>(),
|
||||
v,
|
||||
range,
|
||||
u64_range,
|
||||
in_opt_range,
|
||||
in_u64_range
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,7 +99,12 @@ where
|
||||
TSegmentSortKeyComputer: SegmentSortKeyComputer,
|
||||
C: Comparator<TSegmentSortKeyComputer::SegmentSortKey>,
|
||||
{
|
||||
pub(crate) topn_computer: TopNComputer<TSegmentSortKeyComputer::SegmentSortKey, DocId, C>,
|
||||
pub(crate) topn_computer: TopNComputer<
|
||||
TSegmentSortKeyComputer::SegmentSortKey,
|
||||
DocId,
|
||||
C,
|
||||
TSegmentSortKeyComputer::Buffer,
|
||||
>,
|
||||
pub(crate) segment_ord: u32,
|
||||
pub(crate) segment_sort_key_computer: TSegmentSortKeyComputer,
|
||||
}
|
||||
@@ -120,6 +125,11 @@ where
|
||||
);
|
||||
}
|
||||
|
||||
fn collect_block(&mut self, docs: &[DocId]) {
|
||||
self.segment_sort_key_computer
|
||||
.compute_sort_keys_and_collect(docs, &mut self.topn_computer);
|
||||
}
|
||||
|
||||
fn harvest(self) -> Self::Fruit {
|
||||
let segment_ord = self.segment_ord;
|
||||
let segment_hits: Vec<(TSegmentSortKeyComputer::SortKey, DocAddress)> = self
|
||||
|
||||
@@ -2,6 +2,7 @@ use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
use std::ops::Range;
|
||||
|
||||
use columnar::ValueRange;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::Collector;
|
||||
@@ -10,8 +11,7 @@ use crate::collector::sort_key::{
|
||||
SortByStaticFastValue, SortByString,
|
||||
};
|
||||
use crate::collector::sort_key_top_collector::TopBySortKeyCollector;
|
||||
use crate::collector::top_collector::ComparableDoc;
|
||||
use crate::collector::{SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::collector::{ComparableDoc, SegmentSortKeyComputer, SortKeyComputer};
|
||||
use crate::fastfield::FastValue;
|
||||
use crate::{DocAddress, DocId, Order, Score, SegmentReader};
|
||||
|
||||
@@ -481,11 +481,22 @@ where
|
||||
type SortKey = TSortKey;
|
||||
type SegmentSortKey = TSortKey;
|
||||
type SegmentComparator = NaturalComparator;
|
||||
type Buffer = ();
|
||||
|
||||
fn segment_sort_key(&mut self, doc: DocId, score: Score) -> TSortKey {
|
||||
(self.sort_key_fn)(doc, score)
|
||||
}
|
||||
|
||||
fn segment_sort_keys(
|
||||
&mut self,
|
||||
_input_docs: &[DocId],
|
||||
_output: &mut Vec<ComparableDoc<Self::SegmentSortKey, DocId>>,
|
||||
_buffer: &mut Self::Buffer,
|
||||
_filter: ValueRange<Self::SegmentSortKey>,
|
||||
) {
|
||||
unimplemented!("Batch computation is not supported for tweak score.")
|
||||
}
|
||||
|
||||
/// Convert a segment level score into the global level score.
|
||||
fn convert_segment_sort_key(&self, sort_key: Self::SegmentSortKey) -> Self::SortKey {
|
||||
sort_key
|
||||
@@ -509,12 +520,14 @@ where
|
||||
/// the ascending `DocId|DocAddress` tie-breaking behavior without additional comparisons.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(from = "TopNComputerDeser<Score, D, C>")]
|
||||
pub struct TopNComputer<Score, D, C> {
|
||||
pub struct TopNComputer<Score, D, C, Buffer = ()> {
|
||||
/// The buffer reverses sort order to get top-semantics instead of bottom-semantics
|
||||
buffer: Vec<ComparableDoc<Score, D>>,
|
||||
top_n: usize,
|
||||
pub(crate) threshold: Option<Score>,
|
||||
comparator: C,
|
||||
#[serde(skip)]
|
||||
scratch: Buffer,
|
||||
}
|
||||
|
||||
// Intermediate struct for TopNComputer for deserialization, to keep vec capacity
|
||||
@@ -526,7 +539,9 @@ struct TopNComputerDeser<Score, D, C> {
|
||||
comparator: C,
|
||||
}
|
||||
|
||||
impl<Score, D, C> From<TopNComputerDeser<Score, D, C>> for TopNComputer<Score, D, C> {
|
||||
impl<Score, D, C, Buffer> From<TopNComputerDeser<Score, D, C>> for TopNComputer<Score, D, C, Buffer>
|
||||
where Buffer: Default
|
||||
{
|
||||
fn from(mut value: TopNComputerDeser<Score, D, C>) -> Self {
|
||||
let expected_cap = value.top_n.max(1) * 2;
|
||||
let current_cap = value.buffer.capacity();
|
||||
@@ -541,12 +556,15 @@ impl<Score, D, C> From<TopNComputerDeser<Score, D, C>> for TopNComputer<Score, D
|
||||
top_n: value.top_n,
|
||||
threshold: value.threshold,
|
||||
comparator: value.comparator,
|
||||
scratch: Buffer::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Score: std::fmt::Debug, D, C> std::fmt::Debug for TopNComputer<Score, D, C>
|
||||
where C: Comparator<Score>
|
||||
impl<Score: std::fmt::Debug, D, C, Buffer> std::fmt::Debug for TopNComputer<Score, D, C, Buffer>
|
||||
where
|
||||
C: Comparator<Score>,
|
||||
Buffer: std::fmt::Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
|
||||
f.debug_struct("TopNComputer")
|
||||
@@ -554,12 +572,13 @@ where C: Comparator<Score>
|
||||
.field("top_n", &self.top_n)
|
||||
.field("current_threshold", &self.threshold)
|
||||
.field("comparator", &self.comparator)
|
||||
.field("scratch", &self.scratch)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
// Custom clone to keep capacity
|
||||
impl<Score: Clone, D: Clone, C: Clone> Clone for TopNComputer<Score, D, C> {
|
||||
impl<Score: Clone, D: Clone, C: Clone, Buffer: Clone> Clone for TopNComputer<Score, D, C, Buffer> {
|
||||
fn clone(&self) -> Self {
|
||||
let mut buffer_clone = Vec::with_capacity(self.buffer.capacity());
|
||||
buffer_clone.extend(self.buffer.iter().cloned());
|
||||
@@ -568,15 +587,17 @@ impl<Score: Clone, D: Clone, C: Clone> Clone for TopNComputer<Score, D, C> {
|
||||
top_n: self.top_n,
|
||||
threshold: self.threshold.clone(),
|
||||
comparator: self.comparator.clone(),
|
||||
scratch: self.scratch.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<TSortKey, D> TopNComputer<TSortKey, D, ReverseComparator>
|
||||
impl<TSortKey, D> TopNComputer<TSortKey, D, ReverseComparator, ()>
|
||||
where
|
||||
D: Ord,
|
||||
TSortKey: Clone,
|
||||
NaturalComparator: Comparator<TSortKey>,
|
||||
ReverseComparator: Comparator<TSortKey>,
|
||||
{
|
||||
/// Create a new `TopNComputer`.
|
||||
/// Internally it will allocate a buffer of size `2 * top_n`.
|
||||
@@ -586,7 +607,7 @@ where
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn compare_for_top_k<TSortKey, D: Ord, C: Comparator<TSortKey>>(
|
||||
pub fn compare_for_top_k<TSortKey, D: Ord, C: Comparator<TSortKey>>(
|
||||
c: &C,
|
||||
lhs: &ComparableDoc<TSortKey, D>,
|
||||
rhs: &ComparableDoc<TSortKey, D>,
|
||||
@@ -597,21 +618,26 @@ fn compare_for_top_k<TSortKey, D: Ord, C: Comparator<TSortKey>>(
|
||||
// sort by doc id
|
||||
}
|
||||
|
||||
impl<TSortKey, D, C> TopNComputer<TSortKey, D, C>
|
||||
impl<TSortKey, D, C, Buffer> TopNComputer<TSortKey, D, C, Buffer>
|
||||
where
|
||||
D: Ord,
|
||||
TSortKey: Clone,
|
||||
C: Comparator<TSortKey>,
|
||||
Buffer: Default,
|
||||
{
|
||||
/// Create a new `TopNComputer`.
|
||||
/// Internally it will allocate a buffer of size `2 * top_n`.
|
||||
/// Internally it will allocate a buffer of size `(top_n.max(1) * 2) +
|
||||
/// COLLECT_BLOCK_BUFFER_LEN`.
|
||||
pub fn new_with_comparator(top_n: usize, comparator: C) -> Self {
|
||||
let vec_cap = top_n.max(1) * 2;
|
||||
// We ensure that there is always enough space to include an entire block in the buffer if
|
||||
// need be, so that `push_block_lazy` can avoid checking capacity inside its loop.
|
||||
let vec_cap = (top_n.max(1) * 2) + crate::COLLECT_BLOCK_BUFFER_LEN;
|
||||
TopNComputer {
|
||||
buffer: Vec::with_capacity(vec_cap),
|
||||
top_n,
|
||||
threshold: None,
|
||||
comparator,
|
||||
scratch: Buffer::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -635,16 +661,28 @@ where
|
||||
// At this point, we need to have established that the doc is above the threshold.
|
||||
#[inline(always)]
|
||||
pub(crate) fn append_doc(&mut self, doc: D, sort_key: TSortKey) {
|
||||
if self.buffer.len() == self.buffer.capacity() {
|
||||
let median = self.truncate_top_n();
|
||||
self.threshold = Some(median);
|
||||
}
|
||||
// This cannot panic, because we truncate_median will at least remove one element, since
|
||||
// the min capacity is 2.
|
||||
self.reserve(1);
|
||||
// This cannot panic, because we've reserved room for one element.
|
||||
let comparable_doc = ComparableDoc { doc, sort_key };
|
||||
push_assuming_capacity(comparable_doc, &mut self.buffer);
|
||||
}
|
||||
|
||||
// Ensure that there is capacity to push `additional` more elements without resizing.
|
||||
#[inline(always)]
|
||||
pub(crate) fn reserve(&mut self, additional: usize) {
|
||||
if self.buffer.len() + additional > self.buffer.capacity() {
|
||||
let median = self.truncate_top_n();
|
||||
debug_assert!(self.buffer.len() + additional <= self.buffer.capacity());
|
||||
self.threshold = Some(median);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn buffer_and_scratch(
|
||||
&mut self,
|
||||
) -> (&mut Vec<ComparableDoc<TSortKey, D>>, &mut Buffer) {
|
||||
(&mut self.buffer, &mut self.scratch)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn truncate_top_n(&mut self) -> TSortKey {
|
||||
// Use select_nth_unstable to find the top nth score
|
||||
@@ -684,7 +722,7 @@ where
|
||||
//
|
||||
// Panics if there is not enough capacity to add an element.
|
||||
#[inline(always)]
|
||||
fn push_assuming_capacity<T>(el: T, buf: &mut Vec<T>) {
|
||||
pub fn push_assuming_capacity<T>(el: T, buf: &mut Vec<T>) {
|
||||
let prev_len = buf.len();
|
||||
assert!(prev_len < buf.capacity());
|
||||
// This is mimicking the current (non-stabilized) implementation in std.
|
||||
@@ -702,8 +740,7 @@ mod tests {
|
||||
|
||||
use super::{TopDocs, TopNComputer};
|
||||
use crate::collector::sort_key::{ComparatorEnum, NaturalComparator, ReverseComparator};
|
||||
use crate::collector::top_collector::ComparableDoc;
|
||||
use crate::collector::{Collector, DocSetCollector};
|
||||
use crate::collector::{Collector, ComparableDoc, DocSetCollector};
|
||||
use crate::query::{AllQuery, Query, QueryParser};
|
||||
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
|
||||
use crate::time::format_description::well_known::Rfc3339;
|
||||
@@ -1408,11 +1445,11 @@ mod tests {
|
||||
#[test]
|
||||
fn test_top_field_collect_string_prop(
|
||||
order in prop_oneof!(Just(Order::Desc), Just(Order::Asc)),
|
||||
limit in 1..256_usize,
|
||||
offset in 0..256_usize,
|
||||
limit in 1..32_usize,
|
||||
offset in 0..32_usize,
|
||||
segments_terms in
|
||||
proptest::collection::vec(
|
||||
proptest::collection::vec(0..32_u8, 1..32_usize),
|
||||
proptest::collection::vec(0..64_u8, 1..256_usize),
|
||||
0..8_usize,
|
||||
)
|
||||
) {
|
||||
@@ -1733,7 +1770,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_top_n_computer_not_at_capacity() {
|
||||
let mut top_n_computer = TopNComputer::new_with_comparator(4, NaturalComparator);
|
||||
let mut top_n_computer: TopNComputer<f32, u32, _, ()> =
|
||||
TopNComputer::new_with_comparator(4, NaturalComparator);
|
||||
top_n_computer.append_doc(1, 0.8);
|
||||
top_n_computer.append_doc(3, 0.2);
|
||||
top_n_computer.append_doc(5, 0.3);
|
||||
@@ -1758,7 +1796,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_top_n_computer_at_capacity() {
|
||||
let mut top_collector = TopNComputer::new_with_comparator(4, NaturalComparator);
|
||||
let mut top_collector: TopNComputer<f32, u32, _, ()> =
|
||||
TopNComputer::new_with_comparator(4, NaturalComparator);
|
||||
top_collector.append_doc(1, 0.8);
|
||||
top_collector.append_doc(3, 0.2);
|
||||
top_collector.append_doc(5, 0.3);
|
||||
@@ -1795,12 +1834,14 @@ mod tests {
|
||||
let doc_ids_collection = [4, 5, 6];
|
||||
let score = 3.3f32;
|
||||
|
||||
let mut top_collector_limit_2 = TopNComputer::new_with_comparator(2, NaturalComparator);
|
||||
let mut top_collector_limit_2: TopNComputer<f32, u32, _, ()> =
|
||||
TopNComputer::new_with_comparator(2, NaturalComparator);
|
||||
for id in &doc_ids_collection {
|
||||
top_collector_limit_2.append_doc(*id, score);
|
||||
}
|
||||
|
||||
let mut top_collector_limit_3 = TopNComputer::new_with_comparator(3, NaturalComparator);
|
||||
let mut top_collector_limit_3: TopNComputer<f32, u32, _, ()> =
|
||||
TopNComputer::new_with_comparator(3, NaturalComparator);
|
||||
for id in &doc_ids_collection {
|
||||
top_collector_limit_3.append_doc(*id, score);
|
||||
}
|
||||
@@ -1821,15 +1862,16 @@ mod bench {
|
||||
|
||||
#[bench]
|
||||
fn bench_top_segment_collector_collect_at_capacity(b: &mut Bencher) {
|
||||
let mut top_collector = TopNComputer::new_with_comparator(100, NaturalComparator);
|
||||
let mut top_collector: TopNComputer<f32, u32, _, ()> =
|
||||
TopNComputer::new_with_comparator(100, NaturalComparator);
|
||||
|
||||
for i in 0..100 {
|
||||
top_collector.append_doc(i, 0.8);
|
||||
top_collector.append_doc(i as u32, 0.8);
|
||||
}
|
||||
|
||||
b.iter(|| {
|
||||
for i in 0..100 {
|
||||
top_collector.append_doc(i, 0.8);
|
||||
top_collector.append_doc(i as u32, 0.8);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ mod tests {
|
||||
use std::ops::{Range, RangeInclusive};
|
||||
use std::path::Path;
|
||||
|
||||
use columnar::StrColumn;
|
||||
use columnar::{StrColumn, ValueRange};
|
||||
use common::{ByteCount, DateTimePrecision, HasLen, TerminatingWrite};
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::prelude::SliceRandom;
|
||||
@@ -944,7 +944,7 @@ mod tests {
|
||||
let test_range = |range: RangeInclusive<u64>| {
|
||||
let expected_count = numbers.iter().filter(|num| range.contains(*num)).count();
|
||||
let mut vec = vec![];
|
||||
field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec);
|
||||
field.get_row_ids_for_value_range(ValueRange::Inclusive(range), 0..u32::MAX, &mut vec);
|
||||
assert_eq!(vec.len(), expected_count);
|
||||
};
|
||||
test_range(50..=50);
|
||||
@@ -1022,7 +1022,7 @@ mod tests {
|
||||
let test_range = |range: RangeInclusive<u64>| {
|
||||
let expected_count = numbers.iter().filter(|num| range.contains(*num)).count();
|
||||
let mut vec = vec![];
|
||||
field.get_row_ids_for_value_range(range, 0..u32::MAX, &mut vec);
|
||||
field.get_row_ids_for_value_range(ValueRange::Inclusive(range), 0..u32::MAX, &mut vec);
|
||||
assert_eq!(vec.len(), expected_count);
|
||||
};
|
||||
let test_range_variant = |start, stop| {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use core::fmt::Debug;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
use columnar::Column;
|
||||
use columnar::{Column, ValueRange};
|
||||
|
||||
use crate::{DocId, DocSet, TERMINATED};
|
||||
|
||||
@@ -41,7 +40,7 @@ impl VecCursor {
|
||||
|
||||
pub(crate) struct RangeDocSet<T> {
|
||||
/// The range filter on the values.
|
||||
value_range: RangeInclusive<T>,
|
||||
value_range: ValueRange<T>,
|
||||
column: Column<T>,
|
||||
/// The next docid start range to fetch (inclusive).
|
||||
next_fetch_start: u32,
|
||||
@@ -61,8 +60,8 @@ pub(crate) struct RangeDocSet<T> {
|
||||
|
||||
const DEFAULT_FETCH_HORIZON: u32 = 128;
|
||||
impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> RangeDocSet<T> {
|
||||
pub(crate) fn new(value_range: RangeInclusive<T>, column: Column<T>) -> Self {
|
||||
if *value_range.start() > column.max_value() || *value_range.end() < column.min_value() {
|
||||
pub(crate) fn new(value_range: ValueRange<T>, column: Column<T>) -> Self {
|
||||
if !value_range.intersects(column.min_value(), column.max_value()) {
|
||||
return Self {
|
||||
value_range,
|
||||
column,
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::ops::{Bound, RangeInclusive};
|
||||
|
||||
use columnar::{
|
||||
Cardinality, Column, ColumnType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
|
||||
NumericalType, StrColumn,
|
||||
NumericalType, StrColumn, ValueRange,
|
||||
};
|
||||
use common::bounds::{BoundsRange, TransformBound};
|
||||
|
||||
@@ -154,7 +154,7 @@ impl Weight for FastFieldRangeWeight {
|
||||
ip_addr_column.min_value(),
|
||||
ip_addr_column.max_value(),
|
||||
);
|
||||
let docset = RangeDocSet::new(value_range, ip_addr_column);
|
||||
let docset = RangeDocSet::new(ValueRange::Inclusive(value_range), ip_addr_column);
|
||||
Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
} else if field_type.is_str() {
|
||||
let Some(str_dict_column): Option<StrColumn> = reader.fast_fields().str(&field_name)?
|
||||
@@ -426,7 +426,7 @@ fn search_on_u64_ff(
|
||||
}
|
||||
}
|
||||
|
||||
let docset = RangeDocSet::new(value_range, column);
|
||||
let docset = RangeDocSet::new(ValueRange::Inclusive(value_range), column);
|
||||
Ok(Box::new(ConstScorer::new(docset, boost)))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user