mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
Property test for Comparator/ValueRange consistency, and fixes.
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
use columnar::{Column, ColumnType, ColumnarReader, DynamicColumn};
|
||||
use columnar::{Column, ColumnType, ColumnarReader, DynamicColumn, ValueRange};
|
||||
use common::json_path_writer::JSON_PATH_SEGMENT_SEP_STR;
|
||||
use common::DateTime;
|
||||
use regex::Regex;
|
||||
@@ -16,7 +17,7 @@ use crate::aggregation::intermediate_agg_result::{
|
||||
};
|
||||
use crate::aggregation::segment_agg_result::SegmentAggregationCollector;
|
||||
use crate::aggregation::AggregationError;
|
||||
use crate::collector::sort_key::ReverseComparator;
|
||||
use crate::collector::sort_key::{Comparator, ReverseComparator};
|
||||
use crate::collector::TopNComputer;
|
||||
use crate::schema::OwnedValue;
|
||||
use crate::{DocAddress, DocId, SegmentOrdinal};
|
||||
@@ -383,7 +384,7 @@ impl From<FastFieldValue> for OwnedValue {
|
||||
|
||||
/// Holds a fast field value in its u64 representation, and the order in which it should be sorted.
|
||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||
struct DocValueAndOrder {
|
||||
pub(crate) struct DocValueAndOrder {
|
||||
/// A fast field value in its u64 representation.
|
||||
value: Option<u64>,
|
||||
/// Sort order for the value
|
||||
@@ -455,6 +456,37 @@ impl PartialEq for DocSortValuesAndFields {
|
||||
|
||||
impl Eq for DocSortValuesAndFields {}
|
||||
|
||||
impl Comparator<DocSortValuesAndFields> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &DocSortValuesAndFields, rhs: &DocSortValuesAndFields) -> Ordering {
|
||||
rhs.cmp(lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: DocSortValuesAndFields,
|
||||
) -> ValueRange<DocSortValuesAndFields> {
|
||||
ValueRange::LessThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct TopHitsSegmentSortKey(pub Vec<DocValueAndOrder>);
|
||||
|
||||
impl Comparator<TopHitsSegmentSortKey> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &TopHitsSegmentSortKey, rhs: &TopHitsSegmentSortKey) -> Ordering {
|
||||
rhs.cmp(lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(
|
||||
&self,
|
||||
threshold: TopHitsSegmentSortKey,
|
||||
) -> ValueRange<TopHitsSegmentSortKey> {
|
||||
ValueRange::LessThan(threshold, true)
|
||||
}
|
||||
}
|
||||
|
||||
/// The TopHitsCollector used for collecting over segments and merging results.
|
||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||
pub struct TopHitsTopNComputer {
|
||||
@@ -518,7 +550,7 @@ impl TopHitsTopNComputer {
|
||||
pub(crate) struct TopHitsSegmentCollector {
|
||||
segment_ordinal: SegmentOrdinal,
|
||||
accessor_idx: usize,
|
||||
top_n: TopNComputer<Vec<DocValueAndOrder>, DocAddress, ReverseComparator>,
|
||||
top_n: TopNComputer<TopHitsSegmentSortKey, DocAddress, ReverseComparator>,
|
||||
}
|
||||
|
||||
impl TopHitsSegmentCollector {
|
||||
@@ -539,13 +571,15 @@ impl TopHitsSegmentCollector {
|
||||
req: &TopHitsAggregationReq,
|
||||
) -> TopHitsTopNComputer {
|
||||
let mut top_hits_computer = TopHitsTopNComputer::new(req);
|
||||
// Map TopHitsSegmentSortKey back to Vec<DocValueAndOrder> if needed or use directly
|
||||
// The TopNComputer here stores TopHitsSegmentSortKey.
|
||||
let top_results = self.top_n.into_vec();
|
||||
|
||||
for res in top_results {
|
||||
let doc_value_fields = req.get_document_field_data(value_accessors, res.doc.doc_id);
|
||||
top_hits_computer.collect(
|
||||
DocSortValuesAndFields {
|
||||
sorts: res.sort_key,
|
||||
sorts: res.sort_key.0,
|
||||
doc_value_fields,
|
||||
},
|
||||
res.doc,
|
||||
@@ -579,7 +613,7 @@ impl TopHitsSegmentCollector {
|
||||
.collect();
|
||||
|
||||
self.top_n.push(
|
||||
sorts,
|
||||
TopHitsSegmentSortKey(sorts),
|
||||
DocAddress {
|
||||
segment_ord: self.segment_ordinal,
|
||||
doc_id,
|
||||
|
||||
@@ -604,7 +604,7 @@ mod tests {
|
||||
segments_data in proptest::collection::vec(
|
||||
proptest::collection::vec(
|
||||
proptest::option::of(0..100u64),
|
||||
1..10_usize // segment size
|
||||
1..1000_usize // segment size
|
||||
),
|
||||
1..4_usize // num segments
|
||||
)
|
||||
|
||||
@@ -142,16 +142,68 @@ impl Comparator<OwnedValue> for NaturalComparator {
|
||||
#[derive(Debug, Copy, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct ReverseComparator;
|
||||
|
||||
impl<T> Comparator<T> for ReverseComparator
|
||||
where NaturalComparator: Comparator<T>
|
||||
macro_rules! impl_reverse_comparator_primitive {
|
||||
($($t:ty),*) => {
|
||||
$(
|
||||
impl Comparator<$t> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &$t, rhs: &$t) -> Ordering {
|
||||
NaturalComparator.compare(rhs, lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: $t) -> ValueRange<$t> {
|
||||
ValueRange::LessThan(threshold, true)
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_reverse_comparator_primitive!(
|
||||
bool,
|
||||
u8,
|
||||
u16,
|
||||
u32,
|
||||
u64,
|
||||
u128,
|
||||
usize,
|
||||
i8,
|
||||
i16,
|
||||
i32,
|
||||
i64,
|
||||
i128,
|
||||
isize,
|
||||
f32,
|
||||
f64,
|
||||
String,
|
||||
crate::DateTime,
|
||||
Vec<u8>,
|
||||
crate::schema::Facet
|
||||
);
|
||||
|
||||
impl<T: PartialOrd + Send + Sync + std::fmt::Debug + Clone + 'static> Comparator<Option<T>>
|
||||
for ReverseComparator
|
||||
{
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &T, rhs: &T) -> Ordering {
|
||||
fn compare(&self, lhs: &Option<T>, rhs: &Option<T>) -> Ordering {
|
||||
NaturalComparator.compare(rhs, lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: T) -> ValueRange<T> {
|
||||
ValueRange::LessThan(threshold, true)
|
||||
fn threshold_to_valuerange(&self, threshold: Option<T>) -> ValueRange<Option<T>> {
|
||||
let is_some = threshold.is_some();
|
||||
ValueRange::LessThan(threshold, is_some)
|
||||
}
|
||||
}
|
||||
|
||||
impl Comparator<OwnedValue> for ReverseComparator {
|
||||
#[inline(always)]
|
||||
fn compare(&self, lhs: &OwnedValue, rhs: &OwnedValue) -> Ordering {
|
||||
NaturalComparator.compare(rhs, lhs)
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: OwnedValue) -> ValueRange<OwnedValue> {
|
||||
let is_not_null = !matches!(threshold, OwnedValue::Null);
|
||||
ValueRange::LessThan(threshold, is_not_null)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,7 +233,11 @@ where ReverseComparator: Comparator<T>
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: Option<T>) -> ValueRange<Option<T>> {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
if threshold.is_some() {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
} else {
|
||||
ValueRange::GreaterThan(threshold, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -284,7 +340,12 @@ where NaturalComparator: Comparator<T>
|
||||
}
|
||||
|
||||
fn threshold_to_valuerange(&self, threshold: Option<T>) -> ValueRange<Option<T>> {
|
||||
ValueRange::GreaterThan(threshold, true)
|
||||
if threshold.is_some() {
|
||||
let is_some = threshold.is_some();
|
||||
ValueRange::GreaterThan(threshold, is_some)
|
||||
} else {
|
||||
ValueRange::LessThan(threshold, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -743,27 +743,39 @@ pub(crate) fn range_contains_none(range: &ValueRange<Option<u64>>) -> bool {
|
||||
match range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(&None),
|
||||
ValueRange::GreaterThan(threshold, match_nulls) => *match_nulls || (None > *threshold),
|
||||
ValueRange::LessThan(threshold, match_nulls) => *match_nulls || (None < *threshold),
|
||||
ValueRange::GreaterThan(_threshold, match_nulls) => *match_nulls,
|
||||
ValueRange::LessThan(_threshold, match_nulls) => *match_nulls,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn convert_optional_u64_range_to_u64_range(
|
||||
range: ValueRange<Option<u64>>,
|
||||
) -> ValueRange<u64> {
|
||||
if range_contains_none(&range) {
|
||||
return ValueRange::All;
|
||||
}
|
||||
match range {
|
||||
ValueRange::Inclusive(r) => {
|
||||
let start = r.start().unwrap_or(0);
|
||||
let end = r.end().unwrap_or(u64::MAX);
|
||||
ValueRange::Inclusive(start..=end)
|
||||
}
|
||||
ValueRange::GreaterThan(Some(val), _match_nulls) => ValueRange::GreaterThan(val, false),
|
||||
ValueRange::GreaterThan(None, _match_nulls) => ValueRange::Inclusive(u64::MIN..=u64::MAX),
|
||||
ValueRange::LessThan(None, _match_nulls) => ValueRange::Inclusive(1..=0),
|
||||
_ => ValueRange::All,
|
||||
ValueRange::GreaterThan(Some(val), match_nulls) => {
|
||||
ValueRange::GreaterThan(val, match_nulls)
|
||||
}
|
||||
ValueRange::GreaterThan(None, match_nulls) => {
|
||||
if match_nulls {
|
||||
ValueRange::All
|
||||
} else {
|
||||
ValueRange::Inclusive(u64::MIN..=u64::MAX)
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(None, match_nulls) => {
|
||||
if match_nulls {
|
||||
ValueRange::LessThan(u64::MIN, true)
|
||||
} else {
|
||||
ValueRange::Inclusive(1..=0)
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(Some(val), match_nulls) => ValueRange::LessThan(val, match_nulls),
|
||||
ValueRange::All => ValueRange::All,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -922,3 +934,136 @@ mod tests {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod proptest_tests {
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::*;
|
||||
use crate::collector::sort_key::order::*;
|
||||
|
||||
// Re-implement logic to interpret ValueRange<Option<u64>> manually to verify expectations
|
||||
fn range_contains_opt(range: &ValueRange<Option<u64>>, val: &Option<u64>) -> bool {
|
||||
match range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(val),
|
||||
ValueRange::GreaterThan(t, match_nulls) => {
|
||||
if val.is_none() {
|
||||
*match_nulls
|
||||
} else {
|
||||
val > t
|
||||
}
|
||||
}
|
||||
ValueRange::LessThan(t, match_nulls) => {
|
||||
if val.is_none() {
|
||||
*match_nulls
|
||||
} else {
|
||||
val < t
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn range_contains_u64(range: &ValueRange<u64>, val: &u64) -> bool {
|
||||
match range {
|
||||
ValueRange::All => true,
|
||||
ValueRange::Inclusive(r) => r.contains(val),
|
||||
ValueRange::GreaterThan(t, _) => val > t,
|
||||
ValueRange::LessThan(t, _) => val < t,
|
||||
}
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_comparator_consistency_natural_none_is_lower(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<NaturalComparator>(threshold, val)?;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_comparator_consistency_reverse(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<ReverseComparator>(threshold, val)?;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_comparator_consistency_reverse_none_is_lower(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<ReverseNoneIsLowerComparator>(threshold, val)?;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_comparator_consistency_natural_none_is_higher(
|
||||
threshold in any::<Option<u64>>(),
|
||||
val in any::<Option<u64>>()
|
||||
) {
|
||||
check_comparator::<NaturalNoneIsHigherComparator>(threshold, val)?;
|
||||
}
|
||||
}
|
||||
|
||||
fn check_comparator<C: Comparator<Option<u64>>>(
|
||||
threshold: Option<u64>,
|
||||
val: Option<u64>,
|
||||
) -> std::result::Result<(), proptest::test_runner::TestCaseError> {
|
||||
let comparator = C::default();
|
||||
let range = comparator.threshold_to_valuerange(threshold);
|
||||
let ordering = comparator.compare(&val, &threshold);
|
||||
let should_be_in_range = ordering == Ordering::Greater;
|
||||
|
||||
let in_range_opt = range_contains_opt(&range, &val);
|
||||
|
||||
prop_assert_eq!(
|
||||
in_range_opt,
|
||||
should_be_in_range,
|
||||
"Comparator consistency failed for {:?}. Threshold: {:?}, Val: {:?}, Range: {:?}, \
|
||||
Ordering: {:?}. range_contains_opt says {}, but compare says {}",
|
||||
std::any::type_name::<C>(),
|
||||
threshold,
|
||||
val,
|
||||
range,
|
||||
ordering,
|
||||
in_range_opt,
|
||||
should_be_in_range
|
||||
);
|
||||
|
||||
// Check range_contains_none
|
||||
let expected_none_in_range = range_contains_opt(&range, &None);
|
||||
let actual_none_in_range = range_contains_none(&range);
|
||||
prop_assert_eq!(
|
||||
actual_none_in_range,
|
||||
expected_none_in_range,
|
||||
"range_contains_none failed for {:?}. Range: {:?}. Expected (from \
|
||||
range_contains_opt): {}, Actual: {}",
|
||||
std::any::type_name::<C>(),
|
||||
range,
|
||||
expected_none_in_range,
|
||||
actual_none_in_range
|
||||
);
|
||||
|
||||
// Check convert_optional_u64_range_to_u64_range
|
||||
let u64_range = convert_optional_u64_range_to_u64_range(range.clone());
|
||||
if let Some(v) = val {
|
||||
let in_u64_range = range_contains_u64(&u64_range, &v);
|
||||
let in_opt_range = range_contains_opt(&range, &Some(v));
|
||||
prop_assert_eq!(
|
||||
in_u64_range,
|
||||
in_opt_range,
|
||||
"convert_optional_u64_range_to_u64_range failed for {:?}. Val: {:?}, OptRange: \
|
||||
{:?}, U64Range: {:?}. Opt says {}, U64 says {}",
|
||||
std::any::type_name::<C>(),
|
||||
v,
|
||||
range,
|
||||
u64_range,
|
||||
in_opt_range,
|
||||
in_u64_range
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -597,6 +597,7 @@ where
|
||||
D: Ord,
|
||||
TSortKey: Clone,
|
||||
NaturalComparator: Comparator<TSortKey>,
|
||||
ReverseComparator: Comparator<TSortKey>,
|
||||
{
|
||||
/// Create a new `TopNComputer`.
|
||||
/// Internally it will allocate a buffer of size `2 * top_n`.
|
||||
|
||||
Reference in New Issue
Block a user