Support for NotNaN in fast fields

This commit is contained in:
Paul Masurel
2022-12-21 12:20:48 +09:00
parent bb48c3e488
commit 540a9972bd
4 changed files with 44 additions and 7 deletions

View File

@@ -20,6 +20,7 @@ fastdivide = "0.4"
log = "0.4"
itertools = { version = "0.10.3" }
measure_time = { version="0.8.2", optional=true}
ordered-float = "3.4"
[dev-dependencies]
more-asserts = "0.3.0"

View File

@@ -44,6 +44,8 @@ mod column;
mod gcd;
mod serialize;
pub use ordered_float;
use self::bitpacked::BitpackedCodec;
use self::blockwise_linear::BlockwiseLinearCodec;
pub use self::column::{monotonic_map_column, Column, IterColumn, VecColumn};

View File

@@ -1,6 +1,7 @@
use std::marker::PhantomData;
use fastdivide::DividerU64;
use ordered_float::NotNan;
use crate::MonotonicallyMappableToU128;
@@ -192,6 +193,8 @@ impl MonotonicallyMappableToU64 for bool {
}
}
// TODO: remove this impl.
// Tantivy should reject NaN values and work with `NotNan` internally.
impl MonotonicallyMappableToU64 for f64 {
fn to_u64(self) -> u64 {
common::f64_to_u64(self)
@@ -202,11 +205,42 @@ impl MonotonicallyMappableToU64 for f64 {
}
}
impl MonotonicallyMappableToU64 for ordered_float::NotNan<f64> {
    /// Unwraps the inner `f64` and encodes it with `common::f64_to_u64`,
    /// the same encoding used by the plain `f64` implementation.
    fn to_u64(self) -> u64 {
        let raw: f64 = self.into_inner();
        common::f64_to_u64(raw)
    }

    /// Decodes `val` with `common::u64_to_f64` and wraps the result in `NotNan`.
    ///
    /// # Panics
    ///
    /// Panics if the decoded float is NaN, since such a value cannot be
    /// represented as a `NotNan`.
    fn from_u64(val: u64) -> Self {
        let decoded: f64 = common::u64_to_f64(val);
        NotNan::new(decoded).expect("Invalid NotNaN f64 value.")
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_from_u64_pos_inf() {
    // Positive infinity is not NaN, so decoding its u64 encoding must succeed
    // and give back positive infinity.
    let encoded = common::f64_to_u64(f64::INFINITY);
    let decoded: NotNan<f64> = NotNan::from_u64(encoded);
    let expected = NotNan::new(f64::INFINITY).unwrap();
    assert_eq!(decoded, expected);
}
#[test]
fn test_from_u64_neg_inf() {
    // Negative infinity is not NaN either: it must round-trip unchanged.
    let encoded = common::f64_to_u64(f64::NEG_INFINITY);
    let decoded: NotNan<f64> = NotNan::from_u64(encoded);
    let expected = NotNan::new(f64::NEG_INFINITY).unwrap();
    assert_eq!(decoded, expected);
}
#[test]
#[should_panic(expected = "Invalid NotNaN")]
fn test_from_u64_nan_panics() {
    // A u64 that decodes to NaN cannot be wrapped in `NotNan`;
    // `from_u64` is expected to panic rather than return a value.
    let encoded = common::f64_to_u64(f64::NAN);
    NotNan::<f64>::from_u64(encoded);
}
#[test]
fn strictly_monotonic_test() {
// identity mapping

View File

@@ -197,12 +197,12 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
}
#[allow(dead_code)]
pub enum ValueIndexInfo {
MultiValue(Box<dyn MultiValueIndexInfo>),
SingleValue(Box<dyn SingleValueIndexInfo>),
pub enum ValueIndexInfo<'a> {
MultiValue(Box<dyn MultiValueIndexInfo + 'a>),
SingleValue(Box<dyn SingleValueIndexInfo + 'a>),
}
impl Default for ValueIndexInfo {
impl Default for ValueIndexInfo<'static> {
fn default() -> Self {
struct Dummy {}
impl SingleValueIndexInfo for Dummy {
@@ -221,7 +221,7 @@ impl Default for ValueIndexInfo {
}
}
impl ValueIndexInfo {
impl<'a> ValueIndexInfo<'a> {
fn get_cardinality(&self) -> FastFieldCardinality {
match self {
ValueIndexInfo::MultiValue(_) => FastFieldCardinality::Multi,
@@ -236,7 +236,7 @@ pub trait MultiValueIndexInfo {
/// The number of values in the column.
fn num_vals(&self) -> u32;
/// Return the start index of the values for each doc
fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
}
pub trait SingleValueIndexInfo {
@@ -245,7 +245,7 @@ pub trait SingleValueIndexInfo {
/// The number of non-null values in the column.
fn num_non_nulls(&self) -> u32;
/// Return an iterator of the positions of docs with a value
fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
}
/// Serializes u128 values with the compact space codec.