Support for NotNaN in fast fields

2026-01-05 16:52:55 +00:00 · 2022-12-21 12:20:48 +09:00
parent bb48c3e488
commit 540a9972bd
4 changed files with 44 additions and 7 deletions
--- a/fastfield_codecs/Cargo.toml
+++ b/fastfield_codecs/Cargo.toml
@@ -20,6 +20,7 @@ fastdivide = "0.4"
 log = "0.4"
 itertools = { version = "0.10.3" }
 measure_time = { version="0.8.2", optional=true}
+ordered-float = "3.4"

 [dev-dependencies]
 more-asserts = "0.3.0"
--- a/fastfield_codecs/src/lib.rs
+++ b/fastfield_codecs/src/lib.rs
@@ -44,6 +44,8 @@ mod column;
 mod gcd;
 mod serialize;

+pub use ordered_float;
+
 use self::bitpacked::BitpackedCodec;
 use self::blockwise_linear::BlockwiseLinearCodec;
 pub use self::column::{monotonic_map_column, Column, IterColumn, VecColumn};
--- a/fastfield_codecs/src/monotonic_mapping.rs
+++ b/fastfield_codecs/src/monotonic_mapping.rs
@@ -1,6 +1,7 @@
 use std::marker::PhantomData;

 use fastdivide::DividerU64;
+use ordered_float::NotNan;

 use crate::MonotonicallyMappableToU128;

@@ -192,6 +193,8 @@ impl MonotonicallyMappableToU64 for bool {
    }
 }

+// TODO: remove this impl.
+// Tantivy should reject NaN values and work with `NotNan<f64>` internally.
 impl MonotonicallyMappableToU64 for f64 {
    fn to_u64(self) -> u64 {
        common::f64_to_u64(self)
@@ -202,11 +205,42 @@ impl MonotonicallyMappableToU64 for f64 {
    }
 }

+impl MonotonicallyMappableToU64 for ordered_float::NotNan<f64> {
+    fn to_u64(self) -> u64 {
+        common::f64_to_u64(self.into_inner())
+    }
+
+    fn from_u64(val: u64) -> Self {
+        NotNan::new(common::u64_to_f64(val)).expect("Invalid NotNaN f64 value.")
+    }
+}
+
 #[cfg(test)]
 mod tests {

    use super::*;

+    #[test]
+    fn test_from_u64_pos_inf() {
+        let inf_as_u64 = common::f64_to_u64(f64::INFINITY);
+        let inf_back_to_f64 = NotNan::from_u64(inf_as_u64);
+        assert_eq!(inf_back_to_f64, NotNan::new(f64::INFINITY).unwrap());
+    }
+
+    #[test]
+    fn test_from_u64_neg_inf() {
+        let inf_as_u64 = common::f64_to_u64(-f64::INFINITY);
+        let inf_back_to_f64 = NotNan::from_u64(inf_as_u64);
+        assert_eq!(inf_back_to_f64, NotNan::new(-f64::INFINITY).unwrap());
+    }
+
+    #[test]
+    #[should_panic(expected = "Invalid NotNaN")]
+    fn test_from_u64_nan_panics() {
+        let nan_as_u64 = common::f64_to_u64(f64::NAN);
+        NotNan::from_u64(nan_as_u64);
+    }
+
    #[test]
    fn strictly_monotonic_test() {
        // identity mapping
--- a/fastfield_codecs/src/serialize.rs
+++ b/fastfield_codecs/src/serialize.rs
@@ -197,12 +197,12 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
 }

 #[allow(dead_code)]
-pub enum ValueIndexInfo {
-    MultiValue(Box<dyn MultiValueIndexInfo>),
-    SingleValue(Box<dyn SingleValueIndexInfo>),
+pub enum ValueIndexInfo<'a> {
+    MultiValue(Box<dyn MultiValueIndexInfo + 'a>),
+    SingleValue(Box<dyn SingleValueIndexInfo + 'a>),
 }

-impl Default for ValueIndexInfo {
+impl Default for ValueIndexInfo<'static> {
    fn default() -> Self {
        struct Dummy {}
        impl SingleValueIndexInfo for Dummy {
@@ -221,7 +221,7 @@ impl Default for ValueIndexInfo {
    }
 }

-impl ValueIndexInfo {
+impl<'a> ValueIndexInfo<'a> {
    fn get_cardinality(&self) -> FastFieldCardinality {
        match self {
            ValueIndexInfo::MultiValue(_) => FastFieldCardinality::Multi,
@@ -236,7 +236,7 @@ pub trait MultiValueIndexInfo {
    /// The number of values in the column.
    fn num_vals(&self) -> u32;
    /// Return the start index of the values for each doc
-    fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
+    fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
 }

 pub trait SingleValueIndexInfo {
@@ -245,7 +245,7 @@ pub trait SingleValueIndexInfo {
    /// The number of non-null values in the column.
    fn num_non_nulls(&self) -> u32;
    /// Return an iterator of the positions of docs with a value
-    fn iter(&self) -> Box<dyn Iterator<Item = u32>>;
+    fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
 }

 /// Serializes u128 values with the compact space codec.