diff --git a/common/src/lib.rs b/common/src/lib.rs index 9dac16de1..f2cf57f9b 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -34,7 +34,8 @@ impl> HasLen for T { } } -const HIGHEST_BIT: u64 = 1 << 63; +const HIGHEST_BIT_64: u64 = 1 << 63; +const HIGHEST_BIT_32: u32 = 1 << 31; /// Maps a `i64` to `u64` /// @@ -58,13 +59,13 @@ const HIGHEST_BIT: u64 = 1 << 63; /// The reverse mapping is [`u64_to_i64()`]. #[inline] pub fn i64_to_u64(val: i64) -> u64 { - (val as u64) ^ HIGHEST_BIT + (val as u64) ^ HIGHEST_BIT_64 } /// Reverse the mapping given by [`i64_to_u64()`]. #[inline] pub fn u64_to_i64(val: u64) -> i64 { - (val ^ HIGHEST_BIT) as i64 + (val ^ HIGHEST_BIT_64) as i64 } /// Maps a `f64` to `u64` @@ -88,7 +89,7 @@ pub fn u64_to_i64(val: u64) -> i64 { pub fn f64_to_u64(val: f64) -> u64 { let bits = val.to_bits(); if val.is_sign_positive() { - bits ^ HIGHEST_BIT + bits ^ HIGHEST_BIT_64 } else { !bits } @@ -97,26 +98,148 @@ pub fn f64_to_u64(val: f64) -> u64 { /// Reverse the mapping given by [`f64_to_u64()`]. #[inline] pub fn u64_to_f64(val: u64) -> f64 { - f64::from_bits(if val & HIGHEST_BIT != 0 { - val ^ HIGHEST_BIT + f64::from_bits(if val & HIGHEST_BIT_64 != 0 { + val ^ HIGHEST_BIT_64 } else { !val }) } +/// Maps a `f32` to `u64` +/// +/// # See also +/// Similar mapping for f64 [`u64_to_f64()`]. +#[inline] +pub fn f32_to_u64(val: f32) -> u64 { + let bits = val.to_bits(); + let res32 = if val.is_sign_positive() { + bits ^ HIGHEST_BIT_32 + } else { + !bits + }; + res32 as u64 +} + +/// Reverse the mapping given by [`f32_to_u64()`]. +#[inline] +pub fn u64_to_f32(val: u64) -> f32 { + debug_assert!(val <= 1 << 32); + let val = val as u32; + f32::from_bits(if val & HIGHEST_BIT_32 != 0 { + val ^ HIGHEST_BIT_32 + } else { + !val + }) +} + +/// Maps a `f64` to a fixed point representation. +/// Lower bound is inclusive, upper bound is exclusive. +/// `precision` is the number of bits used to represent the number. +/// +/// This is a lossy, affine transformation. All provided values must be finite and non-NaN. +/// Care should be taken to not provide values which would cause loss of precision such as values +/// low enough to get sub-normal numbers, value high enough rounding would cause ±Inf to appear, or +/// a precision larger than 50b. +/// +/// # See also +/// The reverse mapping is [`fixed_point_to_f64()`]. +#[inline] +pub fn f64_to_fixed_point(val: f64, min: f64, max: f64, precision: u8) -> u64 { + debug_assert!((1..53).contains(&precision)); + debug_assert!(min < max); + + let delta = max - min; + let mult = (1u64 << precision) as f64; + let bucket_size = delta / mult; + let upper_bound = f64_next_down(max).min(max - bucket_size); + + // due to different cases of rounding error, we need to enforce upper_bound to be + // max-bucket_size, but also that upper_bound < max, which is not given for small enough + // bucket_size. + let val = val.clamp(min, upper_bound); + + let res = (val - min) / bucket_size; + if res.fract() == 0.5 { + res as u64 + } else { + // round down when getting x.5 + res.round() as u64 + } +} + +/// Reverse the mapping given by [`f64_to_fixed_point()`]. +#[inline] +pub fn fixed_point_to_f64(val: u64, min: f64, max: f64, precision: u8) -> f64 { + let delta = max - min; + let mult = (1u64 << precision) as f64; + let bucket_size = delta / mult; + + bucket_size.mul_add(val as f64, min) +} + +// taken from rfc/3173-float-next-up-down, commented out part about nan in infinity as it is not +// needed. +fn f64_next_down(this: f64) -> f64 { + const NEG_TINY_BITS: u64 = 0x8000_0000_0000_0001; + const CLEAR_SIGN_MASK: u64 = 0x7fff_ffff_ffff_ffff; + + let bits = this.to_bits(); + // if this.is_nan() || bits == f64::NEG_INFINITY.to_bits() { + // return this; + // } + let abs = bits & CLEAR_SIGN_MASK; + let next_bits = if abs == 0 { + NEG_TINY_BITS + } else if bits == abs { + bits - 1 + } else { + bits + 1 + }; + f64::from_bits(next_bits) +} + #[cfg(test)] pub mod test { + use std::cmp::Ordering; use proptest::prelude::*; - use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, BinarySerializable, FixedSize}; + use super::{ + f32_to_u64, f64_to_fixed_point, f64_to_u64, fixed_point_to_f64, i64_to_u64, u64_to_f32, + u64_to_f64, u64_to_i64, BinarySerializable, FixedSize, + }; fn test_i64_converter_helper(val: i64) { assert_eq!(u64_to_i64(i64_to_u64(val)), val); } fn test_f64_converter_helper(val: f64) { - assert_eq!(u64_to_f64(f64_to_u64(val)), val); + assert_eq!(u64_to_f64(f64_to_u64(val)).total_cmp(&val), Ordering::Equal); + } + + fn test_f32_converter_helper(val: f32) { + assert_eq!(u64_to_f32(f32_to_u64(val)).total_cmp(&val), Ordering::Equal); + } + + fn test_fixed_point_converter_helper(val: f64, min: f64, max: f64, precision: u8) { + let bucket_count = 1 << precision; + + let packed = f64_to_fixed_point(val, min, max, precision); + + assert!(packed < bucket_count, "used to much bits"); + + let depacked = fixed_point_to_f64(packed, min, max, precision); + let repacked = f64_to_fixed_point(depacked, min, max, precision); + + assert_eq!(packed, repacked, "generational loss"); + + let error = (val.clamp(min, crate::f64_next_down(max)) - depacked).abs(); + + let expected = (max - min) / (bucket_count as f64); + assert!( + error <= (max - min) / (bucket_count as f64) * 2.0, + "error larger than expected" + ); } pub fn fixed_size_test() { @@ -125,12 +248,75 @@ pub mod test { assert_eq!(buffer.len(), O::SIZE_IN_BYTES); } + fn fixed_point_bound() -> proptest::num::f64::Any { + proptest::num::f64::POSITIVE + | proptest::num::f64::NEGATIVE + | proptest::num::f64::NORMAL + | proptest::num::f64::ZERO + } + proptest! { #[test] - fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) { + fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::ANY, proptest::num::f64::ANY)) { + test_f64_converter_helper(left); + test_f64_converter_helper(right); + let left_u64 = f64_to_u64(left); let right_u64 = f64_to_u64(right); - assert_eq!(left_u64 < right_u64, left < right); + + assert_eq!(left_u64.cmp(&right_u64), left.total_cmp(&right)); + } + + #[test] + fn test_f32_converter_monotonicity_proptest((left, right) in (proptest::num::f32::ANY, proptest::num::f32::ANY)) { + test_f32_converter_helper(left); + test_f32_converter_helper(right); + + let left_u64 = f32_to_u64(left); + let right_u64 = f32_to_u64(right); + assert_eq!(left_u64.cmp(&right_u64), left.total_cmp(&right)); + } + + #[test] + fn test_fixed_point_converter_proptest((left, right, min, max, precision) in + (fixed_point_bound(), fixed_point_bound(), + fixed_point_bound(), fixed_point_bound(), + proptest::num::u8::ANY)) { + // convert so all input are legal + let (min, max) = if min < max { + (min, max) + } else if min > max { + (max, min) + } else { + return Ok(()); // equals + }; + if 1 > precision || precision >= 50 { + return Ok(()); + } + + let max_full_precision = 53.0 - precision as f64; + if (max / min).abs().log2().abs() > max_full_precision { + return Ok(()); + } + // we will go in subnormal territories => loss of precision + if (((max - min).log2() - precision as f64) as i32) < f64::MIN_EXP { + return Ok(()); + } + + if (max - min).is_infinite() { + return Ok(()); + } + + test_fixed_point_converter_helper(left, min, max, precision); + test_fixed_point_converter_helper(right, min, max, precision); + + let left_u64 = f64_to_fixed_point(left, min, max, precision); + let right_u64 = f64_to_fixed_point(right, min, max, precision); + if left < right { + assert!(left_u64 <= right_u64); + } else if left > right { + assert!(left_u64 >= right_u64) + } } } @@ -168,4 +354,27 @@ pub mod test { assert!(f64_to_u64(-2.0) < f64_to_u64(1.0)); assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5)); } + + #[test] + fn test_f32_converter() { + test_f32_converter_helper(f32::INFINITY); + test_f32_converter_helper(f32::NEG_INFINITY); + test_f32_converter_helper(0.0); + test_f32_converter_helper(-0.0); + test_f32_converter_helper(1.0); + test_f32_converter_helper(-1.0); + } + + #[test] + fn test_f32_order() { + assert!(!(f32_to_u64(f32::NEG_INFINITY)..f32_to_u64(f32::INFINITY)) + .contains(&f32_to_u64(f32::NAN))); // nan is not a number + assert!(f32_to_u64(1.5) > f32_to_u64(1.0)); // same exponent, different mantissa + assert!(f32_to_u64(2.0) > f32_to_u64(1.0)); // same mantissa, different exponent + assert!(f32_to_u64(2.0) > f32_to_u64(1.5)); // different exponent and mantissa + assert!(f32_to_u64(1.0) > f32_to_u64(-1.0)); // pos > neg + assert!(f32_to_u64(-1.5) < f32_to_u64(-1.0)); + assert!(f32_to_u64(-2.0) < f32_to_u64(1.0)); + assert!(f32_to_u64(-2.0) < f32_to_u64(-1.5)); + } }