diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs index 1bf7c8b9c..b5a6fa25d 100644 --- a/fastfield_codecs/src/linearinterpol.rs +++ b/fastfield_codecs/src/linearinterpol.rs @@ -151,9 +151,13 @@ impl LinearInterpolFastFieldSerializer { Ok(()) } } + fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 { + if num_vals <= 1 { + return 0.0; + } // We calculate the slope with f64 high precision and use the result in lower precision f32 - // This is done in order to handle estimation for very large values like i64::MAX + // This is done in order to handle estimations for very large values like i64::MAX ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32 } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 8cb7ec089..ad408af1f 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -142,7 +142,8 @@ impl FastFieldReaderCodecWrapper { /// Opens a fast field given a file. pub fn open(file: FileSlice) -> crate::Result { let mut bytes = file.read_bytes()?; - let _id = u8::deserialize(&mut bytes)?; + let id = u8::deserialize(&mut bytes)?; + assert_eq!(BitpackedFastFieldSerializer::>::ID, id); Self::open_from_bytes(bytes) } /// Opens a fast field given the bytes. diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index 8a48199a6..954823a13 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -44,7 +44,7 @@ impl CompositeFastFieldSerializer { Ok(CompositeFastFieldSerializer { composite_write }) } - /// Serialize data into a new u64 fast field. The compression will be detected automatically. + /// Serialize data into a new u64 fast field. The best compression codec will be chosen automatically. 
pub fn create_auto_detect_u64_fast_field( &mut self, field: Field, @@ -76,8 +76,23 @@ impl CompositeFastFieldSerializer { ); estimations.push((ratio, name, id)); } + if let Some(broken_estimation) = estimations + .iter() + .find(|estimation| estimation.0.is_nan()) + { + warn!( + "broken estimation for fast field codec {}", + broken_estimation.1 + ); + } + // removing nan values for codecs with broken calculations, and max values which disables codecs + estimations.retain(|estimation| !estimation.0.is_nan() && estimation.0 != f32::MAX); estimations.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); let (_ratio, name, id) = estimations[0]; + debug!( + "choosing fast field codec {} for field_id {:?}", + name, field + ); // todo print actual field name id.serialize(field_write)?; match name { BitpackedFastFieldSerializer::>::NAME => {