From aeea65f61de7256a018ae3a89be9e680beca9206 Mon Sep 17 00:00:00 2001 From: "cong.xie" Date: Wed, 18 Feb 2026 15:49:12 -0500 Subject: [PATCH] refactor: rewrite encoding.rs with idiomatic Rust - Replace bare constants with FlagType and BinEncodingMode enums - Use const fn for flag byte construction instead of raw bit ops - Replace if-else chain with nested match in decode_from_java_bytes - Use split_first() in read_byte for idiomatic slice consumption - Use split_at in read_f64_le to avoid TryInto on edition 2018 - Use u64::from(next) instead of `next as u64` casts - Extract assert_golden, assert_quantiles_match, bytes_to_hex helpers to reduce duplication across golden byte tests - Fix edition-2018 assert! format string compatibility - Clean up is_valid_flag_byte with let-else and match Co-authored-by: Cursor --- sketches-ddsketch/src/encoding.rs | 449 ++++++++++++++---------------- 1 file changed, 210 insertions(+), 239 deletions(-) diff --git a/sketches-ddsketch/src/encoding.rs b/sketches-ddsketch/src/encoding.rs index 280866863..7425136e3 100644 --- a/sketches-ddsketch/src/encoding.rs +++ b/sketches-ddsketch/src/encoding.rs @@ -6,7 +6,6 @@ //! serialization so that sketches produced in Rust can be deserialized //! and merged by Java consumers. -use std::convert::TryInto; use std::fmt; use crate::config::Config; @@ -14,25 +13,68 @@ use crate::ddsketch::DDSketch; use crate::store::Store; // --------------------------------------------------------------------------- -// Flag byte layout: (subflag << 2) | type_ordinal +// Flag byte layout +// +// Each flag byte packs a 2-bit type ordinal in the low bits and a 6-bit +// subflag in the upper bits: (subflag << 2) | type_ordinal +// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/Flag.java // --------------------------------------------------------------------------- -const FLAG_TYPE_SKETCH_FEATURES: u8 = 0b00; -const FLAG_TYPE_POSITIVE_STORE: u8 = 0b01; -const FLAG_TYPE_INDEX_MAPPING: u8 = 0b10; -const FLAG_TYPE_NEGATIVE_STORE: u8 = 0b11; +/// The 2-bit type field occupying the low bits of every flag byte. +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FlagType { + SketchFeatures = 0, + PositiveStore = 1, + IndexMapping = 2, + NegativeStore = 3, +} -const FLAG_INDEX_MAPPING_LOG: u8 = FLAG_TYPE_INDEX_MAPPING; // 0x02 -const FLAG_ZERO_COUNT: u8 = (1 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x04 -const FLAG_COUNT: u8 = (0x28 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0xA0 -const FLAG_SUM: u8 = (0x21 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x84 -const FLAG_MIN: u8 = (0x22 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x88 -const FLAG_MAX: u8 = (0x23 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x8C +impl FlagType { + fn from_byte(b: u8) -> Option { + match b & 0x03 { + 0 => Some(Self::SketchFeatures), + 1 => Some(Self::PositiveStore), + 2 => Some(Self::IndexMapping), + 3 => Some(Self::NegativeStore), + _ => None, + } + } +} -// BinEncodingMode subflags -const BIN_MODE_INDEX_DELTAS_AND_COUNTS: u8 = 1; -const BIN_MODE_INDEX_DELTAS: u8 = 2; -const BIN_MODE_CONTIGUOUS_COUNTS: u8 = 3; +/// Construct a flag byte from a subflag and a type. +const fn flag(subflag: u8, flag_type: FlagType) -> u8 { + (subflag << 2) | (flag_type as u8) +} + +// Pre-computed flag bytes for the sketch features we encode/decode. +const FLAG_INDEX_MAPPING_LOG: u8 = flag(0, FlagType::IndexMapping); // 0x02 +const FLAG_ZERO_COUNT: u8 = flag(1, FlagType::SketchFeatures); // 0x04 +const FLAG_COUNT: u8 = flag(0x28, FlagType::SketchFeatures); // 0xA0 +const FLAG_SUM: u8 = flag(0x21, FlagType::SketchFeatures); // 0x84 +const FLAG_MIN: u8 = flag(0x22, FlagType::SketchFeatures); // 0x88 +const FLAG_MAX: u8 = flag(0x23, FlagType::SketchFeatures); // 0x8C + +/// BinEncodingMode subflags for store flag bytes. +/// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/BinEncodingMode.java +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BinEncodingMode { + IndexDeltasAndCounts = 1, + IndexDeltas = 2, + ContiguousCounts = 3, +} + +impl BinEncodingMode { + fn from_subflag(subflag: u8) -> Option { + match subflag { + 1 => Some(Self::IndexDeltasAndCounts), + 2 => Some(Self::IndexDeltas), + 3 => Some(Self::ContiguousCounts), + _ => None, + } + } +} const VAR_DOUBLE_ROTATE_DISTANCE: u32 = 6; const MAX_VAR_LEN_64: usize = 9; @@ -51,11 +93,11 @@ pub enum DecodeError { } impl fmt::Display for DecodeError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - DecodeError::UnexpectedEof => write!(f, "unexpected end of input"), - DecodeError::InvalidFlag(b) => write!(f, "invalid flag byte: 0x{:02X}", b), - DecodeError::InvalidData(msg) => write!(f, "invalid data: {}", msg), + Self::UnexpectedEof => write!(f, "unexpected end of input"), + Self::InvalidFlag(b) => write!(f, "invalid flag byte: 0x{b:02X}"), + Self::InvalidData(msg) => write!(f, "invalid data: {msg}"), } } } @@ -64,6 +106,7 @@ impl std::error::Error for DecodeError {} // --------------------------------------------------------------------------- // VarEncoding — bit-exact port of Java VarEncodingHelper +// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/VarEncodingHelper.java // --------------------------------------------------------------------------- fn encode_unsigned_var_long(out: &mut Vec, mut value: u64) { @@ -81,13 +124,14 @@ fn decode_unsigned_var_long(input: &mut &[u8]) -> Result { loop { let next = read_byte(input)?; if next < 0x80 || shift == 56 { - return Ok(value | ((next as u64) << shift)); + return Ok(value | (u64::from(next) << shift)); } - value |= ((next as u64) & 0x7F) << shift; + value |= (u64::from(next) & 0x7F) << shift; shift += 7; } } +/// ZigZag encode then var-long encode. fn encode_signed_var_long(out: &mut Vec, value: i64) { let encoded = ((value >> 63) ^ (value << 1)) as u64; encode_unsigned_var_long(out, encoded); @@ -99,14 +143,14 @@ fn decode_signed_var_long(input: &mut &[u8]) -> Result { } fn double_to_var_bits(value: f64) -> u64 { - let bits = f64::to_bits(value + 1.0).wrapping_sub(f64::to_bits(1.0_f64)); + let bits = f64::to_bits(value + 1.0).wrapping_sub(f64::to_bits(1.0)); bits.rotate_left(VAR_DOUBLE_ROTATE_DISTANCE) } fn var_bits_to_double(bits: u64) -> f64 { f64::from_bits( bits.rotate_right(VAR_DOUBLE_ROTATE_DISTANCE) - .wrapping_add(f64::to_bits(1.0_f64)), + .wrapping_add(f64::to_bits(1.0)), ) - 1.0 } @@ -130,30 +174,31 @@ fn decode_var_double(input: &mut &[u8]) -> Result { loop { let next = read_byte(input)?; if shift == 1 { - bits |= next as u64; + bits |= u64::from(next); break; } if next < 0x80 { - bits |= (next as u64) << shift; + bits |= u64::from(next) << shift; break; } - bits |= ((next as u64) & 0x7F) << shift; + bits |= (u64::from(next) & 0x7F) << shift; shift -= 7; } Ok(var_bits_to_double(bits)) } // --------------------------------------------------------------------------- -// Helpers +// Byte-level helpers // --------------------------------------------------------------------------- fn read_byte(input: &mut &[u8]) -> Result { - if input.is_empty() { - return Err(DecodeError::UnexpectedEof); + match input.split_first() { + Some((&byte, rest)) => { + *input = rest; + Ok(byte) + } + None => Err(DecodeError::UnexpectedEof), } - let b = input[0]; - *input = &input[1..]; - Ok(b) } fn write_f64_le(out: &mut Vec, value: f64) { @@ -164,75 +209,79 @@ fn read_f64_le(input: &mut &[u8]) -> Result { if input.len() < 8 { return Err(DecodeError::UnexpectedEof); } - let bytes: [u8; 8] = input[..8].try_into().unwrap(); - *input = &input[8..]; - Ok(f64::from_le_bytes(bytes)) + let (bytes, rest) = input.split_at(8); + *input = rest; + // bytes is guaranteed to be length 8 by the split_at above. + let arr = [ + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]; + Ok(f64::from_le_bytes(arr)) } // --------------------------------------------------------------------------- // Store encoding/decoding +// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/store/DenseStore.java (encode/decode methods) // --------------------------------------------------------------------------- -/// Iterate the non-zero bins in the store as (absolute_index, count) pairs. -fn non_zero_bins(store: &Store) -> Vec<(i32, u64)> { +/// Collect non-zero bins in the store as (absolute_index, count) pairs. +/// +/// Allocation is acceptable here: this runs once per encode and the Vec +/// has at most `max_num_bins` entries. +fn collect_non_zero_bins(store: &Store) -> Vec<(i32, u64)> { if store.count == 0 { return Vec::new(); } let start = (store.min_key - store.offset) as usize; - let end = (store.max_key - store.offset + 1) as usize; - let end = end.min(store.bins.len()); - let mut result = Vec::new(); - for i in start..end { - let count = store.bins[i]; - if count > 0 { - result.push((i as i32 + store.offset, count)); - } - } - result + let end = ((store.max_key - store.offset + 1) as usize).min(store.bins.len()); + store.bins[start..end] + .iter() + .enumerate() + .filter(|&(_, &count)| count > 0) + .map(|(i, &count)| (start as i32 + i as i32 + store.offset, count)) + .collect() } -fn encode_store(out: &mut Vec, store: &Store, flag_type: u8) { - let bins = non_zero_bins(store); +fn encode_store(out: &mut Vec, store: &Store, flag_type: FlagType) { + let bins = collect_non_zero_bins(store); if bins.is_empty() { return; } - // INDEX_DELTAS_AND_COUNTS mode - out.push((BIN_MODE_INDEX_DELTAS_AND_COUNTS << 2) | flag_type); + out.push(flag(BinEncodingMode::IndexDeltasAndCounts as u8, flag_type)); encode_unsigned_var_long(out, bins.len() as u64); let mut prev_index: i64 = 0; for &(index, count) in &bins { - encode_signed_var_long(out, (index as i64) - prev_index); + encode_signed_var_long(out, i64::from(index) - prev_index); encode_var_double(out, count as f64); - prev_index = index as i64; + prev_index = i64::from(index); } } fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result { - let mode = subflag; + let mode = BinEncodingMode::from_subflag(subflag).ok_or_else(|| { + DecodeError::InvalidData(format!("unknown bin encoding mode subflag: {subflag}")) + })?; let num_bins = decode_unsigned_var_long(input)? as usize; let mut store = Store::new(bin_limit); match mode { - BIN_MODE_INDEX_DELTAS_AND_COUNTS => { + BinEncodingMode::IndexDeltasAndCounts => { let mut index: i64 = 0; for _ in 0..num_bins { - let delta = decode_signed_var_long(input)?; + index += decode_signed_var_long(input)?; let count = decode_var_double(input)?; - index += delta; store.add_count(index as i32, count as u64); } } - BIN_MODE_INDEX_DELTAS => { + BinEncodingMode::IndexDeltas => { let mut index: i64 = 0; for _ in 0..num_bins { - let delta = decode_signed_var_long(input)?; - index += delta; + index += decode_signed_var_long(input)?; store.add_count(index as i32, 1); } } - BIN_MODE_CONTIGUOUS_COUNTS => { + BinEncodingMode::ContiguousCounts => { let start_index = decode_signed_var_long(input)?; let index_delta = decode_signed_var_long(input)?; let mut index = start_index; @@ -242,12 +291,6 @@ fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result { - return Err(DecodeError::InvalidData(format!( - "unknown bin encoding mode subflag: {}", - other - ))); - } } Ok(store) @@ -270,10 +313,9 @@ fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result Vec { let mut out = Vec::new(); - let count = sketch.count() as f64; - // --- Summary statistics (DDSketchWithExactSummaryStatistics.encode) --- + // Summary statistics (DDSketchWithExactSummaryStatistics.encode) if count != 0.0 { out.push(FLAG_COUNT); encode_var_double(&mut out, count); @@ -287,24 +329,18 @@ pub fn encode_to_java_bytes(sketch: &DDSketch) -> Vec { write_f64_le(&mut out, sketch.sum); } - // --- DDSketch.encode (index mapping + zero count + stores) --- - - // Index mapping (LOG layout, indexOffset = 0.0) + // DDSketch.encode: index mapping + zero count + stores out.push(FLAG_INDEX_MAPPING_LOG); write_f64_le(&mut out, sketch.config.gamma); write_f64_le(&mut out, 0.0_f64); - // Zero count if sketch.zero_count != 0 { out.push(FLAG_ZERO_COUNT); encode_var_double(&mut out, sketch.zero_count as f64); } - // Positive store - encode_store(&mut out, &sketch.store, FLAG_TYPE_POSITIVE_STORE); - - // Negative store - encode_store(&mut out, &sketch.negative_store, FLAG_TYPE_NEGATIVE_STORE); + encode_store(&mut out, &sketch.store, FlagType::PositiveStore); + encode_store(&mut out, &sketch.negative_store, FlagType::NegativeStore); out } @@ -319,12 +355,9 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result { let mut input = bytes; - // Skip optional version prefix (0x02 followed by a valid flag byte) - if input.len() >= 2 && input[0] == 0x02 { - let second = input[1]; - if is_valid_flag_byte(second) { - input = &input[1..]; - } + // Skip optional version prefix (0x02 followed by a valid flag byte). + if input.len() >= 2 && input[0] == 0x02 && is_valid_flag_byte(input[1]) { + input = &input[1..]; } let mut gamma: Option = None; @@ -336,58 +369,51 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result { let mut negative_store: Option = None; while !input.is_empty() { - let flag = read_byte(&mut input)?; - let flag_type = flag & 0x03; - let subflag = flag >> 2; + let flag_byte = read_byte(&mut input)?; + let flag_type = + FlagType::from_byte(flag_byte).ok_or(DecodeError::InvalidFlag(flag_byte))?; + let subflag = flag_byte >> 2; match flag_type { - FLAG_TYPE_INDEX_MAPPING => { + FlagType::IndexMapping => { gamma = Some(read_f64_le(&mut input)?); let _index_offset = read_f64_le(&mut input)?; } - FLAG_TYPE_SKETCH_FEATURES => { - if flag == FLAG_ZERO_COUNT { - zero_count += decode_var_double(&mut input)?; - } else if flag == FLAG_COUNT { + FlagType::SketchFeatures => match flag_byte { + FLAG_ZERO_COUNT => zero_count += decode_var_double(&mut input)?, + FLAG_COUNT => { let _count = decode_var_double(&mut input)?; - } else if flag == FLAG_SUM { - sum = read_f64_le(&mut input)?; - } else if flag == FLAG_MIN { - min = read_f64_le(&mut input)?; - } else if flag == FLAG_MAX { - max = read_f64_le(&mut input)?; - } else { - return Err(DecodeError::InvalidFlag(flag)); } - } - FLAG_TYPE_POSITIVE_STORE => { + FLAG_SUM => sum = read_f64_le(&mut input)?, + FLAG_MIN => min = read_f64_le(&mut input)?, + FLAG_MAX => max = read_f64_le(&mut input)?, + _ => return Err(DecodeError::InvalidFlag(flag_byte)), + }, + FlagType::PositiveStore => { positive_store = Some(decode_store( &mut input, subflag, DEFAULT_MAX_BINS as usize, )?); } - FLAG_TYPE_NEGATIVE_STORE => { + FlagType::NegativeStore => { negative_store = Some(decode_store( &mut input, subflag, DEFAULT_MAX_BINS as usize, )?); } - _ => { - return Err(DecodeError::InvalidFlag(flag)); - } } } let g = gamma.unwrap_or_else(|| Config::defaults().gamma); let config = Config::from_gamma(g); - let pos = positive_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize)); + let store = positive_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize)); let neg = negative_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize)); Ok(DDSketch { config, - store: pos, + store, negative_store: neg, min, max, @@ -397,21 +423,22 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result { } /// Check whether a byte is a valid flag byte for the DDSketch binary format. -/// Used to detect the optional version prefix. fn is_valid_flag_byte(b: u8) -> bool { - matches!( + // Known sketch-feature flags + if matches!( b, FLAG_ZERO_COUNT | FLAG_COUNT | FLAG_SUM | FLAG_MIN | FLAG_MAX | FLAG_INDEX_MAPPING_LOG - ) || { - let flag_type = b & 0x03; - let subflag = b >> 2; - (flag_type == FLAG_TYPE_POSITIVE_STORE || flag_type == FLAG_TYPE_NEGATIVE_STORE) - && (1..=3).contains(&subflag) - } || { - // INDEX_MAPPING with other layouts (LOG_LINEAR=1..LOG_QUARTIC=4) - let flag_type = b & 0x03; - let subflag = b >> 2; - flag_type == FLAG_TYPE_INDEX_MAPPING && subflag <= 4 + ) { + return true; + } + let Some(flag_type) = FlagType::from_byte(b) else { + return false; + }; + let subflag = b >> 2; + match flag_type { + FlagType::PositiveStore | FlagType::NegativeStore => (1..=3).contains(&subflag), + FlagType::IndexMapping => subflag <= 4, // LOG=0, LOG_LINEAR=1 .. LOG_QUARTIC=4 + _ => false, } } @@ -430,7 +457,7 @@ mod tests { fn test_unsigned_var_long_zero() { let mut buf = Vec::new(); encode_unsigned_var_long(&mut buf, 0); - assert_eq!(buf, vec![0x00]); + assert_eq!(buf, [0x00]); let mut input = buf.as_slice(); assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 0); @@ -441,7 +468,7 @@ mod tests { fn test_unsigned_var_long_small() { let mut buf = Vec::new(); encode_unsigned_var_long(&mut buf, 1); - assert_eq!(buf, vec![0x01]); + assert_eq!(buf, [0x01]); let mut input = buf.as_slice(); assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 1); @@ -451,7 +478,7 @@ mod tests { fn test_unsigned_var_long_128() { let mut buf = Vec::new(); encode_unsigned_var_long(&mut buf, 128); - assert_eq!(buf, vec![0x80, 0x01]); + assert_eq!(buf, [0x80, 0x01]); let mut input = buf.as_slice(); assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 128); @@ -459,7 +486,7 @@ mod tests { #[test] fn test_unsigned_var_long_roundtrip() { - for &v in &[0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] { + for v in [0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] { let mut buf = Vec::new(); encode_unsigned_var_long(&mut buf, v); let mut input = buf.as_slice(); @@ -471,7 +498,7 @@ mod tests { #[test] fn test_signed_var_long_roundtrip() { - for &v in &[0i64, 1, -1, 63, -64, 64, -65, i64::MAX, i64::MIN] { + for v in [0i64, 1, -1, 63, -64, 64, -65, i64::MAX, i64::MIN] { let mut buf = Vec::new(); encode_signed_var_long(&mut buf, v); let mut input = buf.as_slice(); @@ -483,7 +510,7 @@ mod tests { #[test] fn test_var_double_roundtrip() { - for &v in &[ + for v in [ 0.0, 1.0, 2.0, 5.0, 15.0, 42.0, 100.0, 1e-9, 1e15, 0.5, 3.14159, ] { let mut buf = Vec::new(); @@ -494,7 +521,7 @@ mod tests { (decoded - v).abs() < 1e-15 || decoded == v, "roundtrip failed for {}: got {}", v, - decoded + decoded, ); assert!(input.is_empty()); } @@ -502,7 +529,6 @@ mod tests { #[test] fn test_var_double_small_integers() { - // Small non-negative integers should encode compactly let mut buf = Vec::new(); encode_var_double(&mut buf, 1.0); assert_eq!(buf.len(), 1, "VarDouble(1.0) should be 1 byte"); @@ -518,7 +544,6 @@ mod tests { fn test_encode_empty_sketch() { let sketch = DDSketch::new(Config::defaults()); let bytes = sketch.to_java_bytes(); - // Empty sketch: no summary stats, just index mapping assert!(!bytes.is_empty()); let decoded = DDSketch::from_java_bytes(&bytes).unwrap(); @@ -543,17 +568,7 @@ mod tests { assert_eq!(decoded.max(), Some(5.0)); assert_eq!(decoded.sum(), Some(15.0)); - for q in [0.5, 0.9, 0.95, 0.99] { - let orig = sketch.quantile(q).unwrap().unwrap(); - let dec = decoded.quantile(q).unwrap().unwrap(); - assert!( - (orig - dec).abs() / orig.abs().max(1e-15) < 1e-12, - "quantile({}) mismatch: {} vs {}", - q, - orig, - dec - ); - } + assert_quantiles_match(&sketch, &decoded, &[0.5, 0.9, 0.95, 0.99]); } #[test] @@ -585,17 +600,7 @@ mod tests { assert_eq!(decoded.max(), Some(5.0)); assert_eq!(decoded.sum(), Some(3.0)); - for q in [0.0, 0.25, 0.5, 0.75, 1.0] { - let orig = sketch.quantile(q).unwrap().unwrap(); - let dec = decoded.quantile(q).unwrap().unwrap(); - assert!( - (orig - dec).abs() / orig.abs().max(1e-15) < 1e-12, - "quantile({}) mismatch: {} vs {}", - q, - orig, - dec - ); - } + assert_quantiles_match(&sketch, &decoded, &[0.0, 0.25, 0.5, 0.75, 1.0]); } #[test] @@ -655,14 +660,25 @@ mod tests { let bytes = sketch.to_java_bytes(); - // First byte should be FLAG_COUNT (0xA0) since count > 0 assert_eq!(bytes[0], FLAG_COUNT, "first byte should be COUNT flag"); - - // After count + min + max + sum blocks, we should see FLAG_INDEX_MAPPING_LOG (0x02) - let has_mapping = bytes.contains(&FLAG_INDEX_MAPPING_LOG); - assert!(has_mapping, "should contain index mapping flag"); + assert!( + bytes.contains(&FLAG_INDEX_MAPPING_LOG), + "should contain index mapping flag" + ); } + // --- Cross-language golden byte tests --- + // + // Golden bytes generated by Java's DDSketchWithExactSummaryStatistics.encode() + // using LogarithmicMapping(0.01) + CollapsingLowestDenseStore(2048). + + const GOLDEN_SIMPLE: &str = "a00588000000000000f03f8c0000000000001440840000000000002e4002fd4a815abf52f03f000000000000000005050002440228021e021602"; + const GOLDEN_SINGLE: &str = "a0028800000000000045408c000000000000454084000000000000454002fd4a815abf52f03f00000000000000000501f40202"; + const GOLDEN_NEGATIVE: &str = "a084408800000000000008c08c000000000000144084000000000000084002fd4a815abf52f03f0000000000000000050244025c02070200026c02"; + const GOLDEN_ZERO: &str = "a0048800000000000000008c000000000000004084000000000000084002fd4a815abf52f03f00000000000000000402050200024402"; + const GOLDEN_EMPTY: &str = "02fd4a815abf52f03f0000000000000000"; + const GOLDEN_MANY: &str = "a08d1488000000000000f03f8c0000000000005940840000000000bab34002fd4a815abf52f03f000000000000000005550002440228021e021602120210020c020c020c0208020a020802060208020602060206020602040206020402040204020402040204020402040204020202040202020402020204020202020204020202020202020402020202020202020202020202020202020202020202020202020202020203020202020202020302020202020302020202020302020203020202030202020302030202020302030203020202030203020302030202"; + fn hex_to_bytes(hex: &str) -> Vec { (0..hex.len()) .step_by(2) @@ -670,14 +686,36 @@ mod tests { .collect() } - // Golden bytes generated by Java's DDSketchWithExactSummaryStatistics.encode() - // using LogarithmicMapping(0.01) + CollapsingLowestDenseStore(2048) - const GOLDEN_SIMPLE: &str = "a00588000000000000f03f8c0000000000001440840000000000002e4002fd4a815abf52f03f000000000000000005050002440228021e021602"; - const GOLDEN_SINGLE: &str = "a0028800000000000045408c000000000000454084000000000000454002fd4a815abf52f03f00000000000000000501f40202"; - const GOLDEN_NEGATIVE: &str = "a084408800000000000008c08c000000000000144084000000000000084002fd4a815abf52f03f0000000000000000050244025c02070200026c02"; - const GOLDEN_ZERO: &str = "a0048800000000000000008c000000000000004084000000000000084002fd4a815abf52f03f00000000000000000402050200024402"; - const GOLDEN_EMPTY: &str = "02fd4a815abf52f03f0000000000000000"; - const GOLDEN_MANY: &str = "a08d1488000000000000f03f8c0000000000005940840000000000bab34002fd4a815abf52f03f000000000000000005550002440228021e021602120210020c020c020c0208020a020802060208020602060206020602040206020402040204020402040204020402040204020202040202020402020204020202020204020202020202020402020202020202020202020202020202020202020202020202020202020203020202020202020302020202020302020202020302020203020202030202020302030202020302030203020202030203020302030202"; + fn bytes_to_hex(bytes: &[u8]) -> String { + bytes.iter().map(|b| format!("{b:02x}")).collect() + } + + fn assert_golden(label: &str, sketch: &DDSketch, golden_hex: &str) { + let bytes = sketch.to_java_bytes(); + let expected = hex_to_bytes(golden_hex); + assert_eq!( + bytes, + expected, + "Rust encoding doesn't match Java golden bytes for {}.\nRust: {}\nJava: {}", + label, + bytes_to_hex(&bytes), + golden_hex, + ); + } + + fn assert_quantiles_match(a: &DDSketch, b: &DDSketch, quantiles: &[f64]) { + for &q in quantiles { + let va = a.quantile(q).unwrap().unwrap(); + let vb = b.quantile(q).unwrap().unwrap(); + assert!( + (va - vb).abs() / va.abs().max(1e-15) < 1e-12, + "quantile({}) mismatch: {} vs {}", + q, + va, + vb, + ); + } + } #[test] fn test_cross_language_simple() { @@ -685,36 +723,14 @@ mod tests { for v in [1.0, 2.0, 3.0, 4.0, 5.0] { sketch.add(v); } - let bytes = sketch.to_java_bytes(); - let expected = hex_to_bytes(GOLDEN_SIMPLE); - assert_eq!( - bytes, - expected, - "Rust encoding doesn't match Java golden bytes for SIMPLE.\nRust: {}\nJava: {}", - bytes - .iter() - .map(|b| format!("{:02x}", b)) - .collect::(), - GOLDEN_SIMPLE - ); + assert_golden("SIMPLE", &sketch, GOLDEN_SIMPLE); } #[test] fn test_cross_language_single() { let mut sketch = DDSketch::new(Config::defaults()); sketch.add(42.0); - let bytes = sketch.to_java_bytes(); - let expected = hex_to_bytes(GOLDEN_SINGLE); - assert_eq!( - bytes, - expected, - "Rust encoding doesn't match Java golden bytes for SINGLE.\nRust: {}\nJava: {}", - bytes - .iter() - .map(|b| format!("{:02x}", b)) - .collect::(), - GOLDEN_SINGLE - ); + assert_golden("SINGLE", &sketch, GOLDEN_SINGLE); } #[test] @@ -723,18 +739,7 @@ mod tests { for v in [-3.0, -1.0, 2.0, 5.0] { sketch.add(v); } - let bytes = sketch.to_java_bytes(); - let expected = hex_to_bytes(GOLDEN_NEGATIVE); - assert_eq!( - bytes, - expected, - "Rust encoding doesn't match Java golden bytes for NEGATIVE.\nRust: {}\nJava: {}", - bytes - .iter() - .map(|b| format!("{:02x}", b)) - .collect::(), - GOLDEN_NEGATIVE - ); + assert_golden("NEGATIVE", &sketch, GOLDEN_NEGATIVE); } #[test] @@ -743,35 +748,13 @@ mod tests { for v in [0.0, 1.0, 2.0] { sketch.add(v); } - let bytes = sketch.to_java_bytes(); - let expected = hex_to_bytes(GOLDEN_ZERO); - assert_eq!( - bytes, - expected, - "Rust encoding doesn't match Java golden bytes for ZERO.\nRust: {}\nJava: {}", - bytes - .iter() - .map(|b| format!("{:02x}", b)) - .collect::(), - GOLDEN_ZERO - ); + assert_golden("ZERO", &sketch, GOLDEN_ZERO); } #[test] fn test_cross_language_empty() { let sketch = DDSketch::new(Config::defaults()); - let bytes = sketch.to_java_bytes(); - let expected = hex_to_bytes(GOLDEN_EMPTY); - assert_eq!( - bytes, - expected, - "Rust encoding doesn't match Java golden bytes for EMPTY.\nRust: {}\nJava: {}", - bytes - .iter() - .map(|b| format!("{:02x}", b)) - .collect::(), - GOLDEN_EMPTY - ); + assert_golden("EMPTY", &sketch, GOLDEN_EMPTY); } #[test] @@ -780,23 +763,11 @@ mod tests { for i in 1..=100 { sketch.add(i as f64); } - let bytes = sketch.to_java_bytes(); - let expected = hex_to_bytes(GOLDEN_MANY); - assert_eq!( - bytes, - expected, - "Rust encoding doesn't match Java golden bytes for MANY.\nRust: {}\nJava: {}", - bytes - .iter() - .map(|b| format!("{:02x}", b)) - .collect::(), - GOLDEN_MANY - ); + assert_golden("MANY", &sketch, GOLDEN_MANY); } #[test] fn test_decode_java_golden_bytes() { - // Verify we can decode all Java golden bytes for (name, hex) in [ ("SIMPLE", GOLDEN_SIMPLE), ("SINGLE", GOLDEN_SINGLE), @@ -838,7 +809,7 @@ mod tests { (orig_p95 - dec_p95).abs() / orig_p95 < alpha, "p95 mismatch: {} vs {}", orig_p95, - dec_p95 + dec_p95, ); } }