refactor: rewrite encoding.rs with idiomatic Rust

- Replace bare constants with FlagType and BinEncodingMode enums
- Use const fn for flag byte construction instead of raw bit ops
- Replace if-else chain with nested match in decode_from_java_bytes
- Use split_first() in read_byte for idiomatic slice consumption
- Use split_at in read_f64_le to avoid TryInto on edition 2018
- Use u64::from(next) instead of `next as u64` casts
- Extract assert_golden, assert_quantiles_match, bytes_to_hex helpers
  to reduce duplication across golden byte tests
- Fix edition-2018 assert! format string compatibility
- Clean up is_valid_flag_byte with let-else and match

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
cong.xie
2026-02-18 15:49:12 -05:00
parent 4211d5a1ed
commit aeea65f61d

View File

@@ -6,7 +6,6 @@
//! serialization so that sketches produced in Rust can be deserialized
//! and merged by Java consumers.
use std::convert::TryInto;
use std::fmt;
use crate::config::Config;
@@ -14,25 +13,68 @@ use crate::ddsketch::DDSketch;
use crate::store::Store;
// ---------------------------------------------------------------------------
// Flag byte layout: (subflag << 2) | type_ordinal
// Flag byte layout
//
// Each flag byte packs a 2-bit type ordinal in the low bits and a 6-bit
// subflag in the upper bits: (subflag << 2) | type_ordinal
// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/Flag.java
// ---------------------------------------------------------------------------
const FLAG_TYPE_SKETCH_FEATURES: u8 = 0b00;
const FLAG_TYPE_POSITIVE_STORE: u8 = 0b01;
const FLAG_TYPE_INDEX_MAPPING: u8 = 0b10;
const FLAG_TYPE_NEGATIVE_STORE: u8 = 0b11;
/// Category of data announced by a flag byte.
///
/// The discriminant is the 2-bit ordinal stored in the two least-significant
/// bits of every flag byte, so `variant as u8` yields the wire value directly.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
enum FlagType {
    /// Sketch-level features (zero count, count, sum, min, max).
    SketchFeatures = 0b00,
    /// Bins of the positive-value store.
    PositiveStore = 0b01,
    /// Index mapping parameters.
    IndexMapping = 0b10,
    /// Bins of the negative-value store.
    NegativeStore = 0b11,
}
const FLAG_INDEX_MAPPING_LOG: u8 = FLAG_TYPE_INDEX_MAPPING; // 0x02
const FLAG_ZERO_COUNT: u8 = (1 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x04
const FLAG_COUNT: u8 = (0x28 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0xA0
const FLAG_SUM: u8 = (0x21 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x84
const FLAG_MIN: u8 = (0x22 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x88
const FLAG_MAX: u8 = (0x23 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x8C
impl FlagType {
    /// Extract the flag type from the two low bits of a flag byte.
    ///
    /// The upper six bits (the subflag) are ignored. Because the masked value
    /// is always in `0..=3` and every ordinal is assigned, this never returns
    /// `None` in practice; the `Option` is kept so callers need not change.
    fn from_byte(b: u8) -> Option<Self> {
        // Indexed by type ordinal.
        let by_ordinal = [
            Self::SketchFeatures,
            Self::PositiveStore,
            Self::IndexMapping,
            Self::NegativeStore,
        ];
        by_ordinal.get(usize::from(b & 0x03)).copied()
    }
}
// BinEncodingMode subflags
const BIN_MODE_INDEX_DELTAS_AND_COUNTS: u8 = 1;
const BIN_MODE_INDEX_DELTAS: u8 = 2;
const BIN_MODE_CONTIGUOUS_COUNTS: u8 = 3;
/// Construct a flag byte from a 6-bit `subflag` and a 2-bit flag type.
///
/// Layout: `(subflag << 2) | type_ordinal`, i.e. the type occupies the two
/// low bits and the subflag the six high bits.
///
/// # Panics
///
/// Panics if `subflag` does not fit in 6 bits (`subflag > 0x3F`). When the
/// call is evaluated in a const context this surfaces as a compile-time error.
const fn flag(subflag: u8, flag_type: FlagType) -> u8 {
    // A `u8` left shift silently discards the bits shifted out, so without this
    // check an oversized subflag would yield a valid-looking but wrong flag byte.
    assert!(subflag <= 0x3F, "subflag does not fit in 6 bits");
    (subflag << 2) | (flag_type as u8)
}
// Pre-computed flag bytes for the index mapping and the sketch features we
// encode/decode. The trailing comment on each line is the resulting byte value.

// Index mapping, subflag 0 (LOG layout). Followed on the wire by two
// little-endian f64 values: gamma, then the index offset.
const FLAG_INDEX_MAPPING_LOG: u8 = flag(0, FlagType::IndexMapping); // 0x02
// Zero count. Followed by a var-double.
const FLAG_ZERO_COUNT: u8 = flag(1, FlagType::SketchFeatures); // 0x04
// Total count. Followed by a var-double.
const FLAG_COUNT: u8 = flag(0x28, FlagType::SketchFeatures); // 0xA0
// Sum of values. Followed by a little-endian f64.
const FLAG_SUM: u8 = flag(0x21, FlagType::SketchFeatures); // 0x84
// Minimum value. Followed by a little-endian f64.
const FLAG_MIN: u8 = flag(0x22, FlagType::SketchFeatures); // 0x88
// Maximum value. Followed by a little-endian f64.
const FLAG_MAX: u8 = flag(0x23, FlagType::SketchFeatures); // 0x8C
/// How the bins of a store are laid out after a store flag byte.
///
/// The discriminant is the subflag value carried in the upper six bits of a
/// positive- or negative-store flag byte.
/// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/BinEncodingMode.java
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
enum BinEncodingMode {
    /// Each bin is a signed var-long index delta followed by a var-double count.
    IndexDeltasAndCounts = 1,
    /// Each bin is a signed var-long index delta; the decoder adds a count of 1.
    IndexDeltas = 2,
    /// Starts with a start index and an index delta (both signed var-longs).
    /// NOTE(review): presumably followed by one count per bin — confirm
    /// against the decoder.
    ContiguousCounts = 3,
}
impl BinEncodingMode {
    /// Map a store subflag (the flag byte shifted right by two) to its
    /// encoding mode.
    ///
    /// Returns `None` for any subflag that is not a known mode's discriminant.
    fn from_subflag(subflag: u8) -> Option<Self> {
        let known_modes = [
            Self::IndexDeltasAndCounts,
            Self::IndexDeltas,
            Self::ContiguousCounts,
        ];
        known_modes
            .iter()
            .copied()
            .find(|&mode| mode as u8 == subflag)
    }
}
const VAR_DOUBLE_ROTATE_DISTANCE: u32 = 6;
const MAX_VAR_LEN_64: usize = 9;
@@ -51,11 +93,11 @@ pub enum DecodeError {
}
impl fmt::Display for DecodeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
DecodeError::UnexpectedEof => write!(f, "unexpected end of input"),
DecodeError::InvalidFlag(b) => write!(f, "invalid flag byte: 0x{:02X}", b),
DecodeError::InvalidData(msg) => write!(f, "invalid data: {}", msg),
Self::UnexpectedEof => write!(f, "unexpected end of input"),
Self::InvalidFlag(b) => write!(f, "invalid flag byte: 0x{b:02X}"),
Self::InvalidData(msg) => write!(f, "invalid data: {msg}"),
}
}
}
@@ -64,6 +106,7 @@ impl std::error::Error for DecodeError {}
// ---------------------------------------------------------------------------
// VarEncoding — bit-exact port of Java VarEncodingHelper
// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/VarEncodingHelper.java
// ---------------------------------------------------------------------------
fn encode_unsigned_var_long(out: &mut Vec<u8>, mut value: u64) {
@@ -81,13 +124,14 @@ fn decode_unsigned_var_long(input: &mut &[u8]) -> Result<u64, DecodeError> {
loop {
let next = read_byte(input)?;
if next < 0x80 || shift == 56 {
return Ok(value | ((next as u64) << shift));
return Ok(value | (u64::from(next) << shift));
}
value |= ((next as u64) & 0x7F) << shift;
value |= (u64::from(next) & 0x7F) << shift;
shift += 7;
}
}
/// ZigZag encode then var-long encode.
fn encode_signed_var_long(out: &mut Vec<u8>, value: i64) {
let encoded = ((value >> 63) ^ (value << 1)) as u64;
encode_unsigned_var_long(out, encoded);
@@ -99,14 +143,14 @@ fn decode_signed_var_long(input: &mut &[u8]) -> Result<i64, DecodeError> {
}
fn double_to_var_bits(value: f64) -> u64 {
let bits = f64::to_bits(value + 1.0).wrapping_sub(f64::to_bits(1.0_f64));
let bits = f64::to_bits(value + 1.0).wrapping_sub(f64::to_bits(1.0));
bits.rotate_left(VAR_DOUBLE_ROTATE_DISTANCE)
}
fn var_bits_to_double(bits: u64) -> f64 {
f64::from_bits(
bits.rotate_right(VAR_DOUBLE_ROTATE_DISTANCE)
.wrapping_add(f64::to_bits(1.0_f64)),
.wrapping_add(f64::to_bits(1.0)),
) - 1.0
}
@@ -130,30 +174,31 @@ fn decode_var_double(input: &mut &[u8]) -> Result<f64, DecodeError> {
loop {
let next = read_byte(input)?;
if shift == 1 {
bits |= next as u64;
bits |= u64::from(next);
break;
}
if next < 0x80 {
bits |= (next as u64) << shift;
bits |= u64::from(next) << shift;
break;
}
bits |= ((next as u64) & 0x7F) << shift;
bits |= (u64::from(next) & 0x7F) << shift;
shift -= 7;
}
Ok(var_bits_to_double(bits))
}
// ---------------------------------------------------------------------------
// Helpers
// Byte-level helpers
// ---------------------------------------------------------------------------
fn read_byte(input: &mut &[u8]) -> Result<u8, DecodeError> {
if input.is_empty() {
return Err(DecodeError::UnexpectedEof);
match input.split_first() {
Some((&byte, rest)) => {
*input = rest;
Ok(byte)
}
None => Err(DecodeError::UnexpectedEof),
}
let b = input[0];
*input = &input[1..];
Ok(b)
}
fn write_f64_le(out: &mut Vec<u8>, value: f64) {
@@ -164,75 +209,79 @@ fn read_f64_le(input: &mut &[u8]) -> Result<f64, DecodeError> {
if input.len() < 8 {
return Err(DecodeError::UnexpectedEof);
}
let bytes: [u8; 8] = input[..8].try_into().unwrap();
*input = &input[8..];
Ok(f64::from_le_bytes(bytes))
let (bytes, rest) = input.split_at(8);
*input = rest;
// bytes is guaranteed to be length 8 by the split_at above.
let arr = [
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
];
Ok(f64::from_le_bytes(arr))
}
// ---------------------------------------------------------------------------
// Store encoding/decoding
// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/store/DenseStore.java (encode/decode methods)
// ---------------------------------------------------------------------------
/// Iterate the non-zero bins in the store as (absolute_index, count) pairs.
fn non_zero_bins(store: &Store) -> Vec<(i32, u64)> {
/// Collect non-zero bins in the store as (absolute_index, count) pairs.
///
/// Allocation is acceptable here: this runs once per encode and the Vec
/// has at most `max_num_bins` entries.
fn collect_non_zero_bins(store: &Store) -> Vec<(i32, u64)> {
if store.count == 0 {
return Vec::new();
}
let start = (store.min_key - store.offset) as usize;
let end = (store.max_key - store.offset + 1) as usize;
let end = end.min(store.bins.len());
let mut result = Vec::new();
for i in start..end {
let count = store.bins[i];
if count > 0 {
result.push((i as i32 + store.offset, count));
}
}
result
let end = ((store.max_key - store.offset + 1) as usize).min(store.bins.len());
store.bins[start..end]
.iter()
.enumerate()
.filter(|&(_, &count)| count > 0)
.map(|(i, &count)| (start as i32 + i as i32 + store.offset, count))
.collect()
}
fn encode_store(out: &mut Vec<u8>, store: &Store, flag_type: u8) {
let bins = non_zero_bins(store);
fn encode_store(out: &mut Vec<u8>, store: &Store, flag_type: FlagType) {
let bins = collect_non_zero_bins(store);
if bins.is_empty() {
return;
}
// INDEX_DELTAS_AND_COUNTS mode
out.push((BIN_MODE_INDEX_DELTAS_AND_COUNTS << 2) | flag_type);
out.push(flag(BinEncodingMode::IndexDeltasAndCounts as u8, flag_type));
encode_unsigned_var_long(out, bins.len() as u64);
let mut prev_index: i64 = 0;
for &(index, count) in &bins {
encode_signed_var_long(out, (index as i64) - prev_index);
encode_signed_var_long(out, i64::from(index) - prev_index);
encode_var_double(out, count as f64);
prev_index = index as i64;
prev_index = i64::from(index);
}
}
fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result<Store, DecodeError> {
let mode = subflag;
let mode = BinEncodingMode::from_subflag(subflag).ok_or_else(|| {
DecodeError::InvalidData(format!("unknown bin encoding mode subflag: {subflag}"))
})?;
let num_bins = decode_unsigned_var_long(input)? as usize;
let mut store = Store::new(bin_limit);
match mode {
BIN_MODE_INDEX_DELTAS_AND_COUNTS => {
BinEncodingMode::IndexDeltasAndCounts => {
let mut index: i64 = 0;
for _ in 0..num_bins {
let delta = decode_signed_var_long(input)?;
index += decode_signed_var_long(input)?;
let count = decode_var_double(input)?;
index += delta;
store.add_count(index as i32, count as u64);
}
}
BIN_MODE_INDEX_DELTAS => {
BinEncodingMode::IndexDeltas => {
let mut index: i64 = 0;
for _ in 0..num_bins {
let delta = decode_signed_var_long(input)?;
index += delta;
index += decode_signed_var_long(input)?;
store.add_count(index as i32, 1);
}
}
BIN_MODE_CONTIGUOUS_COUNTS => {
BinEncodingMode::ContiguousCounts => {
let start_index = decode_signed_var_long(input)?;
let index_delta = decode_signed_var_long(input)?;
let mut index = start_index;
@@ -242,12 +291,6 @@ fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result<Stor
index += index_delta;
}
}
other => {
return Err(DecodeError::InvalidData(format!(
"unknown bin encoding mode subflag: {}",
other
)));
}
}
Ok(store)
@@ -270,10 +313,9 @@ fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result<Stor
/// 6. Negative store bins
pub fn encode_to_java_bytes(sketch: &DDSketch) -> Vec<u8> {
let mut out = Vec::new();
let count = sketch.count() as f64;
// --- Summary statistics (DDSketchWithExactSummaryStatistics.encode) ---
// Summary statistics (DDSketchWithExactSummaryStatistics.encode)
if count != 0.0 {
out.push(FLAG_COUNT);
encode_var_double(&mut out, count);
@@ -287,24 +329,18 @@ pub fn encode_to_java_bytes(sketch: &DDSketch) -> Vec<u8> {
write_f64_le(&mut out, sketch.sum);
}
// --- DDSketch.encode (index mapping + zero count + stores) ---
// Index mapping (LOG layout, indexOffset = 0.0)
// DDSketch.encode: index mapping + zero count + stores
out.push(FLAG_INDEX_MAPPING_LOG);
write_f64_le(&mut out, sketch.config.gamma);
write_f64_le(&mut out, 0.0_f64);
// Zero count
if sketch.zero_count != 0 {
out.push(FLAG_ZERO_COUNT);
encode_var_double(&mut out, sketch.zero_count as f64);
}
// Positive store
encode_store(&mut out, &sketch.store, FLAG_TYPE_POSITIVE_STORE);
// Negative store
encode_store(&mut out, &sketch.negative_store, FLAG_TYPE_NEGATIVE_STORE);
encode_store(&mut out, &sketch.store, FlagType::PositiveStore);
encode_store(&mut out, &sketch.negative_store, FlagType::NegativeStore);
out
}
@@ -319,12 +355,9 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result<DDSketch, DecodeError> {
let mut input = bytes;
// Skip optional version prefix (0x02 followed by a valid flag byte)
if input.len() >= 2 && input[0] == 0x02 {
let second = input[1];
if is_valid_flag_byte(second) {
input = &input[1..];
}
// Skip optional version prefix (0x02 followed by a valid flag byte).
if input.len() >= 2 && input[0] == 0x02 && is_valid_flag_byte(input[1]) {
input = &input[1..];
}
let mut gamma: Option<f64> = None;
@@ -336,58 +369,51 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result<DDSketch, DecodeError> {
let mut negative_store: Option<Store> = None;
while !input.is_empty() {
let flag = read_byte(&mut input)?;
let flag_type = flag & 0x03;
let subflag = flag >> 2;
let flag_byte = read_byte(&mut input)?;
let flag_type =
FlagType::from_byte(flag_byte).ok_or(DecodeError::InvalidFlag(flag_byte))?;
let subflag = flag_byte >> 2;
match flag_type {
FLAG_TYPE_INDEX_MAPPING => {
FlagType::IndexMapping => {
gamma = Some(read_f64_le(&mut input)?);
let _index_offset = read_f64_le(&mut input)?;
}
FLAG_TYPE_SKETCH_FEATURES => {
if flag == FLAG_ZERO_COUNT {
zero_count += decode_var_double(&mut input)?;
} else if flag == FLAG_COUNT {
FlagType::SketchFeatures => match flag_byte {
FLAG_ZERO_COUNT => zero_count += decode_var_double(&mut input)?,
FLAG_COUNT => {
let _count = decode_var_double(&mut input)?;
} else if flag == FLAG_SUM {
sum = read_f64_le(&mut input)?;
} else if flag == FLAG_MIN {
min = read_f64_le(&mut input)?;
} else if flag == FLAG_MAX {
max = read_f64_le(&mut input)?;
} else {
return Err(DecodeError::InvalidFlag(flag));
}
}
FLAG_TYPE_POSITIVE_STORE => {
FLAG_SUM => sum = read_f64_le(&mut input)?,
FLAG_MIN => min = read_f64_le(&mut input)?,
FLAG_MAX => max = read_f64_le(&mut input)?,
_ => return Err(DecodeError::InvalidFlag(flag_byte)),
},
FlagType::PositiveStore => {
positive_store = Some(decode_store(
&mut input,
subflag,
DEFAULT_MAX_BINS as usize,
)?);
}
FLAG_TYPE_NEGATIVE_STORE => {
FlagType::NegativeStore => {
negative_store = Some(decode_store(
&mut input,
subflag,
DEFAULT_MAX_BINS as usize,
)?);
}
_ => {
return Err(DecodeError::InvalidFlag(flag));
}
}
}
let g = gamma.unwrap_or_else(|| Config::defaults().gamma);
let config = Config::from_gamma(g);
let pos = positive_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
let store = positive_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
let neg = negative_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
Ok(DDSketch {
config,
store: pos,
store,
negative_store: neg,
min,
max,
@@ -397,21 +423,22 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result<DDSketch, DecodeError> {
}
/// Check whether a byte is a valid flag byte for the DDSketch binary format.
/// Used to detect the optional version prefix.
fn is_valid_flag_byte(b: u8) -> bool {
matches!(
// Known sketch-feature flags
if matches!(
b,
FLAG_ZERO_COUNT | FLAG_COUNT | FLAG_SUM | FLAG_MIN | FLAG_MAX | FLAG_INDEX_MAPPING_LOG
) || {
let flag_type = b & 0x03;
let subflag = b >> 2;
(flag_type == FLAG_TYPE_POSITIVE_STORE || flag_type == FLAG_TYPE_NEGATIVE_STORE)
&& (1..=3).contains(&subflag)
} || {
// INDEX_MAPPING with other layouts (LOG_LINEAR=1..LOG_QUARTIC=4)
let flag_type = b & 0x03;
let subflag = b >> 2;
flag_type == FLAG_TYPE_INDEX_MAPPING && subflag <= 4
) {
return true;
}
let Some(flag_type) = FlagType::from_byte(b) else {
return false;
};
let subflag = b >> 2;
match flag_type {
FlagType::PositiveStore | FlagType::NegativeStore => (1..=3).contains(&subflag),
FlagType::IndexMapping => subflag <= 4, // LOG=0, LOG_LINEAR=1 .. LOG_QUARTIC=4
_ => false,
}
}
@@ -430,7 +457,7 @@ mod tests {
fn test_unsigned_var_long_zero() {
let mut buf = Vec::new();
encode_unsigned_var_long(&mut buf, 0);
assert_eq!(buf, vec![0x00]);
assert_eq!(buf, [0x00]);
let mut input = buf.as_slice();
assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 0);
@@ -441,7 +468,7 @@ mod tests {
fn test_unsigned_var_long_small() {
let mut buf = Vec::new();
encode_unsigned_var_long(&mut buf, 1);
assert_eq!(buf, vec![0x01]);
assert_eq!(buf, [0x01]);
let mut input = buf.as_slice();
assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 1);
@@ -451,7 +478,7 @@ mod tests {
fn test_unsigned_var_long_128() {
let mut buf = Vec::new();
encode_unsigned_var_long(&mut buf, 128);
assert_eq!(buf, vec![0x80, 0x01]);
assert_eq!(buf, [0x80, 0x01]);
let mut input = buf.as_slice();
assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 128);
@@ -459,7 +486,7 @@ mod tests {
#[test]
fn test_unsigned_var_long_roundtrip() {
for &v in &[0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] {
for v in [0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] {
let mut buf = Vec::new();
encode_unsigned_var_long(&mut buf, v);
let mut input = buf.as_slice();
@@ -471,7 +498,7 @@ mod tests {
#[test]
fn test_signed_var_long_roundtrip() {
for &v in &[0i64, 1, -1, 63, -64, 64, -65, i64::MAX, i64::MIN] {
for v in [0i64, 1, -1, 63, -64, 64, -65, i64::MAX, i64::MIN] {
let mut buf = Vec::new();
encode_signed_var_long(&mut buf, v);
let mut input = buf.as_slice();
@@ -483,7 +510,7 @@ mod tests {
#[test]
fn test_var_double_roundtrip() {
for &v in &[
for v in [
0.0, 1.0, 2.0, 5.0, 15.0, 42.0, 100.0, 1e-9, 1e15, 0.5, 3.14159,
] {
let mut buf = Vec::new();
@@ -494,7 +521,7 @@ mod tests {
(decoded - v).abs() < 1e-15 || decoded == v,
"roundtrip failed for {}: got {}",
v,
decoded
decoded,
);
assert!(input.is_empty());
}
@@ -502,7 +529,6 @@ mod tests {
#[test]
fn test_var_double_small_integers() {
// Small non-negative integers should encode compactly
let mut buf = Vec::new();
encode_var_double(&mut buf, 1.0);
assert_eq!(buf.len(), 1, "VarDouble(1.0) should be 1 byte");
@@ -518,7 +544,6 @@ mod tests {
fn test_encode_empty_sketch() {
let sketch = DDSketch::new(Config::defaults());
let bytes = sketch.to_java_bytes();
// Empty sketch: no summary stats, just index mapping
assert!(!bytes.is_empty());
let decoded = DDSketch::from_java_bytes(&bytes).unwrap();
@@ -543,17 +568,7 @@ mod tests {
assert_eq!(decoded.max(), Some(5.0));
assert_eq!(decoded.sum(), Some(15.0));
for q in [0.5, 0.9, 0.95, 0.99] {
let orig = sketch.quantile(q).unwrap().unwrap();
let dec = decoded.quantile(q).unwrap().unwrap();
assert!(
(orig - dec).abs() / orig.abs().max(1e-15) < 1e-12,
"quantile({}) mismatch: {} vs {}",
q,
orig,
dec
);
}
assert_quantiles_match(&sketch, &decoded, &[0.5, 0.9, 0.95, 0.99]);
}
#[test]
@@ -585,17 +600,7 @@ mod tests {
assert_eq!(decoded.max(), Some(5.0));
assert_eq!(decoded.sum(), Some(3.0));
for q in [0.0, 0.25, 0.5, 0.75, 1.0] {
let orig = sketch.quantile(q).unwrap().unwrap();
let dec = decoded.quantile(q).unwrap().unwrap();
assert!(
(orig - dec).abs() / orig.abs().max(1e-15) < 1e-12,
"quantile({}) mismatch: {} vs {}",
q,
orig,
dec
);
}
assert_quantiles_match(&sketch, &decoded, &[0.0, 0.25, 0.5, 0.75, 1.0]);
}
#[test]
@@ -655,14 +660,25 @@ mod tests {
let bytes = sketch.to_java_bytes();
// First byte should be FLAG_COUNT (0xA0) since count > 0
assert_eq!(bytes[0], FLAG_COUNT, "first byte should be COUNT flag");
// After count + min + max + sum blocks, we should see FLAG_INDEX_MAPPING_LOG (0x02)
let has_mapping = bytes.contains(&FLAG_INDEX_MAPPING_LOG);
assert!(has_mapping, "should contain index mapping flag");
assert!(
bytes.contains(&FLAG_INDEX_MAPPING_LOG),
"should contain index mapping flag"
);
}
// --- Cross-language golden byte tests ---
//
// Golden bytes generated by Java's DDSketchWithExactSummaryStatistics.encode()
// using LogarithmicMapping(0.01) + CollapsingLowestDenseStore(2048).
const GOLDEN_SIMPLE: &str = "a00588000000000000f03f8c0000000000001440840000000000002e4002fd4a815abf52f03f000000000000000005050002440228021e021602";
const GOLDEN_SINGLE: &str = "a0028800000000000045408c000000000000454084000000000000454002fd4a815abf52f03f00000000000000000501f40202";
const GOLDEN_NEGATIVE: &str = "a084408800000000000008c08c000000000000144084000000000000084002fd4a815abf52f03f0000000000000000050244025c02070200026c02";
const GOLDEN_ZERO: &str = "a0048800000000000000008c000000000000004084000000000000084002fd4a815abf52f03f00000000000000000402050200024402";
const GOLDEN_EMPTY: &str = "02fd4a815abf52f03f0000000000000000";
const GOLDEN_MANY: &str = "a08d1488000000000000f03f8c0000000000005940840000000000bab34002fd4a815abf52f03f000000000000000005550002440228021e021602120210020c020c020c0208020a020802060208020602060206020602040206020402040204020402040204020402040204020202040202020402020204020202020204020202020202020402020202020202020202020202020202020202020202020202020202020203020202020202020302020202020302020202020302020203020202030202020302030202020302030203020202030203020302030202";
fn hex_to_bytes(hex: &str) -> Vec<u8> {
(0..hex.len())
.step_by(2)
@@ -670,14 +686,36 @@ mod tests {
.collect()
}
// Golden bytes generated by Java's DDSketchWithExactSummaryStatistics.encode()
// using LogarithmicMapping(0.01) + CollapsingLowestDenseStore(2048)
const GOLDEN_SIMPLE: &str = "a00588000000000000f03f8c0000000000001440840000000000002e4002fd4a815abf52f03f000000000000000005050002440228021e021602";
const GOLDEN_SINGLE: &str = "a0028800000000000045408c000000000000454084000000000000454002fd4a815abf52f03f00000000000000000501f40202";
const GOLDEN_NEGATIVE: &str = "a084408800000000000008c08c000000000000144084000000000000084002fd4a815abf52f03f0000000000000000050244025c02070200026c02";
const GOLDEN_ZERO: &str = "a0048800000000000000008c000000000000004084000000000000084002fd4a815abf52f03f00000000000000000402050200024402";
const GOLDEN_EMPTY: &str = "02fd4a815abf52f03f0000000000000000";
const GOLDEN_MANY: &str = "a08d1488000000000000f03f8c0000000000005940840000000000bab34002fd4a815abf52f03f000000000000000005550002440228021e021602120210020c020c020c0208020a020802060208020602060206020602040206020402040204020402040204020402040204020202040202020402020204020202020204020202020202020402020202020202020202020202020202020202020202020202020202020203020202020202020302020202020302020202020302020203020202030202020302030202020302030203020202030203020302030202";
/// Render `bytes` as a lowercase hex string, two digits per byte and no
/// separators.
fn bytes_to_hex(bytes: &[u8]) -> String {
    let mut hex = String::with_capacity(bytes.len() * 2);
    for b in bytes {
        hex.push_str(&format!("{b:02x}"));
    }
    hex
}
/// Assert that encoding `sketch` yields exactly the bytes spelled out by
/// `golden_hex`.
///
/// `label` names the scenario in the failure message, which also prints both
/// encodings as hex so they can be compared by eye.
fn assert_golden(label: &str, sketch: &DDSketch, golden_hex: &str) {
    let encoded = sketch.to_java_bytes();
    assert_eq!(
        encoded,
        hex_to_bytes(golden_hex),
        "Rust encoding doesn't match Java golden bytes for {}.\nRust: {}\nJava: {}",
        label,
        bytes_to_hex(&encoded),
        golden_hex,
    );
}
/// Assert that sketches `a` and `b` report the same value at every quantile
/// in `quantiles`.
///
/// Values are compared by relative error against `a`'s value (with the
/// denominator floored at 1e-15), which must stay below 1e-12. Panics if
/// either sketch fails to produce a value for a quantile.
fn assert_quantiles_match(a: &DDSketch, b: &DDSketch, quantiles: &[f64]) {
    for q in quantiles.iter().copied() {
        let reference = a.quantile(q).unwrap().unwrap();
        let candidate = b.quantile(q).unwrap().unwrap();
        let relative_error = (reference - candidate).abs() / reference.abs().max(1e-15);
        assert!(
            relative_error < 1e-12,
            "quantile({}) mismatch: {} vs {}",
            q,
            reference,
            candidate,
        );
    }
}
#[test]
fn test_cross_language_simple() {
@@ -685,36 +723,14 @@ mod tests {
for v in [1.0, 2.0, 3.0, 4.0, 5.0] {
sketch.add(v);
}
let bytes = sketch.to_java_bytes();
let expected = hex_to_bytes(GOLDEN_SIMPLE);
assert_eq!(
bytes,
expected,
"Rust encoding doesn't match Java golden bytes for SIMPLE.\nRust: {}\nJava: {}",
bytes
.iter()
.map(|b| format!("{:02x}", b))
.collect::<String>(),
GOLDEN_SIMPLE
);
assert_golden("SIMPLE", &sketch, GOLDEN_SIMPLE);
}
#[test]
fn test_cross_language_single() {
let mut sketch = DDSketch::new(Config::defaults());
sketch.add(42.0);
let bytes = sketch.to_java_bytes();
let expected = hex_to_bytes(GOLDEN_SINGLE);
assert_eq!(
bytes,
expected,
"Rust encoding doesn't match Java golden bytes for SINGLE.\nRust: {}\nJava: {}",
bytes
.iter()
.map(|b| format!("{:02x}", b))
.collect::<String>(),
GOLDEN_SINGLE
);
assert_golden("SINGLE", &sketch, GOLDEN_SINGLE);
}
#[test]
@@ -723,18 +739,7 @@ mod tests {
for v in [-3.0, -1.0, 2.0, 5.0] {
sketch.add(v);
}
let bytes = sketch.to_java_bytes();
let expected = hex_to_bytes(GOLDEN_NEGATIVE);
assert_eq!(
bytes,
expected,
"Rust encoding doesn't match Java golden bytes for NEGATIVE.\nRust: {}\nJava: {}",
bytes
.iter()
.map(|b| format!("{:02x}", b))
.collect::<String>(),
GOLDEN_NEGATIVE
);
assert_golden("NEGATIVE", &sketch, GOLDEN_NEGATIVE);
}
#[test]
@@ -743,35 +748,13 @@ mod tests {
for v in [0.0, 1.0, 2.0] {
sketch.add(v);
}
let bytes = sketch.to_java_bytes();
let expected = hex_to_bytes(GOLDEN_ZERO);
assert_eq!(
bytes,
expected,
"Rust encoding doesn't match Java golden bytes for ZERO.\nRust: {}\nJava: {}",
bytes
.iter()
.map(|b| format!("{:02x}", b))
.collect::<String>(),
GOLDEN_ZERO
);
assert_golden("ZERO", &sketch, GOLDEN_ZERO);
}
#[test]
fn test_cross_language_empty() {
let sketch = DDSketch::new(Config::defaults());
let bytes = sketch.to_java_bytes();
let expected = hex_to_bytes(GOLDEN_EMPTY);
assert_eq!(
bytes,
expected,
"Rust encoding doesn't match Java golden bytes for EMPTY.\nRust: {}\nJava: {}",
bytes
.iter()
.map(|b| format!("{:02x}", b))
.collect::<String>(),
GOLDEN_EMPTY
);
assert_golden("EMPTY", &sketch, GOLDEN_EMPTY);
}
#[test]
@@ -780,23 +763,11 @@ mod tests {
for i in 1..=100 {
sketch.add(i as f64);
}
let bytes = sketch.to_java_bytes();
let expected = hex_to_bytes(GOLDEN_MANY);
assert_eq!(
bytes,
expected,
"Rust encoding doesn't match Java golden bytes for MANY.\nRust: {}\nJava: {}",
bytes
.iter()
.map(|b| format!("{:02x}", b))
.collect::<String>(),
GOLDEN_MANY
);
assert_golden("MANY", &sketch, GOLDEN_MANY);
}
#[test]
fn test_decode_java_golden_bytes() {
// Verify we can decode all Java golden bytes
for (name, hex) in [
("SIMPLE", GOLDEN_SIMPLE),
("SINGLE", GOLDEN_SINGLE),
@@ -838,7 +809,7 @@ mod tests {
(orig_p95 - dec_p95).abs() / orig_p95 < alpha,
"p95 mismatch: {} vs {}",
orig_p95,
dec_p95
dec_p95,
);
}
}