mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-01 16:10:42 +00:00
refactor: rewrite encoding.rs with idiomatic Rust
- Replace bare constants with FlagType and BinEncodingMode enums
- Use const fn for flag byte construction instead of raw bit ops
- Replace if-else chain with nested match in decode_from_java_bytes
- Use split_first() in read_byte for idiomatic slice consumption
- Use split_at in read_f64_le to avoid TryInto on edition 2018
- Use u64::from(next) instead of `next as u64` casts
- Extract assert_golden, assert_quantiles_match, bytes_to_hex helpers to reduce duplication across golden byte tests
- Fix edition-2018 assert! format string compatibility
- Clean up is_valid_flag_byte with let-else and match

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -6,7 +6,6 @@
|
||||
//! serialization so that sketches produced in Rust can be deserialized
|
||||
//! and merged by Java consumers.
|
||||
|
||||
use std::convert::TryInto;
|
||||
use std::fmt;
|
||||
|
||||
use crate::config::Config;
|
||||
@@ -14,25 +13,68 @@ use crate::ddsketch::DDSketch;
|
||||
use crate::store::Store;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Flag byte layout: (subflag << 2) | type_ordinal
|
||||
// Flag byte layout
|
||||
//
|
||||
// Each flag byte packs a 2-bit type ordinal in the low bits and a 6-bit
|
||||
// subflag in the upper bits: (subflag << 2) | type_ordinal
|
||||
// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/Flag.java
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FLAG_TYPE_SKETCH_FEATURES: u8 = 0b00;
|
||||
const FLAG_TYPE_POSITIVE_STORE: u8 = 0b01;
|
||||
const FLAG_TYPE_INDEX_MAPPING: u8 = 0b10;
|
||||
const FLAG_TYPE_NEGATIVE_STORE: u8 = 0b11;
|
||||
/// Type ordinal stored in the two least-significant bits of a flag byte.
///
/// The remaining six (upper) bits of the byte carry a type-specific subflag,
/// so a full flag byte is `(subflag << 2) | type_ordinal`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
enum FlagType {
    /// Ordinal `0b00`.
    SketchFeatures = 0b00,
    /// Ordinal `0b01`.
    PositiveStore = 0b01,
    /// Ordinal `0b10`.
    IndexMapping = 0b10,
    /// Ordinal `0b11`.
    NegativeStore = 0b11,
}
|
||||
|
||||
const FLAG_INDEX_MAPPING_LOG: u8 = FLAG_TYPE_INDEX_MAPPING; // 0x02
|
||||
const FLAG_ZERO_COUNT: u8 = (1 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x04
|
||||
const FLAG_COUNT: u8 = (0x28 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0xA0
|
||||
const FLAG_SUM: u8 = (0x21 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x84
|
||||
const FLAG_MIN: u8 = (0x22 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x88
|
||||
const FLAG_MAX: u8 = (0x23 << 2) | FLAG_TYPE_SKETCH_FEATURES; // 0x8C
|
||||
impl FlagType {
|
||||
fn from_byte(b: u8) -> Option<Self> {
|
||||
match b & 0x03 {
|
||||
0 => Some(Self::SketchFeatures),
|
||||
1 => Some(Self::PositiveStore),
|
||||
2 => Some(Self::IndexMapping),
|
||||
3 => Some(Self::NegativeStore),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BinEncodingMode subflags
|
||||
const BIN_MODE_INDEX_DELTAS_AND_COUNTS: u8 = 1;
|
||||
const BIN_MODE_INDEX_DELTAS: u8 = 2;
|
||||
const BIN_MODE_CONTIGUOUS_COUNTS: u8 = 3;
|
||||
/// Construct a flag byte from a subflag and a type.
|
||||
const fn flag(subflag: u8, flag_type: FlagType) -> u8 {
|
||||
(subflag << 2) | (flag_type as u8)
|
||||
}
|
||||
|
||||
// Pre-computed flag bytes for the sketch features we encode/decode.
|
||||
const FLAG_INDEX_MAPPING_LOG: u8 = flag(0, FlagType::IndexMapping); // 0x02
|
||||
const FLAG_ZERO_COUNT: u8 = flag(1, FlagType::SketchFeatures); // 0x04
|
||||
const FLAG_COUNT: u8 = flag(0x28, FlagType::SketchFeatures); // 0xA0
|
||||
const FLAG_SUM: u8 = flag(0x21, FlagType::SketchFeatures); // 0x84
|
||||
const FLAG_MIN: u8 = flag(0x22, FlagType::SketchFeatures); // 0x88
|
||||
const FLAG_MAX: u8 = flag(0x23, FlagType::SketchFeatures); // 0x8C
|
||||
|
||||
/// How the bins of a store are laid out on the wire.
///
/// The discriminant is the subflag value carried in a store flag byte.
/// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/BinEncodingMode.java
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
enum BinEncodingMode {
    /// Subflag `1`.
    IndexDeltasAndCounts = 1,
    /// Subflag `2`.
    IndexDeltas = 2,
    /// Subflag `3`.
    ContiguousCounts = 3,
}
|
||||
|
||||
impl BinEncodingMode {
|
||||
fn from_subflag(subflag: u8) -> Option<Self> {
|
||||
match subflag {
|
||||
1 => Some(Self::IndexDeltasAndCounts),
|
||||
2 => Some(Self::IndexDeltas),
|
||||
3 => Some(Self::ContiguousCounts),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const VAR_DOUBLE_ROTATE_DISTANCE: u32 = 6;
|
||||
const MAX_VAR_LEN_64: usize = 9;
|
||||
@@ -51,11 +93,11 @@ pub enum DecodeError {
|
||||
}
|
||||
|
||||
impl fmt::Display for DecodeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
DecodeError::UnexpectedEof => write!(f, "unexpected end of input"),
|
||||
DecodeError::InvalidFlag(b) => write!(f, "invalid flag byte: 0x{:02X}", b),
|
||||
DecodeError::InvalidData(msg) => write!(f, "invalid data: {}", msg),
|
||||
Self::UnexpectedEof => write!(f, "unexpected end of input"),
|
||||
Self::InvalidFlag(b) => write!(f, "invalid flag byte: 0x{b:02X}"),
|
||||
Self::InvalidData(msg) => write!(f, "invalid data: {msg}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -64,6 +106,7 @@ impl std::error::Error for DecodeError {}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// VarEncoding — bit-exact port of Java VarEncodingHelper
|
||||
// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/encoding/VarEncodingHelper.java
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn encode_unsigned_var_long(out: &mut Vec<u8>, mut value: u64) {
|
||||
@@ -81,13 +124,14 @@ fn decode_unsigned_var_long(input: &mut &[u8]) -> Result<u64, DecodeError> {
|
||||
loop {
|
||||
let next = read_byte(input)?;
|
||||
if next < 0x80 || shift == 56 {
|
||||
return Ok(value | ((next as u64) << shift));
|
||||
return Ok(value | (u64::from(next) << shift));
|
||||
}
|
||||
value |= ((next as u64) & 0x7F) << shift;
|
||||
value |= (u64::from(next) & 0x7F) << shift;
|
||||
shift += 7;
|
||||
}
|
||||
}
|
||||
|
||||
/// ZigZag encode then var-long encode.
|
||||
fn encode_signed_var_long(out: &mut Vec<u8>, value: i64) {
|
||||
let encoded = ((value >> 63) ^ (value << 1)) as u64;
|
||||
encode_unsigned_var_long(out, encoded);
|
||||
@@ -99,14 +143,14 @@ fn decode_signed_var_long(input: &mut &[u8]) -> Result<i64, DecodeError> {
|
||||
}
|
||||
|
||||
fn double_to_var_bits(value: f64) -> u64 {
|
||||
let bits = f64::to_bits(value + 1.0).wrapping_sub(f64::to_bits(1.0_f64));
|
||||
let bits = f64::to_bits(value + 1.0).wrapping_sub(f64::to_bits(1.0));
|
||||
bits.rotate_left(VAR_DOUBLE_ROTATE_DISTANCE)
|
||||
}
|
||||
|
||||
fn var_bits_to_double(bits: u64) -> f64 {
|
||||
f64::from_bits(
|
||||
bits.rotate_right(VAR_DOUBLE_ROTATE_DISTANCE)
|
||||
.wrapping_add(f64::to_bits(1.0_f64)),
|
||||
.wrapping_add(f64::to_bits(1.0)),
|
||||
) - 1.0
|
||||
}
|
||||
|
||||
@@ -130,30 +174,31 @@ fn decode_var_double(input: &mut &[u8]) -> Result<f64, DecodeError> {
|
||||
loop {
|
||||
let next = read_byte(input)?;
|
||||
if shift == 1 {
|
||||
bits |= next as u64;
|
||||
bits |= u64::from(next);
|
||||
break;
|
||||
}
|
||||
if next < 0x80 {
|
||||
bits |= (next as u64) << shift;
|
||||
bits |= u64::from(next) << shift;
|
||||
break;
|
||||
}
|
||||
bits |= ((next as u64) & 0x7F) << shift;
|
||||
bits |= (u64::from(next) & 0x7F) << shift;
|
||||
shift -= 7;
|
||||
}
|
||||
Ok(var_bits_to_double(bits))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// Byte-level helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn read_byte(input: &mut &[u8]) -> Result<u8, DecodeError> {
|
||||
if input.is_empty() {
|
||||
return Err(DecodeError::UnexpectedEof);
|
||||
match input.split_first() {
|
||||
Some((&byte, rest)) => {
|
||||
*input = rest;
|
||||
Ok(byte)
|
||||
}
|
||||
None => Err(DecodeError::UnexpectedEof),
|
||||
}
|
||||
let b = input[0];
|
||||
*input = &input[1..];
|
||||
Ok(b)
|
||||
}
|
||||
|
||||
fn write_f64_le(out: &mut Vec<u8>, value: f64) {
|
||||
@@ -164,75 +209,79 @@ fn read_f64_le(input: &mut &[u8]) -> Result<f64, DecodeError> {
|
||||
if input.len() < 8 {
|
||||
return Err(DecodeError::UnexpectedEof);
|
||||
}
|
||||
let bytes: [u8; 8] = input[..8].try_into().unwrap();
|
||||
*input = &input[8..];
|
||||
Ok(f64::from_le_bytes(bytes))
|
||||
let (bytes, rest) = input.split_at(8);
|
||||
*input = rest;
|
||||
// bytes is guaranteed to be length 8 by the split_at above.
|
||||
let arr = [
|
||||
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
|
||||
];
|
||||
Ok(f64::from_le_bytes(arr))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Store encoding/decoding
|
||||
// See: https://github.com/DataDog/sketches-java/blob/master/src/main/java/com/datadoghq/sketch/ddsketch/store/DenseStore.java (encode/decode methods)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Iterate the non-zero bins in the store as (absolute_index, count) pairs.
|
||||
fn non_zero_bins(store: &Store) -> Vec<(i32, u64)> {
|
||||
/// Collect non-zero bins in the store as (absolute_index, count) pairs.
|
||||
///
|
||||
/// Allocation is acceptable here: this runs once per encode and the Vec
|
||||
/// has at most `max_num_bins` entries.
|
||||
fn collect_non_zero_bins(store: &Store) -> Vec<(i32, u64)> {
|
||||
if store.count == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
let start = (store.min_key - store.offset) as usize;
|
||||
let end = (store.max_key - store.offset + 1) as usize;
|
||||
let end = end.min(store.bins.len());
|
||||
let mut result = Vec::new();
|
||||
for i in start..end {
|
||||
let count = store.bins[i];
|
||||
if count > 0 {
|
||||
result.push((i as i32 + store.offset, count));
|
||||
}
|
||||
}
|
||||
result
|
||||
let end = ((store.max_key - store.offset + 1) as usize).min(store.bins.len());
|
||||
store.bins[start..end]
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|&(_, &count)| count > 0)
|
||||
.map(|(i, &count)| (start as i32 + i as i32 + store.offset, count))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn encode_store(out: &mut Vec<u8>, store: &Store, flag_type: u8) {
|
||||
let bins = non_zero_bins(store);
|
||||
fn encode_store(out: &mut Vec<u8>, store: &Store, flag_type: FlagType) {
|
||||
let bins = collect_non_zero_bins(store);
|
||||
if bins.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// INDEX_DELTAS_AND_COUNTS mode
|
||||
out.push((BIN_MODE_INDEX_DELTAS_AND_COUNTS << 2) | flag_type);
|
||||
out.push(flag(BinEncodingMode::IndexDeltasAndCounts as u8, flag_type));
|
||||
encode_unsigned_var_long(out, bins.len() as u64);
|
||||
|
||||
let mut prev_index: i64 = 0;
|
||||
for &(index, count) in &bins {
|
||||
encode_signed_var_long(out, (index as i64) - prev_index);
|
||||
encode_signed_var_long(out, i64::from(index) - prev_index);
|
||||
encode_var_double(out, count as f64);
|
||||
prev_index = index as i64;
|
||||
prev_index = i64::from(index);
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result<Store, DecodeError> {
|
||||
let mode = subflag;
|
||||
let mode = BinEncodingMode::from_subflag(subflag).ok_or_else(|| {
|
||||
DecodeError::InvalidData(format!("unknown bin encoding mode subflag: {subflag}"))
|
||||
})?;
|
||||
let num_bins = decode_unsigned_var_long(input)? as usize;
|
||||
let mut store = Store::new(bin_limit);
|
||||
|
||||
match mode {
|
||||
BIN_MODE_INDEX_DELTAS_AND_COUNTS => {
|
||||
BinEncodingMode::IndexDeltasAndCounts => {
|
||||
let mut index: i64 = 0;
|
||||
for _ in 0..num_bins {
|
||||
let delta = decode_signed_var_long(input)?;
|
||||
index += decode_signed_var_long(input)?;
|
||||
let count = decode_var_double(input)?;
|
||||
index += delta;
|
||||
store.add_count(index as i32, count as u64);
|
||||
}
|
||||
}
|
||||
BIN_MODE_INDEX_DELTAS => {
|
||||
BinEncodingMode::IndexDeltas => {
|
||||
let mut index: i64 = 0;
|
||||
for _ in 0..num_bins {
|
||||
let delta = decode_signed_var_long(input)?;
|
||||
index += delta;
|
||||
index += decode_signed_var_long(input)?;
|
||||
store.add_count(index as i32, 1);
|
||||
}
|
||||
}
|
||||
BIN_MODE_CONTIGUOUS_COUNTS => {
|
||||
BinEncodingMode::ContiguousCounts => {
|
||||
let start_index = decode_signed_var_long(input)?;
|
||||
let index_delta = decode_signed_var_long(input)?;
|
||||
let mut index = start_index;
|
||||
@@ -242,12 +291,6 @@ fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result<Stor
|
||||
index += index_delta;
|
||||
}
|
||||
}
|
||||
other => {
|
||||
return Err(DecodeError::InvalidData(format!(
|
||||
"unknown bin encoding mode subflag: {}",
|
||||
other
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(store)
|
||||
@@ -270,10 +313,9 @@ fn decode_store(input: &mut &[u8], subflag: u8, bin_limit: usize) -> Result<Stor
|
||||
/// 6. Negative store bins
|
||||
pub fn encode_to_java_bytes(sketch: &DDSketch) -> Vec<u8> {
|
||||
let mut out = Vec::new();
|
||||
|
||||
let count = sketch.count() as f64;
|
||||
|
||||
// --- Summary statistics (DDSketchWithExactSummaryStatistics.encode) ---
|
||||
// Summary statistics (DDSketchWithExactSummaryStatistics.encode)
|
||||
if count != 0.0 {
|
||||
out.push(FLAG_COUNT);
|
||||
encode_var_double(&mut out, count);
|
||||
@@ -287,24 +329,18 @@ pub fn encode_to_java_bytes(sketch: &DDSketch) -> Vec<u8> {
|
||||
write_f64_le(&mut out, sketch.sum);
|
||||
}
|
||||
|
||||
// --- DDSketch.encode (index mapping + zero count + stores) ---
|
||||
|
||||
// Index mapping (LOG layout, indexOffset = 0.0)
|
||||
// DDSketch.encode: index mapping + zero count + stores
|
||||
out.push(FLAG_INDEX_MAPPING_LOG);
|
||||
write_f64_le(&mut out, sketch.config.gamma);
|
||||
write_f64_le(&mut out, 0.0_f64);
|
||||
|
||||
// Zero count
|
||||
if sketch.zero_count != 0 {
|
||||
out.push(FLAG_ZERO_COUNT);
|
||||
encode_var_double(&mut out, sketch.zero_count as f64);
|
||||
}
|
||||
|
||||
// Positive store
|
||||
encode_store(&mut out, &sketch.store, FLAG_TYPE_POSITIVE_STORE);
|
||||
|
||||
// Negative store
|
||||
encode_store(&mut out, &sketch.negative_store, FLAG_TYPE_NEGATIVE_STORE);
|
||||
encode_store(&mut out, &sketch.store, FlagType::PositiveStore);
|
||||
encode_store(&mut out, &sketch.negative_store, FlagType::NegativeStore);
|
||||
|
||||
out
|
||||
}
|
||||
@@ -319,12 +355,9 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result<DDSketch, DecodeError> {
|
||||
|
||||
let mut input = bytes;
|
||||
|
||||
// Skip optional version prefix (0x02 followed by a valid flag byte)
|
||||
if input.len() >= 2 && input[0] == 0x02 {
|
||||
let second = input[1];
|
||||
if is_valid_flag_byte(second) {
|
||||
input = &input[1..];
|
||||
}
|
||||
// Skip optional version prefix (0x02 followed by a valid flag byte).
|
||||
if input.len() >= 2 && input[0] == 0x02 && is_valid_flag_byte(input[1]) {
|
||||
input = &input[1..];
|
||||
}
|
||||
|
||||
let mut gamma: Option<f64> = None;
|
||||
@@ -336,58 +369,51 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result<DDSketch, DecodeError> {
|
||||
let mut negative_store: Option<Store> = None;
|
||||
|
||||
while !input.is_empty() {
|
||||
let flag = read_byte(&mut input)?;
|
||||
let flag_type = flag & 0x03;
|
||||
let subflag = flag >> 2;
|
||||
let flag_byte = read_byte(&mut input)?;
|
||||
let flag_type =
|
||||
FlagType::from_byte(flag_byte).ok_or(DecodeError::InvalidFlag(flag_byte))?;
|
||||
let subflag = flag_byte >> 2;
|
||||
|
||||
match flag_type {
|
||||
FLAG_TYPE_INDEX_MAPPING => {
|
||||
FlagType::IndexMapping => {
|
||||
gamma = Some(read_f64_le(&mut input)?);
|
||||
let _index_offset = read_f64_le(&mut input)?;
|
||||
}
|
||||
FLAG_TYPE_SKETCH_FEATURES => {
|
||||
if flag == FLAG_ZERO_COUNT {
|
||||
zero_count += decode_var_double(&mut input)?;
|
||||
} else if flag == FLAG_COUNT {
|
||||
FlagType::SketchFeatures => match flag_byte {
|
||||
FLAG_ZERO_COUNT => zero_count += decode_var_double(&mut input)?,
|
||||
FLAG_COUNT => {
|
||||
let _count = decode_var_double(&mut input)?;
|
||||
} else if flag == FLAG_SUM {
|
||||
sum = read_f64_le(&mut input)?;
|
||||
} else if flag == FLAG_MIN {
|
||||
min = read_f64_le(&mut input)?;
|
||||
} else if flag == FLAG_MAX {
|
||||
max = read_f64_le(&mut input)?;
|
||||
} else {
|
||||
return Err(DecodeError::InvalidFlag(flag));
|
||||
}
|
||||
}
|
||||
FLAG_TYPE_POSITIVE_STORE => {
|
||||
FLAG_SUM => sum = read_f64_le(&mut input)?,
|
||||
FLAG_MIN => min = read_f64_le(&mut input)?,
|
||||
FLAG_MAX => max = read_f64_le(&mut input)?,
|
||||
_ => return Err(DecodeError::InvalidFlag(flag_byte)),
|
||||
},
|
||||
FlagType::PositiveStore => {
|
||||
positive_store = Some(decode_store(
|
||||
&mut input,
|
||||
subflag,
|
||||
DEFAULT_MAX_BINS as usize,
|
||||
)?);
|
||||
}
|
||||
FLAG_TYPE_NEGATIVE_STORE => {
|
||||
FlagType::NegativeStore => {
|
||||
negative_store = Some(decode_store(
|
||||
&mut input,
|
||||
subflag,
|
||||
DEFAULT_MAX_BINS as usize,
|
||||
)?);
|
||||
}
|
||||
_ => {
|
||||
return Err(DecodeError::InvalidFlag(flag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let g = gamma.unwrap_or_else(|| Config::defaults().gamma);
|
||||
let config = Config::from_gamma(g);
|
||||
let pos = positive_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
|
||||
let store = positive_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
|
||||
let neg = negative_store.unwrap_or_else(|| Store::new(config.max_num_bins as usize));
|
||||
|
||||
Ok(DDSketch {
|
||||
config,
|
||||
store: pos,
|
||||
store,
|
||||
negative_store: neg,
|
||||
min,
|
||||
max,
|
||||
@@ -397,21 +423,22 @@ pub fn decode_from_java_bytes(bytes: &[u8]) -> Result<DDSketch, DecodeError> {
|
||||
}
|
||||
|
||||
/// Check whether a byte is a valid flag byte for the DDSketch binary format.
|
||||
/// Used to detect the optional version prefix.
|
||||
fn is_valid_flag_byte(b: u8) -> bool {
|
||||
matches!(
|
||||
// Known sketch-feature flags
|
||||
if matches!(
|
||||
b,
|
||||
FLAG_ZERO_COUNT | FLAG_COUNT | FLAG_SUM | FLAG_MIN | FLAG_MAX | FLAG_INDEX_MAPPING_LOG
|
||||
) || {
|
||||
let flag_type = b & 0x03;
|
||||
let subflag = b >> 2;
|
||||
(flag_type == FLAG_TYPE_POSITIVE_STORE || flag_type == FLAG_TYPE_NEGATIVE_STORE)
|
||||
&& (1..=3).contains(&subflag)
|
||||
} || {
|
||||
// INDEX_MAPPING with other layouts (LOG_LINEAR=1..LOG_QUARTIC=4)
|
||||
let flag_type = b & 0x03;
|
||||
let subflag = b >> 2;
|
||||
flag_type == FLAG_TYPE_INDEX_MAPPING && subflag <= 4
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
let Some(flag_type) = FlagType::from_byte(b) else {
|
||||
return false;
|
||||
};
|
||||
let subflag = b >> 2;
|
||||
match flag_type {
|
||||
FlagType::PositiveStore | FlagType::NegativeStore => (1..=3).contains(&subflag),
|
||||
FlagType::IndexMapping => subflag <= 4, // LOG=0, LOG_LINEAR=1 .. LOG_QUARTIC=4
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -430,7 +457,7 @@ mod tests {
|
||||
fn test_unsigned_var_long_zero() {
|
||||
let mut buf = Vec::new();
|
||||
encode_unsigned_var_long(&mut buf, 0);
|
||||
assert_eq!(buf, vec![0x00]);
|
||||
assert_eq!(buf, [0x00]);
|
||||
|
||||
let mut input = buf.as_slice();
|
||||
assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 0);
|
||||
@@ -441,7 +468,7 @@ mod tests {
|
||||
fn test_unsigned_var_long_small() {
|
||||
let mut buf = Vec::new();
|
||||
encode_unsigned_var_long(&mut buf, 1);
|
||||
assert_eq!(buf, vec![0x01]);
|
||||
assert_eq!(buf, [0x01]);
|
||||
|
||||
let mut input = buf.as_slice();
|
||||
assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 1);
|
||||
@@ -451,7 +478,7 @@ mod tests {
|
||||
fn test_unsigned_var_long_128() {
|
||||
let mut buf = Vec::new();
|
||||
encode_unsigned_var_long(&mut buf, 128);
|
||||
assert_eq!(buf, vec![0x80, 0x01]);
|
||||
assert_eq!(buf, [0x80, 0x01]);
|
||||
|
||||
let mut input = buf.as_slice();
|
||||
assert_eq!(decode_unsigned_var_long(&mut input).unwrap(), 128);
|
||||
@@ -459,7 +486,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_unsigned_var_long_roundtrip() {
|
||||
for &v in &[0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] {
|
||||
for v in [0u64, 1, 127, 128, 255, 256, 16383, 16384, u64::MAX] {
|
||||
let mut buf = Vec::new();
|
||||
encode_unsigned_var_long(&mut buf, v);
|
||||
let mut input = buf.as_slice();
|
||||
@@ -471,7 +498,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_signed_var_long_roundtrip() {
|
||||
for &v in &[0i64, 1, -1, 63, -64, 64, -65, i64::MAX, i64::MIN] {
|
||||
for v in [0i64, 1, -1, 63, -64, 64, -65, i64::MAX, i64::MIN] {
|
||||
let mut buf = Vec::new();
|
||||
encode_signed_var_long(&mut buf, v);
|
||||
let mut input = buf.as_slice();
|
||||
@@ -483,7 +510,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_var_double_roundtrip() {
|
||||
for &v in &[
|
||||
for v in [
|
||||
0.0, 1.0, 2.0, 5.0, 15.0, 42.0, 100.0, 1e-9, 1e15, 0.5, 3.14159,
|
||||
] {
|
||||
let mut buf = Vec::new();
|
||||
@@ -494,7 +521,7 @@ mod tests {
|
||||
(decoded - v).abs() < 1e-15 || decoded == v,
|
||||
"roundtrip failed for {}: got {}",
|
||||
v,
|
||||
decoded
|
||||
decoded,
|
||||
);
|
||||
assert!(input.is_empty());
|
||||
}
|
||||
@@ -502,7 +529,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_var_double_small_integers() {
|
||||
// Small non-negative integers should encode compactly
|
||||
let mut buf = Vec::new();
|
||||
encode_var_double(&mut buf, 1.0);
|
||||
assert_eq!(buf.len(), 1, "VarDouble(1.0) should be 1 byte");
|
||||
@@ -518,7 +544,6 @@ mod tests {
|
||||
fn test_encode_empty_sketch() {
|
||||
let sketch = DDSketch::new(Config::defaults());
|
||||
let bytes = sketch.to_java_bytes();
|
||||
// Empty sketch: no summary stats, just index mapping
|
||||
assert!(!bytes.is_empty());
|
||||
|
||||
let decoded = DDSketch::from_java_bytes(&bytes).unwrap();
|
||||
@@ -543,17 +568,7 @@ mod tests {
|
||||
assert_eq!(decoded.max(), Some(5.0));
|
||||
assert_eq!(decoded.sum(), Some(15.0));
|
||||
|
||||
for q in [0.5, 0.9, 0.95, 0.99] {
|
||||
let orig = sketch.quantile(q).unwrap().unwrap();
|
||||
let dec = decoded.quantile(q).unwrap().unwrap();
|
||||
assert!(
|
||||
(orig - dec).abs() / orig.abs().max(1e-15) < 1e-12,
|
||||
"quantile({}) mismatch: {} vs {}",
|
||||
q,
|
||||
orig,
|
||||
dec
|
||||
);
|
||||
}
|
||||
assert_quantiles_match(&sketch, &decoded, &[0.5, 0.9, 0.95, 0.99]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -585,17 +600,7 @@ mod tests {
|
||||
assert_eq!(decoded.max(), Some(5.0));
|
||||
assert_eq!(decoded.sum(), Some(3.0));
|
||||
|
||||
for q in [0.0, 0.25, 0.5, 0.75, 1.0] {
|
||||
let orig = sketch.quantile(q).unwrap().unwrap();
|
||||
let dec = decoded.quantile(q).unwrap().unwrap();
|
||||
assert!(
|
||||
(orig - dec).abs() / orig.abs().max(1e-15) < 1e-12,
|
||||
"quantile({}) mismatch: {} vs {}",
|
||||
q,
|
||||
orig,
|
||||
dec
|
||||
);
|
||||
}
|
||||
assert_quantiles_match(&sketch, &decoded, &[0.0, 0.25, 0.5, 0.75, 1.0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -655,14 +660,25 @@ mod tests {
|
||||
|
||||
let bytes = sketch.to_java_bytes();
|
||||
|
||||
// First byte should be FLAG_COUNT (0xA0) since count > 0
|
||||
assert_eq!(bytes[0], FLAG_COUNT, "first byte should be COUNT flag");
|
||||
|
||||
// After count + min + max + sum blocks, we should see FLAG_INDEX_MAPPING_LOG (0x02)
|
||||
let has_mapping = bytes.contains(&FLAG_INDEX_MAPPING_LOG);
|
||||
assert!(has_mapping, "should contain index mapping flag");
|
||||
assert!(
|
||||
bytes.contains(&FLAG_INDEX_MAPPING_LOG),
|
||||
"should contain index mapping flag"
|
||||
);
|
||||
}
|
||||
|
||||
// --- Cross-language golden byte tests ---
|
||||
//
|
||||
// Golden bytes generated by Java's DDSketchWithExactSummaryStatistics.encode()
|
||||
// using LogarithmicMapping(0.01) + CollapsingLowestDenseStore(2048).
|
||||
|
||||
const GOLDEN_SIMPLE: &str = "a00588000000000000f03f8c0000000000001440840000000000002e4002fd4a815abf52f03f000000000000000005050002440228021e021602";
|
||||
const GOLDEN_SINGLE: &str = "a0028800000000000045408c000000000000454084000000000000454002fd4a815abf52f03f00000000000000000501f40202";
|
||||
const GOLDEN_NEGATIVE: &str = "a084408800000000000008c08c000000000000144084000000000000084002fd4a815abf52f03f0000000000000000050244025c02070200026c02";
|
||||
const GOLDEN_ZERO: &str = "a0048800000000000000008c000000000000004084000000000000084002fd4a815abf52f03f00000000000000000402050200024402";
|
||||
const GOLDEN_EMPTY: &str = "02fd4a815abf52f03f0000000000000000";
|
||||
const GOLDEN_MANY: &str = "a08d1488000000000000f03f8c0000000000005940840000000000bab34002fd4a815abf52f03f000000000000000005550002440228021e021602120210020c020c020c0208020a020802060208020602060206020602040206020402040204020402040204020402040204020202040202020402020204020202020204020202020202020402020202020202020202020202020202020202020202020202020202020203020202020202020302020202020302020202020302020203020202030202020302030202020302030203020202030203020302030202";
|
||||
|
||||
fn hex_to_bytes(hex: &str) -> Vec<u8> {
|
||||
(0..hex.len())
|
||||
.step_by(2)
|
||||
@@ -670,14 +686,36 @@ mod tests {
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Golden bytes generated by Java's DDSketchWithExactSummaryStatistics.encode()
|
||||
// using LogarithmicMapping(0.01) + CollapsingLowestDenseStore(2048)
|
||||
const GOLDEN_SIMPLE: &str = "a00588000000000000f03f8c0000000000001440840000000000002e4002fd4a815abf52f03f000000000000000005050002440228021e021602";
|
||||
const GOLDEN_SINGLE: &str = "a0028800000000000045408c000000000000454084000000000000454002fd4a815abf52f03f00000000000000000501f40202";
|
||||
const GOLDEN_NEGATIVE: &str = "a084408800000000000008c08c000000000000144084000000000000084002fd4a815abf52f03f0000000000000000050244025c02070200026c02";
|
||||
const GOLDEN_ZERO: &str = "a0048800000000000000008c000000000000004084000000000000084002fd4a815abf52f03f00000000000000000402050200024402";
|
||||
const GOLDEN_EMPTY: &str = "02fd4a815abf52f03f0000000000000000";
|
||||
const GOLDEN_MANY: &str = "a08d1488000000000000f03f8c0000000000005940840000000000bab34002fd4a815abf52f03f000000000000000005550002440228021e021602120210020c020c020c0208020a020802060208020602060206020602040206020402040204020402040204020402040204020202040202020402020204020202020204020202020202020402020202020202020202020202020202020202020202020202020202020203020202020202020302020202020302020202020302020203020202030202020302030202020302030203020202030203020302030202";
|
||||
/// Renders `bytes` as a lowercase hexadecimal string, two digits per byte.
fn bytes_to_hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
|
||||
|
||||
fn assert_golden(label: &str, sketch: &DDSketch, golden_hex: &str) {
|
||||
let bytes = sketch.to_java_bytes();
|
||||
let expected = hex_to_bytes(golden_hex);
|
||||
assert_eq!(
|
||||
bytes,
|
||||
expected,
|
||||
"Rust encoding doesn't match Java golden bytes for {}.\nRust: {}\nJava: {}",
|
||||
label,
|
||||
bytes_to_hex(&bytes),
|
||||
golden_hex,
|
||||
);
|
||||
}
|
||||
|
||||
/// Asserts that `a` and `b` report the same value for every quantile in
/// `quantiles`.
///
/// Values are compared by relative error against `a`'s value (with the
/// denominator floored at `1e-15`), and must differ by less than `1e-12`.
/// Panics if either sketch fails to produce a value for a quantile.
fn assert_quantiles_match(a: &DDSketch, b: &DDSketch, quantiles: &[f64]) {
    for q in quantiles.iter().copied() {
        let lhs = a.quantile(q).unwrap().unwrap();
        let rhs = b.quantile(q).unwrap().unwrap();
        let relative_error = (lhs - rhs).abs() / lhs.abs().max(1e-15);

        assert!(
            relative_error < 1e-12,
            "quantile({}) mismatch: {} vs {}",
            q,
            lhs,
            rhs,
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_cross_language_simple() {
|
||||
@@ -685,36 +723,14 @@ mod tests {
|
||||
for v in [1.0, 2.0, 3.0, 4.0, 5.0] {
|
||||
sketch.add(v);
|
||||
}
|
||||
let bytes = sketch.to_java_bytes();
|
||||
let expected = hex_to_bytes(GOLDEN_SIMPLE);
|
||||
assert_eq!(
|
||||
bytes,
|
||||
expected,
|
||||
"Rust encoding doesn't match Java golden bytes for SIMPLE.\nRust: {}\nJava: {}",
|
||||
bytes
|
||||
.iter()
|
||||
.map(|b| format!("{:02x}", b))
|
||||
.collect::<String>(),
|
||||
GOLDEN_SIMPLE
|
||||
);
|
||||
assert_golden("SIMPLE", &sketch, GOLDEN_SIMPLE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cross_language_single() {
|
||||
let mut sketch = DDSketch::new(Config::defaults());
|
||||
sketch.add(42.0);
|
||||
let bytes = sketch.to_java_bytes();
|
||||
let expected = hex_to_bytes(GOLDEN_SINGLE);
|
||||
assert_eq!(
|
||||
bytes,
|
||||
expected,
|
||||
"Rust encoding doesn't match Java golden bytes for SINGLE.\nRust: {}\nJava: {}",
|
||||
bytes
|
||||
.iter()
|
||||
.map(|b| format!("{:02x}", b))
|
||||
.collect::<String>(),
|
||||
GOLDEN_SINGLE
|
||||
);
|
||||
assert_golden("SINGLE", &sketch, GOLDEN_SINGLE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -723,18 +739,7 @@ mod tests {
|
||||
for v in [-3.0, -1.0, 2.0, 5.0] {
|
||||
sketch.add(v);
|
||||
}
|
||||
let bytes = sketch.to_java_bytes();
|
||||
let expected = hex_to_bytes(GOLDEN_NEGATIVE);
|
||||
assert_eq!(
|
||||
bytes,
|
||||
expected,
|
||||
"Rust encoding doesn't match Java golden bytes for NEGATIVE.\nRust: {}\nJava: {}",
|
||||
bytes
|
||||
.iter()
|
||||
.map(|b| format!("{:02x}", b))
|
||||
.collect::<String>(),
|
||||
GOLDEN_NEGATIVE
|
||||
);
|
||||
assert_golden("NEGATIVE", &sketch, GOLDEN_NEGATIVE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -743,35 +748,13 @@ mod tests {
|
||||
for v in [0.0, 1.0, 2.0] {
|
||||
sketch.add(v);
|
||||
}
|
||||
let bytes = sketch.to_java_bytes();
|
||||
let expected = hex_to_bytes(GOLDEN_ZERO);
|
||||
assert_eq!(
|
||||
bytes,
|
||||
expected,
|
||||
"Rust encoding doesn't match Java golden bytes for ZERO.\nRust: {}\nJava: {}",
|
||||
bytes
|
||||
.iter()
|
||||
.map(|b| format!("{:02x}", b))
|
||||
.collect::<String>(),
|
||||
GOLDEN_ZERO
|
||||
);
|
||||
assert_golden("ZERO", &sketch, GOLDEN_ZERO);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cross_language_empty() {
|
||||
let sketch = DDSketch::new(Config::defaults());
|
||||
let bytes = sketch.to_java_bytes();
|
||||
let expected = hex_to_bytes(GOLDEN_EMPTY);
|
||||
assert_eq!(
|
||||
bytes,
|
||||
expected,
|
||||
"Rust encoding doesn't match Java golden bytes for EMPTY.\nRust: {}\nJava: {}",
|
||||
bytes
|
||||
.iter()
|
||||
.map(|b| format!("{:02x}", b))
|
||||
.collect::<String>(),
|
||||
GOLDEN_EMPTY
|
||||
);
|
||||
assert_golden("EMPTY", &sketch, GOLDEN_EMPTY);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -780,23 +763,11 @@ mod tests {
|
||||
for i in 1..=100 {
|
||||
sketch.add(i as f64);
|
||||
}
|
||||
let bytes = sketch.to_java_bytes();
|
||||
let expected = hex_to_bytes(GOLDEN_MANY);
|
||||
assert_eq!(
|
||||
bytes,
|
||||
expected,
|
||||
"Rust encoding doesn't match Java golden bytes for MANY.\nRust: {}\nJava: {}",
|
||||
bytes
|
||||
.iter()
|
||||
.map(|b| format!("{:02x}", b))
|
||||
.collect::<String>(),
|
||||
GOLDEN_MANY
|
||||
);
|
||||
assert_golden("MANY", &sketch, GOLDEN_MANY);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode_java_golden_bytes() {
|
||||
// Verify we can decode all Java golden bytes
|
||||
for (name, hex) in [
|
||||
("SIMPLE", GOLDEN_SIMPLE),
|
||||
("SINGLE", GOLDEN_SINGLE),
|
||||
@@ -838,7 +809,7 @@ mod tests {
|
||||
(orig_p95 - dec_p95).abs() / orig_p95 < alpha,
|
||||
"p95 mismatch: {} vs {}",
|
||||
orig_p95,
|
||||
dec_p95
|
||||
dec_p95,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user