From faae1db0665837c95e58e776b89553f7b05e68ed Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 4 Sep 2025 20:42:38 -0700 Subject: [PATCH] feat: humanize analyze numbers (#6889) * feat: humanize analyze numbers Signed-off-by: Ruihang Xia * update cargo lock Signed-off-by: Ruihang Xia * tailing space Signed-off-by: Ruihang Xia * use readable size Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- Cargo.lock | 2 + src/common/recordbatch/Cargo.toml | 2 + src/common/recordbatch/src/adapter.rs | 29 ++++++++- src/common/time/src/util.rs | 93 +++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 0e7dd01de2..b961de3745 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2471,9 +2471,11 @@ name = "common-recordbatch" version = "0.18.0" dependencies = [ "arc-swap", + "common-base", "common-error", "common-macro", "common-telemetry", + "common-time", "datafusion", "datafusion-common", "datatypes", diff --git a/src/common/recordbatch/Cargo.toml b/src/common/recordbatch/Cargo.toml index 4b0629db16..70db559e50 100644 --- a/src/common/recordbatch/Cargo.toml +++ b/src/common/recordbatch/Cargo.toml @@ -9,9 +9,11 @@ workspace = true [dependencies] arc-swap = "1.6" +common-base.workspace = true common-error.workspace = true common-macro.workspace = true common-telemetry.workspace = true +common-time.workspace = true datafusion.workspace = true datafusion-common.workspace = true datatypes.workspace = true diff --git a/src/common/recordbatch/src/adapter.rs b/src/common/recordbatch/src/adapter.rs index 03c24a810c..d73db16401 100644 --- a/src/common/recordbatch/src/adapter.rs +++ b/src/common/recordbatch/src/adapter.rs @@ -20,6 +20,8 @@ use std::str::FromStr; use std::sync::Arc; use std::task::{Context, Poll}; +use common_base::readable_size::ReadableSize; +use common_time::util::format_nanoseconds_human_readable; use datafusion::arrow::compute::cast; use datafusion::arrow::datatypes::SchemaRef as 
DfSchemaRef; use datafusion::error::Result as DfResult; @@ -443,6 +445,20 @@ pub struct RecordBatchMetrics { pub plan_metrics: Vec, } +/// Determines if a metric name represents a time measurement that should be formatted. +fn is_time_metric(metric_name: &str) -> bool { + metric_name.contains("elapsed") || metric_name.contains("time") || metric_name.contains("cost") +} + +/// Determines if a metric name represents a bytes measurement that should be formatted. +fn is_bytes_metric(metric_name: &str) -> bool { + metric_name.contains("bytes") || metric_name.contains("mem") +} + +fn format_bytes_human_readable(bytes: usize) -> String { + format!("{}", ReadableSize(bytes as u64)) +} + /// Only display `plan_metrics` with indent ` ` (2 spaces). impl Display for RecordBatchMetrics { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -455,7 +471,18 @@ impl Display for RecordBatchMetrics { indent = metric.level * 2, )?; for (label, value) in &metric.metrics { - write!(f, "{}: {}, ", label, value)?; + if is_time_metric(label) { + write!( + f, + "{}: {}, ", + label, + format_nanoseconds_human_readable(*value), + )?; + } else if is_bytes_metric(label) { + write!(f, "{}: {}, ", label, format_bytes_human_readable(*value),)?; + } else { + write!(f, "{}: {}, ", label, value)?; + } } writeln!(f, "]")?; } diff --git a/src/common/time/src/util.rs b/src/common/time/src/util.rs index ccb9e1bdd0..c9552d382f 100644 --- a/src/common/time/src/util.rs +++ b/src/common/time/src/util.rs @@ -83,6 +83,62 @@ pub(crate) fn div_ceil(this: i64, rhs: i64) -> i64 { } } +/// Formats nanoseconds into human-readable time with dynamic unit selection. +/// +/// This function automatically chooses the most appropriate unit (seconds, milliseconds, +/// microseconds, or nanoseconds) to display the time in a readable format. 
/// Formats nanoseconds into human-readable time with dynamic unit selection.
///
/// The largest unit (seconds, milliseconds, microseconds or nanoseconds) in which the
/// *rounded* value is at least `1` is used. Values that round to less than ten units
/// keep one extra decimal (`1.23s`, `4.5ms`, `6.7us`); larger ones drop it (`12.3s`,
/// `45ms`, `67us`). Anything shorter than one microsecond is printed as an exact
/// nanosecond count.
///
/// Tier boundaries sit where the lower tier would round up to its own limit, so
/// `999_999_999` renders as `1.00s` rather than `1000ms`.
///
/// The input is never reinterpreted as a signed integer, so very large values stay
/// positive.
///
/// # Examples
///
/// ```
/// use common_time::util::format_nanoseconds_human_readable;
///
/// assert_eq!("1.23s", format_nanoseconds_human_readable(1_234_567_890));
/// assert_eq!("456ms", format_nanoseconds_human_readable(456_000_000));
/// assert_eq!("789us", format_nanoseconds_human_readable(789_000));
/// assert_eq!("123ns", format_nanoseconds_human_readable(123));
/// assert_eq!("1.00s", format_nanoseconds_human_readable(999_999_999));
/// ```
pub fn format_nanoseconds_human_readable(nanos: usize) -> String {
    // (smallest nanosecond value in the tier, nanoseconds per unit, decimals, suffix),
    // ordered from the largest tier to the smallest. Each lower bound is the first
    // value that the tier below would round up to "10" or "1000" of its unit.
    const TIERS: [(u64, f64, usize, &str); 6] = [
        (9_995_000_000, 1e9, 1, "s"),
        (999_500_000, 1e9, 2, "s"),
        (9_950_000, 1e6, 0, "ms"),
        (999_500, 1e6, 1, "ms"),
        (9_950, 1e3, 0, "us"),
        (1_000, 1e3, 1, "us"),
    ];

    // `usize` -> `u64` is a widening (lossless) cast on 64-bit and narrower targets;
    // it keeps the tier constants valid even where `usize` is narrower than 64 bits.
    let nanos = nanos as u64;

    match TIERS.iter().find(|tier| nanos >= tier.0) {
        Some(&(_, per_unit, decimals, suffix)) => {
            format!("{:.*}{}", decimals, nanos as f64 / per_unit, suffix)
        }
        // Less than 1 microsecond (including zero): exact nanosecond count.
        None => format!("{}ns", nanos),
    }
}

#[test]
fn test_format_nanoseconds_human_readable() {
    // Test zero
    assert_eq!("0ns", format_nanoseconds_human_readable(0));

    // Test nanoseconds (< 1 microsecond)
    assert_eq!("1ns", format_nanoseconds_human_readable(1));
    assert_eq!("123ns", format_nanoseconds_human_readable(123));
    assert_eq!("999ns", format_nanoseconds_human_readable(999));

    // Test microseconds (>= 1 microsecond, < 1 millisecond)
    assert_eq!("1.0us", format_nanoseconds_human_readable(1_000));
    assert_eq!("1.5us", format_nanoseconds_human_readable(1_500));
    assert_eq!("10us", format_nanoseconds_human_readable(10_000));
    assert_eq!("123us", format_nanoseconds_human_readable(123_000));
    assert_eq!("999us", format_nanoseconds_human_readable(999_000));

    // Test milliseconds (>= 1 millisecond, < 1 second)
    assert_eq!("1.0ms", format_nanoseconds_human_readable(1_000_000));
    assert_eq!("1.5ms", format_nanoseconds_human_readable(1_500_000));
    assert_eq!("10ms", format_nanoseconds_human_readable(10_000_000));
    assert_eq!("123ms", format_nanoseconds_human_readable(123_000_000));
    assert_eq!("999ms", format_nanoseconds_human_readable(999_000_000));

    // Test seconds (>= 1 second)
    assert_eq!("1.00s", format_nanoseconds_human_readable(1_000_000_000));
    assert_eq!("1.23s", format_nanoseconds_human_readable(1_234_567_890));
    assert_eq!("10.0s", format_nanoseconds_human_readable(10_000_000_000));
    assert_eq!("123.5s", format_nanoseconds_human_readable(123_456_789_012));

    // Test large values
    assert_eq!(
        "1234.6s",
        format_nanoseconds_human_readable(1_234_567_890_123)
    );

    // Values that round up to a tier limit move to the next tier
    assert_eq!("9.9us", format_nanoseconds_human_readable(9_949));
    assert_eq!("10us", format_nanoseconds_human_readable(9_999));
    assert_eq!("999us", format_nanoseconds_human_readable(999_499));
    assert_eq!("1.0ms", format_nanoseconds_human_readable(999_999));
    assert_eq!("10ms", format_nanoseconds_human_readable(9_999_999));
    assert_eq!("999ms", format_nanoseconds_human_readable(999_499_999));
    assert_eq!("1.00s", format_nanoseconds_human_readable(999_999_999));
    assert_eq!("9.99s", format_nanoseconds_human_readable(9_994_999_999));
    assert_eq!("10.0s", format_nanoseconds_human_readable(9_999_999_999));

    // Inputs above `i64::MAX` must not wrap to a negative number
    assert!(!format_nanoseconds_human_readable(usize::MAX).starts_with('-'));
}