feat: humanize analyze numbers (#6889)

* feat: humanize analyze numbers

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update cargo lock

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* trailing space

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use readable size

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-09-04 20:42:38 -07:00
committed by GitHub
parent ba105e18b0
commit faae1db066
4 changed files with 125 additions and 1 deletions

2
Cargo.lock generated
View File

@@ -2471,9 +2471,11 @@ name = "common-recordbatch"
version = "0.18.0"
dependencies = [
"arc-swap",
"common-base",
"common-error",
"common-macro",
"common-telemetry",
"common-time",
"datafusion",
"datafusion-common",
"datatypes",

View File

@@ -9,9 +9,11 @@ workspace = true
[dependencies]
arc-swap = "1.6"
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
datafusion.workspace = true
datafusion-common.workspace = true
datatypes.workspace = true

View File

@@ -20,6 +20,8 @@ use std::str::FromStr;
use std::sync::Arc;
use std::task::{Context, Poll};
use common_base::readable_size::ReadableSize;
use common_time::util::format_nanoseconds_human_readable;
use datafusion::arrow::compute::cast;
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::error::Result as DfResult;
@@ -443,6 +445,20 @@ pub struct RecordBatchMetrics {
pub plan_metrics: Vec<PlanMetrics>,
}
/// Reports whether `metric_name` should be rendered as a duration.
///
/// A name qualifies when it contains any of the substrings `elapsed`, `time` or `cost`.
/// The comparison is a plain, case-sensitive substring search.
fn is_time_metric(metric_name: &str) -> bool {
    const TIME_MARKERS: [&str; 3] = ["elapsed", "time", "cost"];

    TIME_MARKERS
        .iter()
        .any(|marker| metric_name.contains(*marker))
}
/// Reports whether `metric_name` should be rendered as a byte size.
///
/// A name qualifies when it contains the substring `bytes` or `mem`; the search is
/// case-sensitive.
fn is_bytes_metric(metric_name: &str) -> bool {
    ["bytes", "mem"]
        .iter()
        .any(|needle| metric_name.contains(*needle))
}
/// Renders a byte count using the `Display` implementation of [`ReadableSize`].
fn format_bytes_human_readable(bytes: usize) -> String {
    let size = ReadableSize(bytes as u64);
    size.to_string()
}
/// Displays only `plan_metrics`, indenting each plan level by `  ` (2 spaces).
impl Display for RecordBatchMetrics {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -455,7 +471,18 @@ impl Display for RecordBatchMetrics {
indent = metric.level * 2,
)?;
for (label, value) in &metric.metrics {
write!(f, "{}: {}, ", label, value)?;
if is_time_metric(label) {
write!(
f,
"{}: {}, ",
label,
format_nanoseconds_human_readable(*value),
)?;
} else if is_bytes_metric(label) {
write!(f, "{}: {}, ", label, format_bytes_human_readable(*value),)?;
} else {
write!(f, "{}: {}, ", label, value)?;
}
}
writeln!(f, "]")?;
}

View File

@@ -83,6 +83,62 @@ pub(crate) fn div_ceil(this: i64, rhs: i64) -> i64 {
}
}
/// Formats nanoseconds into human-readable time with dynamic unit selection.
///
/// This function automatically chooses the most appropriate unit (seconds, milliseconds,
/// microseconds, or nanoseconds) to display the time in a readable format. A value that
/// would round up to `1000` in a smaller unit is shown in the next larger unit instead,
/// e.g. `999_999` ns is rendered as `1.0ms` rather than `1000us`.
///
/// Precision per unit (decided on the value before rounding):
/// - seconds: two decimals below 10, one decimal from 10 on
/// - milliseconds and microseconds: one decimal below 10, none from 10 on
/// - nanoseconds: printed as an integer
///
/// # Examples
///
/// ```
/// use common_time::util::format_nanoseconds_human_readable;
///
/// assert_eq!("1.23s", format_nanoseconds_human_readable(1_234_567_890));
/// assert_eq!("456ms", format_nanoseconds_human_readable(456_000_000));
/// assert_eq!("789us", format_nanoseconds_human_readable(789_000));
/// assert_eq!("123ns", format_nanoseconds_human_readable(123));
/// assert_eq!("1.0ms", format_nanoseconds_human_readable(999_999));
/// ```
pub fn format_nanoseconds_human_readable(nanos: usize) -> String {
    const NANOS_PER_MICRO: f64 = 1_000.0;
    const NANOS_PER_MILLI: f64 = 1_000_000.0;
    const NANOS_PER_SEC: f64 = 1_000_000_000.0;

    // First nanosecond count rendered in each unit. Milliseconds and seconds start at
    // 999.5 of the next smaller unit: from there on the smaller unit, printed without
    // decimals, would round up to "1000".
    const MICROS_FROM: u64 = 1_000;
    const MILLIS_FROM: u64 = 999_500;
    const SECS_FROM: u64 = 999_500_000;

    // Widening cast: `usize` is never wider than 64 bits. Staying unsigned avoids the
    // wrap to a negative number that a signed cast produces for inputs above `i64::MAX`.
    let nanos = nanos as u64;

    // Milliseconds and microseconds share the same precision rule.
    let sub_second = |value: f64, unit: &str| -> String {
        if value >= 10.0 {
            format!("{:.0}{}", value, unit)
        } else {
            format!("{:.1}{}", value, unit)
        }
    };

    if nanos >= SECS_FROM {
        let secs = nanos as f64 / NANOS_PER_SEC;
        if secs >= 10.0 {
            format!("{:.1}s", secs)
        } else {
            format!("{:.2}s", secs)
        }
    } else if nanos >= MILLIS_FROM {
        sub_second(nanos as f64 / NANOS_PER_MILLI, "ms")
    } else if nanos >= MICROS_FROM {
        sub_second(nanos as f64 / NANOS_PER_MICRO, "us")
    } else {
        // Less than 1 microsecond (including zero): plain integer nanoseconds.
        format!("{}ns", nanos)
    }
}
#[cfg(test)]
mod tests {
use std::time::{self, SystemTime};
@@ -114,4 +170,41 @@ mod tests {
let v0 = 9223372036854676001;
assert_eq!(9223372036854677, div_ceil(v0, 1000));
}
#[test]
fn test_format_nanoseconds_human_readable() {
    // Each entry pairs an input in nanoseconds with the exact rendering expected for it.
    let cases: &[(usize, &str)] = &[
        // Zero
        (0, "0ns"),
        // Nanoseconds (< 1 microsecond)
        (1, "1ns"),
        (123, "123ns"),
        (999, "999ns"),
        // Microseconds (>= 1 microsecond, < 1 millisecond)
        (1_000, "1.0us"),
        (1_500, "1.5us"),
        (10_000, "10us"),
        (123_000, "123us"),
        (999_000, "999us"),
        // Milliseconds (>= 1 millisecond, < 1 second)
        (1_000_000, "1.0ms"),
        (1_500_000, "1.5ms"),
        (10_000_000, "10ms"),
        (123_000_000, "123ms"),
        (999_000_000, "999ms"),
        // Seconds (>= 1 second)
        (1_000_000_000, "1.00s"),
        (1_234_567_890, "1.23s"),
        (10_000_000_000, "10.0s"),
        (123_456_789_012, "123.5s"),
        // Large values
        (1_234_567_890_123, "1234.6s"),
    ];

    for &(nanos, expected) in cases {
        assert_eq!(expected, format_nanoseconds_human_readable(nanos));
    }
}
}