diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index e274d24585..085540e7b9 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -984,6 +984,7 @@ pub fn short_error(e: &QueryError) -> String { } fn log_query_error(query: &str, e: &QueryError) { + // If you want to change the log level of a specific error, also re-categorize it in `BasebackupQueryTimeOngoingRecording`. match e { QueryError::Disconnected(ConnectionError::Io(io_error)) => { if is_expected_io_error(io_error) { diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 366bd82903..b76efa5b48 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -8,6 +8,8 @@ use metrics::{ }; use once_cell::sync::Lazy; use pageserver_api::shard::TenantShardId; +use postgres_backend::{is_expected_io_error, QueryError}; +use pq_proto::framed::ConnectionError; use strum::{EnumCount, VariantNames}; use strum_macros::{IntoStaticStr, VariantNames}; use tracing::warn; @@ -1508,6 +1510,7 @@ static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| { pub(crate) struct BasebackupQueryTime { ok: Histogram, error: Histogram, + client_error: Histogram, } pub(crate) static BASEBACKUP_QUERY_TIME: Lazy = Lazy::new(|| { @@ -1521,6 +1524,7 @@ pub(crate) static BASEBACKUP_QUERY_TIME: Lazy = Lazy::new(| BasebackupQueryTime { ok: vec.get_metric_with_label_values(&["ok"]).unwrap(), error: vec.get_metric_with_label_values(&["error"]).unwrap(), + client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(), } }); @@ -1557,7 +1561,7 @@ impl BasebackupQueryTime { } impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> { - pub(crate) fn observe(self, res: &Result) { + pub(crate) fn observe(self, res: &Result) { let elapsed = self.start.elapsed(); let ex_throttled = self .ctx @@ -1576,10 +1580,15 @@ impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> { elapsed } }; - let metric = if res.is_ok() { - &self.parent.ok - } 
else { - &self.parent.error + // If you want to change the categorization of a specific error, also change it in `log_query_error`. + let metric = match res { + Ok(_) => &self.parent.ok, + Err(QueryError::Disconnected(ConnectionError::Io(io_error))) + if is_expected_io_error(io_error) => + { + &self.parent.client_error + } + Err(_) => &self.parent.error, }; metric.observe(ex_throttled.as_secs_f64()); }