Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-09 22:42:57 +00:00)
fix(page_service / batching): smgr op latency metric of dropped responses includes flush time (#10756)
# Problem

Say we have a batch of 10 responses to send out. Then, even with #10728, we've still only called `observe_execution_end_flush_start` for the first 3 responses. The remaining 7 responses' timers are still ticking. When compute now closes the connection, the flush we are waiting on fails with an error and we `drop()` the remaining 7 responses' smgr op timers. The `impl Drop for SmgrOpTimer` then observes an execution time that includes the flush time.

In practice, this is suspected to produce the `+Inf` observations in the smgr op latency histogram that we've seen since the introduction of pipelining, even after shipping #10728.

refs:

- fixup of https://github.com/neondatabase/neon/pull/10042
- fixup of https://github.com/neondatabase/neon/pull/10728
- fixes https://github.com/neondatabase/neon/issues/10754
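For illustration, here is a minimal, self-contained sketch of the failure mode described above. The `OpTimer` type and the printed value are hypothetical stand-ins for `SmgrOpTimer` and the histogram observation; the point is only that a timer whose execution end is never observed before it is dropped will attribute flush time (or connection-teardown time) to execution time.

```rust
use std::time::Instant;

struct OpTimer {
    started_at: Instant,
    execution_end: Option<Instant>,
}

impl OpTimer {
    fn new() -> Self {
        Self {
            started_at: Instant::now(),
            execution_end: None,
        }
    }

    // Called when the handler finishes producing the response, before any flush.
    fn observe_execution_end(&mut self, at: Instant) {
        if self.execution_end.is_none() {
            self.execution_end = Some(at);
        }
    }
}

impl Drop for OpTimer {
    fn drop(&mut self) {
        // If nobody observed the execution end, the drop time is used instead.
        // A timer dropped only after a slow or failed flush therefore records the
        // flush time as execution time -- the inflated observations described above.
        let end = self.execution_end.unwrap_or_else(Instant::now);
        let micros = (end - self.started_at).as_micros();
        println!("observed execution time: {micros}us");
    }
}

fn main() {
    let mut finalized = OpTimer::new();
    // What this PR does for every response in the batch, before flushing starts.
    finalized.observe_execution_end(Instant::now());

    let still_ticking = OpTimer::new();
    // Stand-in for a slow or failing flush of earlier responses in the batch.
    std::thread::sleep(std::time::Duration::from_millis(50));

    drop(still_ticking); // observes ~50ms of flush time as "execution" time
    drop(finalized); // observes only the pre-flush execution time
}
```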
committed by GitHub
parent fcedd10226
commit 9247331c67
@@ -1366,10 +1366,7 @@ impl SmgrOpTimer {
     /// The first callers receives Some, subsequent ones None.
     ///
     /// See [`SmgrOpTimerState`] for more context.
-    pub(crate) fn observe_execution_end_flush_start(
-        &mut self,
-        at: Instant,
-    ) -> Option<SmgrOpFlushInProgress> {
+    pub(crate) fn observe_execution_end(&mut self, at: Instant) -> Option<SmgrOpFlushInProgress> {
         // NB: unlike the other observe_* methods, this one take()s.
         #[allow(clippy::question_mark)] // maintain similar code pattern.
         let Some(mut inner) = self.0.take() else {
@@ -1403,7 +1400,6 @@ impl SmgrOpTimer {
             ..
         } = inner;
         Some(SmgrOpFlushInProgress {
-            flush_started_at: at,
             global_micros: global_flush_in_progress_micros,
             per_timeline_micros: per_timeline_flush_in_progress_micros,
         })
@@ -1419,7 +1415,6 @@ impl SmgrOpTimer {
 /// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there,
 /// and remove this struct from the code base.
 pub(crate) struct SmgrOpFlushInProgress {
-    flush_started_at: Instant,
     global_micros: IntCounter,
     per_timeline_micros: IntCounter,
 }
@@ -1438,12 +1433,13 @@ impl Drop for SmgrOpTimer {
         self.observe_throttle_start(now);
         self.observe_throttle_done(ThrottleResult::NotThrottled { end: now });
         self.observe_execution_start(now);
-        self.observe_execution_end_flush_start(now);
+        let maybe_flush_timer = self.observe_execution_end(now);
+        drop(maybe_flush_timer);
     }
 }

 impl SmgrOpFlushInProgress {
-    pub(crate) async fn measure<Fut, O>(mut self, mut fut: Fut) -> O
+    pub(crate) async fn measure<Fut, O>(self, mut started_at: Instant, mut fut: Fut) -> O
     where
         Fut: std::future::Future<Output = O>,
     {
@@ -1455,12 +1451,12 @@ impl SmgrOpFlushInProgress {
         let mut observe_guard = scopeguard::guard(
             || {
                 let now = Instant::now();
-                let elapsed = now - self.flush_started_at;
+                let elapsed = now - started_at;
                 self.global_micros
                     .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
                 self.per_timeline_micros
                     .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
-                self.flush_started_at = now;
+                started_at = now;
             },
             |mut observe| {
                 observe();

@@ -1074,7 +1074,7 @@ impl PageServerHandler {
         };

         // invoke handler function
-        let (handler_results, span): (
+        let (mut handler_results, span): (
             Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>>,
             _,
         ) = match batch {
@@ -1201,7 +1201,7 @@ impl PageServerHandler {
             }
         };

-        // We purposefully don't count flush time into the smgr operaiton timer.
+        // We purposefully don't count flush time into the smgr operation timer.
         //
         // The reason is that current compute client will not perform protocol processing
         // if the postgres backend process is doing things other than `->smgr_read()`.
@@ -1218,17 +1218,32 @@ impl PageServerHandler {
         // call, which (all unmeasured) adds syscall overhead but reduces time to first byte
         // and avoids building up a "giant" contiguous userspace buffer to hold the entire response.
         // TODO: vectored socket IO would be great, but pgb_writer doesn't support that.
-        //
-        // Since we're flushing multiple times in the loop, but only have access to the per-op
-        // timers inside the loop, we capture the flush start time here and reuse it to finish
-        // each op timer.
-        let flushing_start_time = Instant::now();
+        let flush_timers = {
+            let flushing_start_time = Instant::now();
+            let mut flush_timers = Vec::with_capacity(handler_results.len());
+            for handler_result in &mut handler_results {
+                let flush_timer = match handler_result {
+                    Ok((_, timer)) => Some(
+                        timer
+                            .observe_execution_end(flushing_start_time)
+                            .expect("we are the first caller"),
+                    ),
+                    Err(_) => {
+                        // TODO: measure errors
+                        None
+                    }
+                };
+                flush_timers.push(flush_timer);
+            }
+            assert_eq!(flush_timers.len(), handler_results.len());
+            flush_timers
+        };

         // Map handler result to protocol behavior.
         // Some handler errors cause exit from pagestream protocol.
         // Other handler errors are sent back as an error message and we stay in pagestream protocol.
-        for handler_result in handler_results {
-            let (response_msg, timer) = match handler_result {
+        for (handler_result, flushing_timer) in handler_results.into_iter().zip(flush_timers) {
+            let response_msg = match handler_result {
                 Err(e) => match &e.err {
                     PageStreamError::Shutdown => {
                         // If we fail to fulfil a request during shutdown, which may be _because_ of
@@ -1252,16 +1267,14 @@ impl PageServerHandler {
                         span.in_scope(|| {
                             error!("error reading relation or page version: {full:#}")
                         });
-                        (
-                            PagestreamBeMessage::Error(PagestreamErrorResponse {
-                                req: e.req,
-                                message: e.err.to_string(),
-                            }),
-                            None, // TODO: measure errors
-                        )
+                        PagestreamBeMessage::Error(PagestreamErrorResponse {
+                            req: e.req,
+                            message: e.err.to_string(),
+                        })
                     }
                 },
-                Ok((response_msg, timer)) => (response_msg, Some(timer)),
+                Ok((response_msg, _op_timer_already_observed)) => response_msg,
             };

             //
@@ -1272,18 +1285,12 @@ impl PageServerHandler {
                 &response_msg.serialize(protocol_version),
             ))?;

-            let flushing_timer = timer.map(|mut timer| {
-                timer
-                    .observe_execution_end_flush_start(flushing_start_time)
-                    .expect("we are the first caller")
-            });
-
             // what we want to do
             let flush_fut = pgb_writer.flush();
             // metric for how long flushing takes
             let flush_fut = match flushing_timer {
                 Some(flushing_timer) => {
-                    futures::future::Either::Left(flushing_timer.measure(flush_fut))
+                    futures::future::Either::Left(flushing_timer.measure(Instant::now(), flush_fut))
                 }
                 None => futures::future::Either::Right(flush_fut),
             };
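For context, here is a minimal sketch of the ordering the diff above establishes: finalize every response's execution timer with a single pre-flush timestamp, then measure each flush separately with an explicitly passed start instant. The types (`OpTimer`, `FlushInProgress`) are hypothetical stand-ins, not the pageserver API, and the real `measure` wraps the flush future rather than a closure.

```rust
use std::time::Instant;

// Stand-in for SmgrOpTimer: Some(start) until the execution end has been observed.
struct OpTimer(Option<Instant>);

// Stand-in for SmgrOpFlushInProgress after this change: it no longer carries a
// flush start time; the caller passes one in, mirroring measure(Instant::now(), flush_fut).
struct FlushInProgress;

impl OpTimer {
    fn start() -> Self {
        Self(Some(Instant::now()))
    }

    // The first caller gets the flush-phase handle; later calls (and Drop) see None.
    fn observe_execution_end(&mut self, at: Instant) -> Option<FlushInProgress> {
        let started_at = self.0.take()?;
        println!("execution: {}us", (at - started_at).as_micros());
        Some(FlushInProgress)
    }
}

impl Drop for OpTimer {
    fn drop(&mut self) {
        // Harmless now: if the execution end was already observed before flushing,
        // dropping the timer after a failed flush records nothing extra.
        if let Some(started_at) = self.0.take() {
            println!("execution (via drop): {}us", started_at.elapsed().as_micros());
        }
    }
}

impl FlushInProgress {
    fn measure<O>(self, started_at: Instant, flush: impl FnOnce() -> O) -> O {
        let out = flush();
        println!("flush: {}us", started_at.elapsed().as_micros());
        out
    }
}

fn main() {
    let mut timers = vec![OpTimer::start(), OpTimer::start(), OpTimer::start()];

    // Step 1 (before any flush): finalize every response's execution timer with a
    // single timestamp, like the new flush_timers block in the diff.
    let flushing_start_time = Instant::now();
    let flush_phases: Vec<FlushInProgress> = timers
        .iter_mut()
        .map(|t| {
            t.observe_execution_end(flushing_start_time)
                .expect("we are the first caller")
        })
        .collect();

    // Step 2: flush each response; only the flush itself is attributed to flush time.
    // If the connection dies here, dropping the remaining OpTimers records nothing.
    for phase in flush_phases {
        phase.measure(Instant::now(), || {
            std::thread::sleep(std::time::Duration::from_millis(5)); // pgb_writer.flush() stand-in
        });
    }
}
```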