feat: add rough timings for basebackup (#4062)

just record the time needed for waiting the lsn and then the basebackup
in a log message in millis. this is related to ongoing investigations to
cold start performance.

this could also be a a counter. it cannot be added next to smgr
histograms, because we don't want another histogram per timeline.

the aim is to allow drilling deeper into which timelines were slow, and
to understand why some need two basebackups.
This commit is contained in:
Joonas Koivunen
2023-04-25 16:22:16 +03:00
committed by GitHub
parent fa20e37574
commit cb9473928d

View File

@@ -700,6 +700,8 @@ impl PageServerHandler {
full_backup: bool,
ctx: RequestContext,
) -> anyhow::Result<()> {
let started = std::time::Instant::now();
// check that the timeline exists
let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?;
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
@@ -712,6 +714,8 @@ impl PageServerHandler {
.context("invalid basebackup lsn")?;
}
let lsn_awaited_after = started.elapsed();
// switch client to COPYOUT
pgb.write_message_noflush(&BeMessage::CopyOutResponse)?;
pgb.flush().await?;
@@ -732,7 +736,17 @@ impl PageServerHandler {
pgb.write_message_noflush(&BeMessage::CopyDone)?;
pgb.flush().await?;
info!("basebackup complete");
let basebackup_after = started
.elapsed()
.checked_sub(lsn_awaited_after)
.unwrap_or(Duration::ZERO);
info!(
lsn_await_millis = lsn_awaited_after.as_millis(),
basebackup_millis = basebackup_after.as_millis(),
"basebackup complete"
);
Ok(())
}