From cb9473928df94148b42297fe30b0b99682609249 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Tue, 25 Apr 2023 16:22:16 +0300 Subject: [PATCH] feat: add rough timings for basebackup (#4062) just record the time needed for waiting the lsn and then the basebackup in a log message in millis. this is related to ongoing investigations to cold start performance. this could also be a a counter. it cannot be added next to smgr histograms, because we don't want another histogram per timeline. the aim is to allow drilling deeper into which timelines were slow, and to understand why some need two basebackups. --- pageserver/src/page_service.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index bd38a7a2f3..135f08e846 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -700,6 +700,8 @@ impl PageServerHandler { full_backup: bool, ctx: RequestContext, ) -> anyhow::Result<()> { + let started = std::time::Instant::now(); + // check that the timeline exists let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?; let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); @@ -712,6 +714,8 @@ impl PageServerHandler { .context("invalid basebackup lsn")?; } + let lsn_awaited_after = started.elapsed(); + // switch client to COPYOUT pgb.write_message_noflush(&BeMessage::CopyOutResponse)?; pgb.flush().await?; @@ -732,7 +736,17 @@ impl PageServerHandler { pgb.write_message_noflush(&BeMessage::CopyDone)?; pgb.flush().await?; - info!("basebackup complete"); + + let basebackup_after = started + .elapsed() + .checked_sub(lsn_awaited_after) + .unwrap_or(Duration::ZERO); + + info!( + lsn_await_millis = lsn_awaited_after.as_millis(), + basebackup_millis = basebackup_after.as_millis(), + "basebackup complete" + ); Ok(()) }