mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-28 02:20:42 +00:00
fix(metrics): time individual layer flush operations (#7109)
Currently, the flushing operation could flush multiple frozen layers to the disk and store the aggregate time in the histogram. The result is a bimodal distribution with short and over 1000-second flushes. Change it so that we record how long one layer flush takes.
This commit is contained in:
@@ -2967,7 +2967,6 @@ impl Timeline {
|
||||
}
|
||||
|
||||
trace!("waking up");
|
||||
let timer = self.metrics.flush_time_histo.start_timer();
|
||||
let flush_counter = *layer_flush_start_rx.borrow();
|
||||
let result = loop {
|
||||
if self.cancel.is_cancelled() {
|
||||
@@ -2978,6 +2977,8 @@ impl Timeline {
|
||||
return;
|
||||
}
|
||||
|
||||
let timer = self.metrics.flush_time_histo.start_timer();
|
||||
|
||||
let layer_to_flush = {
|
||||
let guard = self.layers.read().await;
|
||||
guard.layer_map().frozen_layers.front().cloned()
|
||||
@@ -2999,13 +3000,12 @@ impl Timeline {
|
||||
break err;
|
||||
}
|
||||
}
|
||||
timer.stop_and_record();
|
||||
};
|
||||
// Notify any listeners that we're done
|
||||
let _ = self
|
||||
.layer_flush_done_tx
|
||||
.send_replace((flush_counter, result));
|
||||
|
||||
timer.stop_and_record();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3073,6 +3073,7 @@ impl Timeline {
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), FlushLayerError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
// As a special case, when we have just imported an image into the repository,
|
||||
// instead of writing out a L0 delta layer, we directly write out image layer
|
||||
// files instead. This is possible as long as *all* the data imported into the
|
||||
|
||||
Reference in New Issue
Block a user