From 1c47fbae816e32e7f74e9a9412574ba56ef6549b Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Sat, 1 Jan 2022 19:08:09 +0300 Subject: [PATCH] Do not write image layers during enforced checkpoint (#1057) * Do not write image layers during enforced checkpoint refer #1056 * Add Flush option to CheckpointConfig refer #1057 --- pageserver/src/layered_repository.rs | 18 +++++++++++------- .../src/layered_repository/inmemory_layer.rs | 10 +++++++--- pageserver/src/lib.rs | 2 ++ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index 3f5c092e10..ad0f086332 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -360,7 +360,8 @@ fn shutdown_timeline( .store(false, atomic::Ordering::Relaxed); walreceiver::stop_wal_receiver(timeline_id); trace!("repo shutdown. checkpoint timeline {}", timeline_id); - timeline.checkpoint(CheckpointConfig::Forced)?; + // Do not reconstruct pages to reduce shutdown time + timeline.checkpoint(CheckpointConfig::Flush)?; //TODO Wait for walredo process to shutdown too } LayeredTimelineEntry::Remote { .. } => warn!( @@ -975,12 +976,15 @@ impl Timeline for LayeredTimeline { /// metrics collection. fn checkpoint(&self, cconf: CheckpointConfig) -> Result<()> { match cconf { + CheckpointConfig::Flush => STORAGE_TIME + .with_label_values(&["flush checkpoint"]) + .observe_closure_duration(|| self.checkpoint_internal(0, false)), CheckpointConfig::Forced => STORAGE_TIME .with_label_values(&["forced checkpoint"]) - .observe_closure_duration(|| self.checkpoint_internal(0)), + .observe_closure_duration(|| self.checkpoint_internal(0, true)), CheckpointConfig::Distance(distance) => STORAGE_TIME .with_label_values(&["checkpoint"]) - .observe_closure_duration(|| self.checkpoint_internal(distance)), + .observe_closure_duration(|| self.checkpoint_internal(distance, true)), } } @@ -1429,7 +1433,7 @@ impl LayeredTimeline { /// Flush to disk all data that was written with the put_* functions /// /// NOTE: This has nothing to do with checkpoint in PostgreSQL. - fn checkpoint_internal(&self, checkpoint_distance: u64) -> Result<()> { + fn checkpoint_internal(&self, checkpoint_distance: u64, reconstruct_pages: bool) -> Result<()> { let mut write_guard = self.write_lock.lock().unwrap(); let mut layers = self.layers.lock().unwrap(); @@ -1486,7 +1490,7 @@ impl LayeredTimeline { drop(layers); drop(write_guard); - let mut this_layer_uploads = self.evict_layer(oldest_layer_id)?; + let mut this_layer_uploads = self.evict_layer(oldest_layer_id, reconstruct_pages)?; layer_uploads.append(&mut this_layer_uploads); write_guard = self.write_lock.lock().unwrap(); @@ -1566,7 +1570,7 @@ impl LayeredTimeline { Ok(()) } - fn evict_layer(&self, layer_id: LayerId) -> Result> { + fn evict_layer(&self, layer_id: LayerId, reconstruct_pages: bool) -> Result> { // Mark the layer as no longer accepting writes and record the end_lsn. // This happens in-place, no new layers are created now. // We call `get_last_record_lsn` again, which may be different from the @@ -1591,7 +1595,7 @@ impl LayeredTimeline { drop(layers); drop(write_guard); - let new_historics = oldest_layer.write_to_disk(self)?; + let new_historics = oldest_layer.write_to_disk(self, reconstruct_pages)?; write_guard = self.write_lock.lock().unwrap(); layers = self.layers.lock().unwrap(); diff --git a/pageserver/src/layered_repository/inmemory_layer.rs b/pageserver/src/layered_repository/inmemory_layer.rs index 13f6c51b90..68f98735c2 100644 --- a/pageserver/src/layered_repository/inmemory_layer.rs +++ b/pageserver/src/layered_repository/inmemory_layer.rs @@ -575,12 +575,16 @@ impl InMemoryLayer { /// Write the this frozen in-memory layer to disk. /// /// Returns new layers that replace this one. - /// If not dropped, returns a new image layer containing the page versions + /// If not dropped and reconstruct_pages is true, returns a new image layer containing the page versions /// at the `end_lsn`. Can also return a DeltaLayer that includes all the /// WAL records between start and end LSN. (The delta layer is not needed /// when a new relish is created with a single LSN, so that the start and /// end LSN are the same.) - pub fn write_to_disk(&self, timeline: &LayeredTimeline) -> Result { + pub fn write_to_disk( + &self, + timeline: &LayeredTimeline, + reconstruct_pages: bool, + ) -> Result { trace!( "write_to_disk {} get_end_lsn is {}", self.filename().display(), @@ -606,7 +610,7 @@ impl InMemoryLayer { // Figure out if we should create a delta layer, image layer, or both. let image_lsn: Option; let delta_end_lsn: Option; - if self.is_dropped() { + if self.is_dropped() || !reconstruct_pages { // The segment was dropped. Create just a delta layer containing all the // changes up to and including the drop. delta_end_lsn = Some(end_lsn_exclusive); diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index b2ae78448a..23691ea130 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -38,5 +38,7 @@ pub enum CheckpointConfig { // Flush in-memory data that is older than this Distance(u64), // Flush all in-memory data + Flush, + // Flush all in-memory data and reconstruct all page images Forced, }