From ff51e96fbd864504494ab301edfe955a2f030d47 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 27 Mar 2023 12:45:10 +0200 Subject: [PATCH] fix synthetic size for (last_record_lsn - gc_horizon) < initdb_lsn (#3874) fix synthetic size for (last_record_lsn - gc_horizon) < initdb_lsn Assume a single-timeline project. If the gc_horizon covers all WAL (last_record_lsn < gc_horizon) but we have written more data than just initdb, the synthetic size calculation worker needs to calculate the logical size at LSN initdb_lsn (Segment BranchStart). Before this patch, that calculation would incorrectly return the initial logical size calculation result that we cache in the Timeline::initial_logical_size. Presumably, because there was confusion around initdb_lsn vs. initial size calculation. The fix is to only hand out the initialized_size() only if the LSN matches. The distinction in the metrics between "init logical size" and "logical size" was also incorrect because of the above. So, remove it. There was a special case for `size != 0`. This was to cover the case of LogicalSize::empty_initial(), but `initial_part_end` is `None` in that case, so the new `LogicalSize::initialized_size()` will return None in that case as well. Lastly, to prevent confusion like this in the future, rename all occurrences of `init_lsn` to either just `lsn` or a more specific name. Co-authored-by: Joonas Koivunen Co-authored-by: Heikki Linnakangas --- pageserver/src/metrics.rs | 4 --- pageserver/src/tenant/timeline.rs | 43 ++++++++++++++----------------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b5563ad186..6cb245aed7 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -586,7 +586,6 @@ pub struct TimelineMetrics { pub flush_time_histo: StorageTimeMetrics, pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, - pub init_logical_size_histo: StorageTimeMetrics, pub logical_size_histo: StorageTimeMetrics, pub load_layer_map_histo: StorageTimeMetrics, pub garbage_collect_histo: StorageTimeMetrics, @@ -619,8 +618,6 @@ impl TimelineMetrics { let compact_time_histo = StorageTimeMetrics::new("compact", &tenant_id, &timeline_id); let create_images_time_histo = StorageTimeMetrics::new("create images", &tenant_id, &timeline_id); - let init_logical_size_histo = - StorageTimeMetrics::new("init logical size", &tenant_id, &timeline_id); let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id); let load_layer_map_histo = StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id); @@ -657,7 +654,6 @@ impl TimelineMetrics { flush_time_histo, compact_time_histo, create_images_time_histo, - init_logical_size_histo, logical_size_histo, garbage_collect_histo, load_layer_map_histo, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 33909e749b..5fde1a77e0 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -328,9 +328,13 @@ impl LogicalSize { .fetch_add(delta, AtomicOrdering::SeqCst); } - /// Returns the initialized (already calculated) value, if any. - fn initialized_size(&self) -> Option { - self.initial_logical_size.get().copied() + /// Make the value computed by initial logical size computation + /// available for re-use. This doesn't contain the incremental part. + fn initialized_size(&self, lsn: Lsn) -> Option { + match self.initial_part_end { + Some(v) if v == lsn => self.initial_logical_size.get().copied(), + _ => None, + } } } @@ -806,11 +810,11 @@ impl Timeline { let mut is_exact = true; let size = current_size.size(); - if let (CurrentLogicalSize::Approximate(_), Some(init_lsn)) = + if let (CurrentLogicalSize::Approximate(_), Some(initial_part_end)) = (current_size, self.current_logical_size.initial_part_end) { is_exact = false; - self.try_spawn_size_init_task(init_lsn, ctx); + self.try_spawn_size_init_task(initial_part_end, ctx); } Ok((size, is_exact)) @@ -1688,7 +1692,7 @@ impl Timeline { Ok(()) } - fn try_spawn_size_init_task(self: &Arc, init_lsn: Lsn, ctx: &RequestContext) { + fn try_spawn_size_init_task(self: &Arc, lsn: Lsn, ctx: &RequestContext) { let permit = match Arc::clone(&self.current_logical_size.initial_size_computation) .try_acquire_owned() { @@ -1726,7 +1730,7 @@ impl Timeline { // NB: don't log errors here, task_mgr will do that. async move { let calculated_size = match self_clone - .logical_size_calculation_task(init_lsn, &background_ctx) + .logical_size_calculation_task(lsn, &background_ctx) .await { Ok(s) => s, @@ -1811,7 +1815,7 @@ impl Timeline { #[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))] async fn logical_size_calculation_task( self: &Arc, - init_lsn: Lsn, + lsn: Lsn, ctx: &RequestContext, ) -> Result { let mut timeline_state_updates = self.subscribe_for_state_updates(); @@ -1822,7 +1826,7 @@ impl Timeline { let cancel = cancel.child_token(); let ctx = ctx.attached_child(); self_calculation - .calculate_logical_size(init_lsn, cancel, &ctx) + .calculate_logical_size(lsn, cancel, &ctx) .await }; let timeline_state_cancellation = async { @@ -1906,21 +1910,12 @@ impl Timeline { // need to return something Ok(0) }); - let timer = if up_to_lsn == self.initdb_lsn { - if let Some(size) = self.current_logical_size.initialized_size() { - if size != 0 { - // non-zero size means that the size has already been calculated by this method - // after startup. if the logical size is for a new timeline without layers the - // size will be zero, and we cannot use that, or this caching strategy until - // pageserver restart. - return Ok(size); - } - } - - self.metrics.init_logical_size_histo.start_timer() - } else { - self.metrics.logical_size_histo.start_timer() - }; + // See if we've already done the work for initial size calculation. + // This is a short-cut for timelines that are mostly unused. + if let Some(size) = self.current_logical_size.initialized_size(up_to_lsn) { + return Ok(size); + } + let timer = self.metrics.logical_size_histo.start_timer(); let logical_size = self .get_current_logical_size_non_incremental(up_to_lsn, cancel, ctx) .await?;