From 05326cc247b0149ff27ada881249f4945dc6b541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 6 Feb 2025 11:10:11 +0100 Subject: [PATCH] Skip gc cutoff lsn check at timeline creation if lease exists (#10685) Right now, branch creation doesn't care if a lsn lease exists or not, it just fails if the passed lsn is older than either the last or the planned gc cutoff. However, if an lsn lease exists for a given lsn, we can actually create a branch at that point: nothing has been gc'd away. This prevents race conditions that #10678 still leaves around. Related: #10639 https://github.com/neondatabase/cloud/issues/23667 --- pageserver/src/page_service.rs | 2 +- pageserver/src/tenant.rs | 32 +++++++++++++++++-------------- pageserver/src/tenant/timeline.rs | 3 +++ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index d4898532d6..24a350399d 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -1690,7 +1690,7 @@ impl PageServerHandler { // to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN). if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() { let gc_info = &timeline.gc_info.read().unwrap(); - if !gc_info.leases.contains_key(&request_lsn) { + if !gc_info.lsn_covered_by_lease(request_lsn) { return Err( PageStreamError::BadRequest(format!( "tried to request a page version that was garbage collected. requested at {} gc cutoff {}", diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c1b408ed72..3c6996dd51 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -4642,22 +4642,26 @@ impl Tenant { // check against last actual 'latest_gc_cutoff' first let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn(); - src_timeline - .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn) - .context(format!( - "invalid branch start lsn: less than latest GC cutoff {}", - *latest_gc_cutoff_lsn, - )) - .map_err(CreateTimelineError::AncestorLsn)?; - - // and then the planned GC cutoff { let gc_info = src_timeline.gc_info.read().unwrap(); - let cutoff = gc_info.min_cutoff(); - if start_lsn < cutoff { - return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!( - "invalid branch start lsn: less than planned GC cutoff {cutoff}" - ))); + let planned_cutoff = gc_info.min_cutoff(); + if gc_info.lsn_covered_by_lease(start_lsn) { + tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *latest_gc_cutoff_lsn); + } else { + src_timeline + .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn) + .context(format!( + "invalid branch start lsn: less than latest GC cutoff {}", + *latest_gc_cutoff_lsn, + )) + .map_err(CreateTimelineError::AncestorLsn)?; + + // and then the planned GC cutoff + if start_lsn < planned_cutoff { + return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!( + "invalid branch start lsn: less than planned GC cutoff {planned_cutoff}" + ))); + } } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b6a349a209..45ddd38f67 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -531,6 +531,9 @@ impl GcInfo { pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) -> bool { self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes) } + pub(crate) fn lsn_covered_by_lease(&self, lsn: Lsn) -> bool { + self.leases.contains_key(&lsn) + } } /// The `GcInfo` component describing which Lsns need to be retained. Functionally, this