From bf63f129ae1b1822dfb2ec689e483f39c772c9a5 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 27 Jan 2023 00:49:02 +0200 Subject: [PATCH] Make 'branch_timeline' function more clear. Change the signature so that it takes an Arc reference to the source timeline, instead of just the ID. All the callers have an Arc reference at hand, so this is more convenient for everyone. Reorder the code a bit and improve the comments, to make it more clear what it does and why. --- pageserver/src/pgdatadir_mapping.rs | 2 +- pageserver/src/tenant.rs | 113 ++++++++++++++-------------- 2 files changed, 56 insertions(+), 59 deletions(-) diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 6eea023af1..6f9035305d 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1653,7 +1653,7 @@ mod tests { assert!(tline.list_rels(0, TESTDB, Lsn(0x30))?.contains(&TESTREL_A)); // Create a branch, check that the relation is visible there - repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?; + repo.branch_timeline(&tline, NEW_TIMELINE_ID, Lsn(0x30))?; let newtline = match repo.get_timeline(NEW_TIMELINE_ID)?.local_timeline() { Some(timeline) => timeline, None => panic!("Should have a local timeline"), diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 1f0219aaa3..59450cf277 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1205,13 +1205,8 @@ impl Tenant { ancestor_timeline.wait_lsn(*lsn, ctx).await?; } - self.branch_timeline( - ancestor_timeline_id, - new_timeline_id, - ancestor_start_lsn, - ctx, - ) - .await? + self.branch_timeline(&ancestor_timeline, new_timeline_id, ancestor_start_lsn, ctx) + .await? } None => { self.bootstrap_timeline(new_timeline_id, pg_version, ctx) @@ -2050,54 +2045,53 @@ impl Tenant { /// Branch an existing timeline async fn branch_timeline( &self, - src: TimelineId, - dst: TimelineId, + src_timeline: &Arc, + dst_id: TimelineId, start_lsn: Option, _ctx: &RequestContext, ) -> anyhow::Result> { - // We need to hold this lock to prevent GC from starting at the same time. GC scans the directory to learn - // about timelines, so otherwise a race condition is possible, where we create new timeline and GC - // concurrently removes data that is needed by the new timeline. - let _gc_cs = self.gc_cs.lock().await; - let timeline_uninit_mark = { - let timelines = self.timelines.lock().unwrap(); - self.create_timeline_uninit_mark(dst, &timelines)? - }; - - // In order for the branch creation task to not wait for GC/compaction, - // we need to make sure that the starting LSN of the child branch is not out of scope midway by - // - // 1. holding the GC lock to prevent overwritting timeline's GC data - // 2. checking both the latest GC cutoff LSN and latest GC info of the source timeline - // - // Step 2 is to avoid initializing the new branch using data removed by past GC iterations - // or in-queue GC iterations. - - let src_timeline = self.get_timeline(src, false).with_context(|| { - format!( - "No ancestor {} found for timeline {}/{}", - src, self.tenant_id, dst - ) - })?; - - let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn(); + let src_id = src_timeline.timeline_id; // If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN let start_lsn = start_lsn.unwrap_or_else(|| { let lsn = src_timeline.get_last_record_lsn(); - info!("branching timeline {dst} from timeline {src} at last record LSN: {lsn}"); + info!("branching timeline {dst_id} from timeline {src_id} at last record LSN: {lsn}"); lsn }); - // Check if the starting LSN is out of scope because it is less than - // 1. the latest GC cutoff LSN or - // 2. the planned GC cutoff LSN, which is from an in-queue GC iteration. + // First acquire the GC lock so that another task cannot advance the GC + // cutoff in 'gc_info', and make 'start_lsn' invalid, while we are + // creating the branch. + let _gc_cs = self.gc_cs.lock().await; + + // Create a placeholder for the new branch. This will error + // out if the new timeline ID is already in use. + let timeline_uninit_mark = { + let timelines = self.timelines.lock().unwrap(); + self.create_timeline_uninit_mark(dst_id, &timelines)? + }; + + // Ensure that `start_lsn` is valid, i.e. the LSN is within the PITR + // horizon on the source timeline + // + // We check it against both the planned GC cutoff stored in 'gc_info', + // and the 'latest_gc_cutoff' of the last GC that was performed. The + // planned GC cutoff in 'gc_info' is normally larger than + // 'latest_gc_cutoff_lsn', but beware of corner cases like if you just + // changed the GC settings for the tenant to make the PITR window + // larger, but some of the data was already removed by an earlier GC + // iteration. + + // check against last actual 'latest_gc_cutoff' first + let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn(); src_timeline .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn) .context(format!( "invalid branch start lsn: less than latest GC cutoff {}", *latest_gc_cutoff_lsn, ))?; + + // and then the planned GC cutoff { let gc_info = src_timeline.gc_info.read().unwrap(); let cutoff = min(gc_info.pitr_cutoff, gc_info.horizon_cutoff); @@ -2108,6 +2102,12 @@ impl Tenant { } } + // + // The branch point is valid, and we are still holding the 'gc_cs' lock + // so that GC cannot advance the GC cutoff until we are finished. + // Proceed with the branch creation. + // + // Determine prev-LSN for the new timeline. We can only determine it if // the timeline was branched at the current end of the source timeline. let RecordLsn { @@ -2126,7 +2126,7 @@ impl Tenant { let metadata = TimelineMetadata::new( start_lsn, dst_prev, - Some(src), + Some(src_id), start_lsn, *src_timeline.latest_gc_cutoff_lsn.read(), // FIXME: should we hold onto this guard longer? src_timeline.initdb_lsn, @@ -2135,15 +2135,15 @@ impl Tenant { let mut timelines = self.timelines.lock().unwrap(); let new_timeline = self .prepare_timeline( - dst, + dst_id, metadata, timeline_uninit_mark, false, - Some(src_timeline), + Some(Arc::clone(src_timeline)), )? .initialize_with_lock(&mut timelines, true, true)?; drop(timelines); - info!("branched timeline {dst} from {src} at {start_lsn}"); + info!("branched timeline {dst_id} from {src_id} at {start_lsn}"); Ok(new_timeline) } @@ -2966,7 +2966,7 @@ mod tests { // Branch the history, modify relation differently on the new timeline tenant - .branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x30)), &ctx) + .branch_timeline(&tline, NEW_TIMELINE_ID, Some(Lsn(0x30)), &ctx) .await?; let newtline = tenant .get_timeline(NEW_TIMELINE_ID, true) @@ -3055,7 +3055,7 @@ mod tests { // try to branch at lsn 25, should fail because we already garbage collected the data match tenant - .branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25)), &ctx) + .branch_timeline(&tline, NEW_TIMELINE_ID, Some(Lsn(0x25)), &ctx) .await { Ok(_) => panic!("branching should have failed"), @@ -3078,12 +3078,13 @@ mod tests { TenantHarness::create("test_prohibit_branch_creation_on_pre_initdb_lsn")? .load() .await; - let tline = - tenant.create_empty_timeline(TIMELINE_ID, Lsn(0x50), DEFAULT_PG_VERSION, &ctx)?; - let _tline = tline.initialize(&ctx)?; + + let tline = tenant + .create_empty_timeline(TIMELINE_ID, Lsn(0x50), DEFAULT_PG_VERSION, &ctx)? + .initialize(&ctx)?; // try to branch at lsn 0x25, should fail because initdb lsn is 0x50 match tenant - .branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25)), &ctx) + .branch_timeline(&tline, NEW_TIMELINE_ID, Some(Lsn(0x25)), &ctx) .await { Ok(_) => panic!("branching should have failed"), @@ -3134,7 +3135,7 @@ mod tests { make_some_layers(tline.as_ref(), Lsn(0x20)).await?; tenant - .branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx) + .branch_timeline(&tline, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx) .await?; let newtline = tenant .get_timeline(NEW_TIMELINE_ID, true) @@ -3158,7 +3159,7 @@ mod tests { make_some_layers(tline.as_ref(), Lsn(0x20)).await?; tenant - .branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx) + .branch_timeline(&tline, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx) .await?; let newtline = tenant .get_timeline(NEW_TIMELINE_ID, true) @@ -3214,7 +3215,7 @@ mod tests { make_some_layers(tline.as_ref(), Lsn(0x20)).await?; tenant - .branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx) + .branch_timeline(&tline, NEW_TIMELINE_ID, Some(Lsn(0x40)), &ctx) .await?; let newtline = tenant @@ -3499,16 +3500,14 @@ mod tests { keyspace.add_key(test_key); } - let mut tline_id = TIMELINE_ID; for _ in 0..50 { let new_tline_id = TimelineId::generate(); tenant - .branch_timeline(tline_id, new_tline_id, Some(lsn), &ctx) + .branch_timeline(&tline, new_tline_id, Some(lsn), &ctx) .await?; tline = tenant .get_timeline(new_tline_id, true) .expect("Should have the branched timeline"); - tline_id = new_tline_id; for _ in 0..NUM_KEYS { lsn = Lsn(lsn.0 + 0x10); @@ -3565,18 +3564,16 @@ mod tests { let mut updated = [[Lsn(0); NUM_KEYS]; NUM_TLINES]; let mut lsn = Lsn(0); - let mut tline_id = TIMELINE_ID; #[allow(clippy::needless_range_loop)] for idx in 0..NUM_TLINES { let new_tline_id = TimelineId::generate(); tenant - .branch_timeline(tline_id, new_tline_id, Some(lsn), &ctx) + .branch_timeline(&tline, new_tline_id, Some(lsn), &ctx) .await?; tline = tenant .get_timeline(new_tline_id, true) .expect("Should have the branched timeline"); - tline_id = new_tline_id; for _ in 0..NUM_KEYS { lsn = Lsn(lsn.0 + 0x10);