From b725cc879a014e40ed40777b80e09fdd19ad46a9 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 14 Jun 2023 20:54:28 +0200 Subject: [PATCH] fixup the merge, unit tests pass, some python tests will fail --- pageserver/src/page_service.rs | 2 ++ pageserver/src/tenant.rs | 13 ++++++++++--- pageserver/src/tenant/timeline.rs | 22 ++++++++++++++++++++-- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index eab2e4fee6..22338c83e4 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -489,6 +489,8 @@ impl PageServerHandler { where IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, { + debug_assert_current_span_has_tenant_and_timeline_id(); + task_mgr::associate_with(Some(tenant_id), Some(timeline_id)); // Create empty timeline info!("creating new timeline"); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 3b3a38d51d..af269abc69 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -550,7 +550,7 @@ impl Tenant { // ensure!(new_disk_consistent_lsn.is_valid(), // "Timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn and cannot be initialized"); timeline - .load_layer_map(new_disk_consistent_lsn) + .load_layer_map(&cause, new_disk_consistent_lsn) .await .with_context(|| { format!("Failed to load layermap for timeline {tenant_id}/{timeline_id}") @@ -1149,6 +1149,7 @@ impl Tenant { init_order, ctx, ) + .instrument(info_span!("load_local_timeline", timeline_id=%timeline_id)) .await .with_context(|| format!("load local timeline {timeline_id}"))?; match timeline { @@ -1295,6 +1296,8 @@ impl Tenant { pg_version: u32, ctx: &RequestContext, ) -> anyhow::Result<(CreatingTimelineGuard, Arc)> { + debug_assert_current_span_has_tenant_and_timeline_id(); + anyhow::ensure!( self.is_active(), "Cannot create empty timelines on inactive tenant" @@ -1317,7 +1320,7 @@ impl Tenant { }); let new_metadata = TimelineMetadata::new( 
- Lsn(0), // TODO should this be initdb_lsn as well, at least for the handle_import_basebackup use case? + Lsn(0), None, None, Lsn(0), @@ -1374,6 +1377,7 @@ impl Tenant { None, ctx, ) + .instrument(info_span!("load_local_timeline", timeline_id=%new_timeline_id)) .await .context("load newly created on-disk timeline state")? .expect("load_local_timeline should have created the timeline"); @@ -1424,7 +1428,7 @@ impl Tenant { let (guard, tline) = self .create_empty_timeline(new_timeline_id, initdb_lsn, pg_version, ctx) // make the debug_assert_current_span_has_tenant_id() in create_empty_timeline() happy - .instrument(tracing::info_span!("create_test_timeline", tenant_id=%self.tenant_id)) + .instrument(tracing::info_span!("create_test_timeline", tenant_id=%self.tenant_id, timeline_id=%new_timeline_id)) .await .context("create empty timeline")?; @@ -1631,6 +1635,7 @@ impl Tenant { }; let real_timeline = self .load_local_timeline(new_timeline_id, metadata, ancestor, load_cause, None, ctx) + .instrument(info_span!("load_local_timeline", timeline_id=%new_timeline_id)) .await .context("load newly created on-disk timeline state")?; @@ -3988,6 +3993,7 @@ mod tests { match tenant .create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .instrument(info_span!("create_empty_timeline", tenant_id=%tenant.tenant_id, timeline_id=%TIMELINE_ID)) .await { Ok(_) => panic!("duplicate timeline creation should fail"), @@ -4787,6 +4793,7 @@ mod tests { let initdb_lsn = Lsn(0x20); let (_guard, tline) = tenant .create_empty_timeline(TIMELINE_ID, initdb_lsn, DEFAULT_PG_VERSION, &ctx) + .instrument(tracing::info_span!("create_empty_timeline", tenant_id=%tenant.tenant_id, timeline_id = %TIMELINE_ID)) .await?; // Spawn flush loop now so that we can set the `expect_initdb_optimization` diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 75af0455aa..ca8942b577 100644 --- a/pageserver/src/tenant/timeline.rs +++ 
b/pageserver/src/tenant/timeline.rs @@ -82,6 +82,7 @@ use super::layer_map::BatchedUpdates; use super::remote_timeline_client::index::IndexPart; use super::remote_timeline_client::RemoteTimelineClient; use super::storage_layer::{DeltaLayer, ImageLayer, Layer, LayerAccessStatsReset}; +use super::TimelineLoadCause; #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub(super) enum FlushLoopState { @@ -1672,7 +1673,11 @@ impl Timeline { /// /// Scan the timeline directory to populate the layer map. /// - pub(super) async fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> { + pub(super) async fn load_layer_map( + &self, + cause: &TimelineLoadCause, + disk_consistent_lsn: Lsn, + ) -> anyhow::Result<()> { let mut layers = self.layers.write().await; let mut updates = layers.batch_update(); let mut num_layers = 0; @@ -1775,7 +1780,19 @@ impl Timeline { } updates.flush(); - layers.next_open_layer_at = Some(Lsn(disk_consistent_lsn.0) + 1); + + if disk_consistent_lsn == Lsn(0) { + // If disk_consistent_lsn is 0, then we're still in bootstrap/basebackup_import/create_test_timeline. + // Set next_open_layer_at to initdb_lsn to enable the put@initdb_lsn optimization in flush_frozen_layer. + assert!(matches!(cause, TimelineLoadCause::TimelineCreate { .. })); + assert_eq!( + num_layers, 0, + "if we crash, creating timelines get removed from disk" + ); + layers.next_open_layer_at = Some(self.initdb_lsn); + } else { + layers.next_open_layer_at = Some(Lsn(disk_consistent_lsn.0) + 1); + } info!( "loaded layer map with {} layers at {}, total physical size: {}", @@ -2932,6 +2949,7 @@ impl Timeline { // files instead. This is possible as long as *all* the data imported into the // repository have the same LSN. let lsn_range = frozen_layer.get_lsn_range(); + debug!("flushing frozen layer with LSN range {:?}", lsn_range); let layer_paths_to_upload = if lsn_range.start == self.initdb_lsn && lsn_range.end == Lsn(self.initdb_lsn.0 + 1) { #[cfg(test)]