From dc9c33139b698adbbe5f19b4eefd6cd2b5703a05 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 24 Nov 2022 15:49:04 +0200 Subject: [PATCH] On tenant load, start WAL receivers only after all timelines have been loaded. And similarly on attach. This way, if the tenant load/attach fails halfway through, we don't have any leftover WAL receivers still running on the broken tenant. --- libs/pageserver_api/src/models.rs | 3 ++- pageserver/src/tenant.rs | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index ad3a528dc7..750585b58b 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -44,7 +44,8 @@ impl TenantState { /// A state of a timeline in pageserver's memory. #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum TimelineState { - /// Timeline is fully operational, its background jobs are running. + /// Timeline is fully operational. If the containing Tenant is Active, the timeline's + /// background jobs are running; otherwise they will be launched when the tenant is activated. Active, /// A timeline is recognized by pageserver, but not yet ready to operate. /// The status indicates, that the timeline could eventually go back to Active automatically: diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index bc13c3ac0d..5a837dec70 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -165,12 +165,21 @@ struct TimelineUninitMark { } impl UninitializedTimeline<'_> { - /// Ensures timeline data is valid, loads it into pageserver's memory and removes uninit mark file on success. + /// Ensures timeline data is valid, loads it into pageserver's memory and removes + /// uninit mark file on success. 
+ /// + /// The new timeline is initialized in Active state, and its background jobs are + /// started. pub fn initialize(self) -> anyhow::Result> { let mut timelines = self.owning_tenant.timelines.lock().unwrap(); self.initialize_with_lock(&mut timelines, true, true) } + /// Like `initialize`, but the caller is already holding the lock on Tenant::timelines. + /// If `launch_wal_receiver` is false, the WAL receiver is not launched, even though + /// the timeline is initialized in Active state. This is used during tenant load and + /// attach, where the WAL receivers are launched only after all the timelines have + /// been initialized. fn initialize_with_lock( mut self, timelines: &mut HashMap>, @@ -403,7 +412,7 @@ struct RemoteStartupData { /// timelines, forked off from the same initial call to 'initdb'. impl Tenant { /// Yet another helper for timeline initialization. - /// Contains common part for `load_remote_timeline` and `load_remote_timeline` + /// Contains common part for `load_local_timeline` and `load_remote_timeline` async fn setup_timeline( &self, timeline_id: TimelineId, @@ -502,9 +511,6 @@ impl Tenant { .context("save_metadata")?; } - // Finally launch walreceiver - timeline.launch_wal_receiver(); - Ok(()) } @@ -1301,6 +1307,7 @@ impl Tenant { for timeline in not_broken_timelines { timeline.set_state(TimelineState::Active); + timeline.launch_wal_receiver(); } } }