diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index a2cebffc83..e0731ba79b 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -335,9 +335,13 @@ fn start_pageserver( // Set up remote storage client let remote_storage = create_remote_storage_client(conf)?; - // All tenant load operations carry this while they are ongoing; it will be dropped once those - // operations finish either successfully or in some other manner. However, the initial load - // will be then done, and we can start the global background tasks. + // Startup staging or optimizing: + // + // (init_done_tx, init_done_rx) are used to control when do background loops start. This is to + // avoid starving out the BACKGROUND_RUNTIME async worker threads doing heavy work, like + // initial repartitioning while we still have Loading tenants. + // + // init_done_rx is a barrier which stops waiting once all init_done_tx clones are dropped. let (init_done_tx, init_done_rx) = utils::completion::channel(); // Scan the local 'tenants/' directory and start loading the tenants diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index af6a70c4f2..a895b57092 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -267,7 +267,7 @@ impl UninitializedTimeline<'_> { // updated it for the layers that we created during the import. let mut timelines = self.owning_tenant.timelines.lock().unwrap(); let tl = self.initialize_with_lock(ctx, &mut timelines, false)?; - tl.activate(broker_client, ctx); + tl.activate(broker_client, None, ctx); Ok(tl) } @@ -879,7 +879,6 @@ impl Tenant { )) } - /// /// Load a tenant that's available on local disk /// /// This is used at pageserver startup, to rebuild the in-memory @@ -890,6 +889,8 @@ impl Tenant { /// If the loading fails for some reason, the Tenant will go into Broken /// state. /// + /// `init_done` is an optional channel used during initial load to delay background task + /// start. It is not used later. #[instrument(skip_all, fields(tenant_id=%tenant_id))] pub fn spawn_load( conf: &'static PageServerConf, @@ -1358,7 +1359,7 @@ impl Tenant { } }; - loaded_timeline.activate(broker_client, ctx); + loaded_timeline.activate(broker_client, None, ctx); if let Some(remote_client) = loaded_timeline.remote_client.as_ref() { // Wait for the upload of the 'index_part.json` file to finish, so that when we return @@ -1682,6 +1683,9 @@ impl Tenant { } /// Changes tenant status to active, unless shutdown was already requested. + /// + /// `init_done` is an optional channel used during initial load to delay background task + /// start. It is not used later. fn activate( self: &Arc, broker_client: BrokerClientChannel, @@ -1723,7 +1727,7 @@ impl Tenant { let mut activated_timelines = 0; for timeline in not_broken_timelines { - timeline.activate(broker_client.clone(), ctx); + timeline.activate(broker_client.clone(), init_done, ctx); activated_timelines += 1; } diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 4318749777..05874bdd72 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -155,6 +155,8 @@ pub async fn init_tenant_mgr( Ok(()) } +/// `init_done` is an optional channel used during initial load to delay background task +/// start. It is not used later. pub fn schedule_local_tenant_processing( conf: &'static PageServerConf, tenant_path: &Path, diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 02aed11114..1bbc1b1c08 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -14,6 +14,10 @@ use tokio_util::sync::CancellationToken; use tracing::*; use utils::completion; +/// Start per tenant background loops: compaction and gc. +/// +/// `init_done` is an optional channel used during initial load to delay background task +/// start. It is not used later. pub fn start_background_loops(tenant: &Arc, init_done: Option<&completion::Barrier>) { let tenant_id = tenant.tenant_id; task_mgr::spawn( diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b45dcc4e42..3db78401f6 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -57,6 +57,7 @@ use pageserver_api::reltag::RelTag; use postgres_connection::PgConnectionConfig; use postgres_ffi::to_pg_timestamp; use utils::{ + completion, id::{TenantId, TimelineId}, lsn::{AtomicLsn, Lsn, RecordLsn}, seqwait::SeqWait, @@ -928,10 +929,15 @@ impl Timeline { Ok(()) } - pub fn activate(self: &Arc, broker_client: BrokerClientChannel, ctx: &RequestContext) { + pub fn activate( + self: &Arc, + broker_client: BrokerClientChannel, + init_done: Option<&completion::Barrier>, + ctx: &RequestContext, + ) { self.launch_wal_receiver(ctx, broker_client); self.set_state(TimelineState::Active); - self.launch_eviction_task(); + self.launch_eviction_task(init_done); } pub fn set_state(&self, new_state: TimelineState) { diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 558600692e..7029d75d63 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -34,6 +34,8 @@ use crate::{ }, }; +use utils::completion; + use super::Timeline; #[derive(Default)] @@ -47,8 +49,9 @@ pub struct EvictionTaskTenantState { } impl Timeline { - pub(super) fn launch_eviction_task(self: &Arc) { + pub(super) fn launch_eviction_task(self: &Arc, init_done: Option<&completion::Barrier>) { let self_clone = Arc::clone(self); + let init_done = init_done.cloned(); task_mgr::spawn( BACKGROUND_RUNTIME.handle(), TaskKind::Eviction, @@ -57,6 +60,7 @@ impl Timeline { &format!("layer eviction for {}/{}", self.tenant_id, self.timeline_id), false, async move { + completion::Barrier::maybe_wait(init_done).await; self_clone.eviction_task(task_mgr::shutdown_token()).await; info!("eviction task finishing"); Ok(())