diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs
index 622738022a..cc93a06ccd 100644
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -328,8 +328,8 @@ pub enum TaskKind {
     // Eviction. One per timeline.
     Eviction,
 
-    // Ingest housekeeping (flushing ephemeral layers on time threshold or disk pressure)
-    IngestHousekeeping,
+    // Tenant housekeeping (flush idle ephemeral layers, shut down idle walredo, etc.).
+    TenantHousekeeping,
 
     /// See [`crate::disk_usage_eviction_task`].
     DiskUsageEviction,
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 3c6996dd51..d84cd4d278 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -20,6 +20,7 @@ use chrono::NaiveDateTime;
 use enumset::EnumSet;
 use futures::stream::FuturesUnordered;
 use futures::StreamExt;
+use itertools::Itertools as _;
 use pageserver_api::models;
 use pageserver_api::models::CompactInfoResponse;
 use pageserver_api::models::LsnLease;
@@ -3088,32 +3089,28 @@ impl Tenant {
         Ok(rx)
     }
 
-    // Call through to all timelines to freeze ephemeral layers if needed. Usually
-    // this happens during ingest: this background housekeeping is for freezing layers
-    // that are open but haven't been written to for some time.
-    async fn ingest_housekeeping(&self) {
-        // Scan through the hashmap and collect a list of all the timelines,
-        // while holding the lock. Then drop the lock and actually perform the
-        // compactions. We don't want to block everything else while the
-        // compaction runs.
-        let timelines = {
-            self.timelines
-                .lock()
-                .unwrap()
-                .values()
-                .filter_map(|timeline| {
-                    if timeline.is_active() {
-                        Some(timeline.clone())
-                    } else {
-                        None
-                    }
-                })
-                .collect::<Vec<_>>()
-        };
+    /// Performs periodic housekeeping, via the tenant housekeeping background task.
+    async fn housekeeping(&self) {
+        // Call through to all timelines to freeze ephemeral layers as needed. This usually happens
+        // during ingest, but we don't want idle timelines to hold open layers for too long.
+        let timelines = self
+            .timelines
+            .lock()
+            .unwrap()
+            .values()
+            .filter(|tli| tli.is_active())
+            .cloned()
+            .collect_vec();
 
-        for timeline in &timelines {
+        for timeline in timelines {
             timeline.maybe_freeze_ephemeral_layer().await;
         }
+
+        // Shut down walredo if idle.
+        const WALREDO_IDLE_TIMEOUT: Duration = Duration::from_secs(180);
+        if let Some(ref walredo_mgr) = self.walredo_mgr {
+            walredo_mgr.maybe_quiesce(WALREDO_IDLE_TIMEOUT);
+        }
     }
 
     pub fn timeline_has_no_attached_children(&self, timeline_id: TimelineId) -> bool {
diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs
index a45eb002bd..1a6311dd9c 100644
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -4,7 +4,6 @@ use std::cmp::max;
 use std::future::Future;
 use std::ops::{ControlFlow, RangeInclusive};
 use std::pin::pin;
-use std::str::FromStr;
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 
@@ -75,7 +74,7 @@ pub(crate) enum BackgroundLoopKind {
     Compaction,
     Gc,
     Eviction,
-    IngestHouseKeeping,
+    TenantHouseKeeping,
     ConsumptionMetricsCollectMetrics,
     ConsumptionMetricsSyntheticSizeWorker,
     InitialLogicalSizeCalculation,
@@ -186,10 +185,10 @@ pub fn start_background_loops(tenant: &Arc<Tenant>, can_start: Option<&Barrier>)
 
     task_mgr::spawn(
         BACKGROUND_RUNTIME.handle(),
-        TaskKind::IngestHousekeeping,
+        TaskKind::TenantHousekeeping,
        tenant_shard_id,
         None,
-        &format!("ingest housekeeping for tenant {tenant_shard_id}"),
+        &format!("housekeeping for tenant {tenant_shard_id}"),
         {
             let tenant = Arc::clone(tenant);
             let can_start = can_start.cloned();
@@ -201,8 +200,8 @@ pub fn start_background_loops(tenant: &Arc<Tenant>, can_start: Option<&Barrier>)
             };
             TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
             defer!(TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc());
-            ingest_housekeeping_loop(tenant, cancel)
-                .instrument(info_span!("ingest_housekeeping_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
+            tenant_housekeeping_loop(tenant, cancel)
+                .instrument(info_span!("tenant_housekeeping_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
                 .await;
             Ok(())
         }
@@ -281,14 +280,6 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
             );
         };
 
-        // Perhaps we did no work and the walredo process has been idle for some time:
-        // give it a chance to shut down to avoid leaving walredo process running indefinitely.
-        // TODO: move this to a separate task (housekeeping loop) that isn't affected by the back-off,
-        // so we get some upper bound guarantee on when walredo quiesce / this throttling reporting here happens.
-        if let Some(walredo_mgr) = &tenant.walredo_mgr {
-            walredo_mgr.maybe_quiesce(period * 10);
-        }
-
         // Sleep
         if tokio::time::timeout(sleep_duration, cancel.cancelled())
             .await
@@ -431,42 +422,34 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
     }
 }
 
-/// Ingest housekeeping's main loop.
-async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
+/// Tenant housekeeping's main loop.
+async fn tenant_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
     let mut last_throttle_flag_reset_at = Instant::now();
     loop {
         if wait_for_active_tenant(&tenant, &cancel).await.is_break() {
             return;
         }
 
-        // We run ingest housekeeping with the same frequency as compaction: it is not worth
-        // having a distinct setting. But we don't run it in the same task, because compaction
-        // blocks on acquiring the background job semaphore.
-        let mut period = tenant.get_compaction_period();
+        // Use the same period as compaction; it's not worth a separate setting. But if it's set to
+        // zero (to disable compaction), then use a reasonable default. Jitter it by 5%.
+        let period = match tenant.get_compaction_period() {
+            Duration::ZERO => humantime::parse_duration(DEFAULT_COMPACTION_PERIOD).unwrap(),
+            period => period,
+        };
 
-        // If compaction period is set to zero (to disable it), then we will use a reasonable default
-        if period == Duration::ZERO {
-            period = humantime::Duration::from_str(DEFAULT_COMPACTION_PERIOD)
-                .unwrap()
-                .into()
-        }
-
-        // Always sleep first: we do not need to do ingest housekeeping early in the lifetime of
-        // a tenant, since it won't have started writing any ephemeral files yet. Jitter the
-        // period by ±5%.
         let Ok(period) = sleep_jitter(period, period * 5 / 100, &cancel).await else {
             break;
         };
-
+        // Do tenant housekeeping.
         let iteration = Iteration {
             started_at: Instant::now(),
            period,
-            kind: BackgroundLoopKind::IngestHouseKeeping,
+            kind: BackgroundLoopKind::TenantHouseKeeping,
         };
-        iteration.run(tenant.ingest_housekeeping()).await;
+        iteration.run(tenant.housekeeping()).await;
 
-        // TODO: rename the background loop kind to something more generic, like, tenant housekeeping.
-        // Or just spawn another background loop for this throttle, it's not like it's super costly.
+        // Log any getpage throttling.
         info_span!(parent: None, "pagestream_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| {
            let now = Instant::now();
            let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now);