From fe21c7fe1993c8bd1ac430464ca99f9bc64b3a36 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Mon, 12 Feb 2024 21:41:23 +0000
Subject: [PATCH] pageserver: shut down WAL ingest if pathological storage
 amplification is detected

---
 pageserver/src/tenant.rs          | 45 +++++++++++++++++++++++++++++--
 pageserver/src/tenant/timeline.rs | 11 ++++++++
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index d946c57118..e688bff8cf 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -1936,11 +1936,52 @@ impl Tenant {
             timelines_to_compact
         };
 
+        let mut total_physical = 0;
         for (timeline_id, timeline) in &timelines_to_compact {
-            timeline
+            let timeline_result = timeline
                 .compact(cancel, EnumSet::empty(), ctx)
                 .instrument(info_span!("compact_timeline", %timeline_id))
-                .await?;
+                .await;
+
+            if let Some(remote_client) = &timeline.remote_client {
+                total_physical += remote_client.get_remote_physical_size();
+            }
+
+            timeline_result?;
+        }
+
+        // Circuit breaker: if the tenant's statistics indicate a pathological storage issue, such
+        // as extremely high write inflation, then we will stop ingesting data for its timelines. This
+        // reduces the blast radius of postgres/walingest bugs that might enable one tenant to generate
+        // an extremely large storage size, and thereby interfere with other tenants on the same pageserver.
+        let synthetic_size = self.cached_synthetic_tenant_size.load(Ordering::Relaxed);
+        if synthetic_size > 0 {
+            let amplification = total_physical as f64 / synthetic_size as f64;
+
+            // We only try to evaluate amplification once synthetic size reaches some threshold, to avoid
+            // noisy results on very small/new tenants.
+            const SIZE_THRESHOLD_FOR_AMPLIFICATION_CHECK: u64 = 1_000_000_000;
+
+            // Typical storage amplification is something like 3x-10x. 100x would be really extreme.
+            // 1000x is unthinkable: if we see an amplification this extreme, then something bad and
+            // dangerous is going on.
+            const PATHOLOGICAL_AMPLIFICATION_FACTOR: f64 = 1000.0;
+
+            if synthetic_size > SIZE_THRESHOLD_FOR_AMPLIFICATION_CHECK
+                && amplification > PATHOLOGICAL_AMPLIFICATION_FACTOR
+            {
+                tracing::error!("Pathological storage amplification detected (synthetic size {synthetic_size}, physical size {total_physical}): shutting down ingest");
+                for (timeline_id, timeline) in timelines_to_compact {
+                    if tokio::time::timeout(Duration::from_secs(5), timeline.kill_wal_receiver())
+                        .await
+                        .is_err()
+                    {
+                        tracing::error!(
+                            "Timed out shutting down WAL ingest on timeline {timeline_id}"
+                        );
+                    }
+                }
+            }
         }
 
         Ok(())
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 625be7a644..c636c30c5f 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1655,6 +1655,17 @@ impl Timeline {
         ));
     }
 
+    /// For terminating wal ingestion without tearing down the rest of the Timeline (i.e. reads to
+    /// already ingested data should still work)
+    pub(super) async fn kill_wal_receiver(&self) {
+        task_mgr::shutdown_tasks(
+            Some(TaskKind::WalReceiverManager),
+            Some(self.tenant_shard_id),
+            Some(self.timeline_id),
+        )
+        .await;
+    }
+
     /// Initialize with an empty layer map. Used when creating a new timeline.
     pub(super) fn init_empty_layer_map(&self, start_lsn: Lsn) {
         let mut layers = self.layers.try_write().expect(