Suppress wal lag timeout warnings right after tenant attachment (#9232)

As seen in https://github.com/neondatabase/cloud/issues/17335, during
releases we can have ingest lags that are above the limits for warnings.
However, such lags are part of normal pageserver startup.

Therefore, calculate a certain cooldown timestamp until which we accept
lags up to a certain size. The heuristic is chosen to grow the later we get
to fully load the tenant, and we also add 60 seconds as a grace period
after that term.
This commit is contained in:
Arpad Müller
2024-10-03 03:33:09 +02:00
committed by GitHub
parent 1b176fe74a
commit 2d8f6d7906
3 changed files with 50 additions and 1 deletions

View File

@@ -97,6 +97,7 @@ use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
use crate::tenant::remote_timeline_client::INITDB_PATH;
use crate::tenant::storage_layer::DeltaLayer;
use crate::tenant::storage_layer::ImageLayer;
use crate::walingest::WalLagCooldown;
use crate::walredo;
use crate::InitializationOrder;
use std::collections::hash_map::Entry;
@@ -319,6 +320,9 @@ pub struct Tenant {
/// background warmup.
pub(crate) activate_now_sem: tokio::sync::Semaphore,
/// Time it took for the tenant to activate. Zero if not active yet.
attach_wal_lag_cooldown: Arc<std::sync::OnceLock<WalLagCooldown>>,
// Cancellation token fires when we have entered shutdown(). This is a parent of
// Timelines' cancellation token.
pub(crate) cancel: CancellationToken,
@@ -1000,11 +1004,15 @@ impl Tenant {
// Remote preload is complete.
drop(remote_load_completion);
// We will time the duration of the attach phase unless this is a creation (attach will do no work)
let attach_start = std::time::Instant::now();
let attached = {
let _attach_timer = Some(TENANT.attach.start_timer());
tenant_clone.attach(preload, &ctx).await
};
let attach_duration = attach_start.elapsed();
_ = tenant_clone.attach_wal_lag_cooldown.set(WalLagCooldown::new(attach_start, attach_duration));
match attached {
Ok(()) => {
@@ -2754,6 +2762,7 @@ impl Tenant {
pg_version,
state,
last_aux_file_policy,
self.attach_wal_lag_cooldown.clone(),
self.cancel.child_token(),
);
@@ -2860,6 +2869,7 @@ impl Tenant {
Some(Duration::from_secs(3600 * 24)),
)),
activate_now_sem: tokio::sync::Semaphore::new(0),
attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()),
cancel: CancellationToken::default(),
gate: Gate::default(),
timeline_get_throttle: Arc::new(throttle::Throttle::new(

View File

@@ -48,7 +48,6 @@ use utils::{
sync::gate::{Gate, GateGuard},
};
use std::pin::pin;
use std::sync::atomic::Ordering as AtomicOrdering;
use std::sync::{Arc, Mutex, RwLock, Weak};
use std::time::{Duration, Instant, SystemTime};
@@ -62,6 +61,7 @@ use std::{
collections::btree_map::Entry,
ops::{Deref, Range},
};
use std::{pin::pin, sync::OnceLock};
use crate::{
aux_file::AuxFileSizeEstimator,
@@ -71,6 +71,7 @@ use crate::{
metadata::TimelineMetadata,
storage_layer::{inmemory_layer::IndexEntry, PersistentLayerDesc},
},
walingest::WalLagCooldown,
walredo,
};
use crate::{
@@ -429,6 +430,8 @@ pub struct Timeline {
pub(crate) l0_flush_global_state: L0FlushGlobalState,
pub(crate) handles: handle::PerTimelineState<crate::page_service::TenantManagerTypes>,
pub(crate) attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
}
pub struct WalReceiverInfo {
@@ -2131,6 +2134,7 @@ impl Timeline {
pg_version: u32,
state: TimelineState,
aux_file_policy: Option<AuxFilePolicy>,
attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
cancel: CancellationToken,
) -> Arc<Self> {
let disk_consistent_lsn = metadata.disk_consistent_lsn();
@@ -2272,6 +2276,8 @@ impl Timeline {
l0_flush_global_state: resources.l0_flush_global_state,
handles: Default::default(),
attach_wal_lag_cooldown,
};
if aux_file_policy == Some(AuxFilePolicy::V1) {

View File

@@ -21,7 +21,10 @@
//! redo Postgres process, but some records it can handle directly with
//! bespoken Rust code.
use std::sync::Arc;
use std::sync::OnceLock;
use std::time::Duration;
use std::time::Instant;
use std::time::SystemTime;
use pageserver_api::shard::ShardIdentity;
@@ -69,7 +72,29 @@ impl CheckPoint {
}
}
/// Temporary limitation of WAL lag warnings after attach
///
/// After tenant attach, we want to limit WAL lag warnings because
/// we don't look at the WAL until the attach is complete, which
/// might take a while.
pub struct WalLagCooldown {
/// Until when should this limitation apply at all
active_until: std::time::Instant,
/// The maximum lag to suppress. Lags above this limit get reported anyways.
max_lag: Duration,
}
impl WalLagCooldown {
pub fn new(attach_start: Instant, attach_duration: Duration) -> Self {
Self {
active_until: attach_start + attach_duration * 3 + Duration::from_secs(120),
max_lag: attach_duration * 2 + Duration::from_secs(60),
}
}
}
pub struct WalIngest {
attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
shard: ShardIdentity,
checkpoint: CheckPoint,
checkpoint_modified: bool,
@@ -103,6 +128,7 @@ impl WalIngest {
shard: *timeline.get_shard_identity(),
checkpoint,
checkpoint_modified: false,
attach_wal_lag_cooldown: timeline.attach_wal_lag_cooldown.clone(),
warn_ingest_lag: WarnIngestLag {
lag_msg_ratelimit: RateLimit::new(std::time::Duration::from_secs(10)),
future_lsn_msg_ratelimit: RateLimit::new(std::time::Duration::from_secs(10)),
@@ -1429,6 +1455,13 @@ impl WalIngest {
Ok(lag) => {
if lag > conf.wait_lsn_timeout {
rate_limits.lag_msg_ratelimit.call2(|rate_limit_stats| {
if let Some(cooldown) = self.attach_wal_lag_cooldown.get() {
if std::time::Instant::now() < cooldown.active_until && lag <= cooldown.max_lag {
return;
}
} else {
// Still loading? We shouldn't be here
}
let lag = humantime::format_duration(lag);
warn!(%rate_limit_stats, %lag, "ingesting record with timestamp lagging more than wait_lsn_timeout");
})