mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
enforce standby_horizon leases are always above applied_gc_cutoff (check against cutoff on upsert + block gc for lease length to allow renewals after attach)
This commit is contained in:
@@ -565,6 +565,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'basebackup_cache_enabled' as bool")?,
|
||||
standby_horizon_lease_length: settings
|
||||
.remove("standby_horizon_lease_length")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'standby_horizon_lease_length' as duration")?,
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
|
||||
@@ -638,13 +638,15 @@ impl PageServerConf {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Test authors tend to forget about the default 10min initial lease deadline
|
||||
// Test authors tend to forget about the default 10min initial lsn lease deadline
|
||||
// when writing tests, which turns their immediate gc requests via mgmt API
|
||||
// into no-ops. Override the binary default here, such that there is no initial
|
||||
// lease deadline by default in tests. Tests that care can always override it
|
||||
// lsn lease deadline by default in tests. Tests that care can always override it
|
||||
// themselves.
|
||||
// Cf https://databricks.atlassian.net/browse/LKB-92?focusedCommentId=6722329
|
||||
config_toml.tenant_config.lsn_lease_length = Duration::from_secs(0);
|
||||
// Same argument applies to the initial standby_horizon lease deadline.
|
||||
config_toml.tenant_config.standby_horizon_lease_length = Duration::from_secs(0);
|
||||
|
||||
PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()
|
||||
}
|
||||
|
||||
@@ -179,6 +179,9 @@ pub(super) struct AttachedTenantConf {
|
||||
/// The deadline before which we are blocked from GC so that
|
||||
/// leases have a chance to be renewed.
|
||||
lsn_lease_deadline: Option<tokio::time::Instant>,
|
||||
/// The deadline before which we are blocked from GC so that
|
||||
/// standby horizon leases have a chance to be renewed.
|
||||
standby_horizon_lease_deadline: Option<tokio::time::Instant>,
|
||||
}
|
||||
|
||||
impl AttachedTenantConf {
|
||||
@@ -203,10 +206,27 @@ impl AttachedTenantConf {
|
||||
None
|
||||
};
|
||||
|
||||
// Sets a deadline before which we cannot proceed to GC due to standby horizon lease.
|
||||
//
|
||||
// Similar to LSN leases, standby horizon leases are not persisted to disk. By delaying GC by lease
|
||||
// length, we guarantee that all the standby horizon leases we granted before will have a chance to renew
|
||||
// when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.
|
||||
let standby_horizon_lease_deadline = if location.attach_mode == AttachmentMode::Single {
|
||||
Some(
|
||||
tokio::time::Instant::now()
|
||||
+ TenantShard::get_standby_horizon_lease_length_impl(conf, &tenant_conf),
|
||||
)
|
||||
} else {
|
||||
// We don't use `standby_horizon_lease_deadline` to delay GC in AttachedMulti and AttachedStale
|
||||
// because we don't do GC in these modes.
|
||||
None
|
||||
};
|
||||
|
||||
Self {
|
||||
tenant_conf,
|
||||
location,
|
||||
lsn_lease_deadline,
|
||||
standby_horizon_lease_deadline,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,6 +251,12 @@ impl AttachedTenantConf {
|
||||
.map(|d| tokio::time::Instant::now() < d)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn is_gc_blocked_by_standby_horizon_lease_deadline(&self) -> bool {
|
||||
self.standby_horizon_lease_deadline
|
||||
.map(|d| tokio::time::Instant::now() < d)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
struct TimelinePreload {
|
||||
timeline_id: TimelineId,
|
||||
@@ -3111,7 +3137,7 @@ impl TenantShard {
|
||||
return Ok(GcResult::default());
|
||||
}
|
||||
|
||||
if todo!("block for standby horizon lease deadline") {
|
||||
if conf.is_gc_blocked_by_standby_horizon_lease_deadline() {
|
||||
info!("Skipping GC because standby horizon lease deadline is not reached");
|
||||
return Ok(GcResult::default());
|
||||
}
|
||||
@@ -4280,6 +4306,7 @@ impl TenantShard {
|
||||
tenant_conf: update(attached_conf.tenant_conf.clone())?,
|
||||
location: attached_conf.location,
|
||||
lsn_lease_deadline: attached_conf.lsn_lease_deadline,
|
||||
standby_horizon_lease_deadline: attached_conf.standby_horizon_lease_deadline,
|
||||
}))
|
||||
})?;
|
||||
|
||||
|
||||
@@ -1884,8 +1884,14 @@ impl Timeline {
|
||||
.checked_add(Duration::from_secs(5 * 60))
|
||||
.unwrap());
|
||||
}
|
||||
let applied_gc_cutoff_lsn =
|
||||
todo!("think about boundary conditions? we didn't have any before though");
|
||||
let applied_gc_cutoff_lsn_guard = self.get_applied_gc_cutoff_lsn();
|
||||
if lsn < *applied_gc_cutoff_lsn_guard {
|
||||
bail!(
|
||||
"tried to request a standby horizon lease for an lsn below the applied gc cutoff. requested at {} gc cutoff {}",
|
||||
lsn,
|
||||
*applied_gc_cutoff_lsn_guard
|
||||
);
|
||||
}
|
||||
let length = self.get_standby_horizon_lease_length();
|
||||
self.standby_horizons
|
||||
.upsert_lease(lease_id, lsn, length)
|
||||
|
||||
@@ -1322,6 +1322,8 @@ class NeonEnv:
|
||||
# themselves.
|
||||
# Cf https://databricks.atlassian.net/browse/LKB-92?focusedCommentId=6722329
|
||||
tenant_config["lsn_lease_length"] = "0s"
|
||||
# Same argument applies to the initial standby_horizon lease deadline.
|
||||
tenant_config["standby_horizon_lease_length"] = "0s"
|
||||
|
||||
if self.pageserver_remote_storage is not None:
|
||||
ps_cfg["remote_storage"] = remote_storage_to_toml_dict(
|
||||
|
||||
@@ -181,6 +181,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"image_layer_creation_check_threshold": 1,
|
||||
"lsn_lease_length": "1m",
|
||||
"lsn_lease_length_for_ts": "5s",
|
||||
"standby_horizon_lease_length": "5s",
|
||||
"timeline_offloading": False,
|
||||
"rel_size_v2_enabled": True,
|
||||
"relsize_snapshot_cache_capacity": 10000,
|
||||
|
||||
Reference in New Issue
Block a user