Merge pull request #11090 from neondatabase/vlad/release-gate-previous-heatmap

Storage release 2025-03-05
This commit is contained in:
Vlad Lazar
2025-03-05 14:37:24 +00:00
committed by GitHub
4 changed files with 29 additions and 3 deletions

View File

@@ -123,6 +123,10 @@ pub struct ConfigToml {
pub enable_read_path_debugging: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub validate_wal_contiguity: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub load_previous_heatmap: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub generate_unarchival_heatmap: Option<bool>,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -523,6 +527,8 @@ impl Default for ConfigToml {
None
},
validate_wal_contiguity: None,
load_previous_heatmap: None,
generate_unarchival_heatmap: None,
}
}
}

View File

@@ -194,6 +194,13 @@ pub struct PageServerConf {
/// Interpreted protocol feature: if enabled, validate that the logical WAL received from
/// safekeepers does not have gaps.
pub validate_wal_contiguity: bool,
/// When set, the previously written to disk heatmap is loaded on tenant attach and used
/// to avoid clobbering the heatmap from new, cold, attached locations.
pub load_previous_heatmap: bool,
/// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline.
pub generate_unarchival_heatmap: bool,
}
/// Token for authentication to safekeepers
@@ -358,6 +365,8 @@ impl PageServerConf {
get_vectored_concurrent_io,
enable_read_path_debugging,
validate_wal_contiguity,
load_previous_heatmap,
generate_unarchival_heatmap,
} = config_toml;
let mut conf = PageServerConf {
@@ -447,6 +456,8 @@ impl PageServerConf {
no_sync: no_sync.unwrap_or(false),
enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),
validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
load_previous_heatmap: load_previous_heatmap.unwrap_or(false),
generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(false),
};
// ------------------------------------------------------------
@@ -493,6 +504,8 @@ impl PageServerConf {
metric_collection_interval: Duration::from_secs(60),
synthetic_size_calculation_interval: Duration::from_secs(60),
background_task_maximum_delay: Duration::ZERO,
load_previous_heatmap: Some(true),
generate_unarchival_heatmap: Some(true),
..Default::default()
};
PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()

View File

@@ -1149,7 +1149,7 @@ impl Tenant {
// a previous heatmap which contains all visible layers in the layer map.
// This previous heatmap will be used whenever a fresh heatmap is generated
// for the timeline.
if matches!(cause, LoadTimelineCause::Unoffload) {
if self.conf.generate_unarchival_heatmap && matches!(cause, LoadTimelineCause::Unoffload) {
let mut tline_ending_at = Some((&timeline, timeline.get_last_record_lsn()));
while let Some((tline, end_lsn)) = tline_ending_at {
let unarchival_heatmap = tline.generate_unarchival_heatmap(end_lsn).await;
@@ -1578,6 +1578,10 @@ impl Tenant {
}
async fn read_on_disk_heatmap(&self) -> Option<(HeatMapTenant, std::time::Instant)> {
if !self.conf.load_previous_heatmap {
return None;
}
let on_disk_heatmap_path = self.conf.tenant_heatmap_path(&self.tenant_shard_id);
match tokio::fs::read_to_string(on_disk_heatmap_path).await {
Ok(heatmap) => match serde_json::from_str::<HeatMapTenant>(&heatmap) {

View File

@@ -1164,6 +1164,8 @@ class NeonEnv:
# Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
# the pageserver taking a long time to start up due to syncfs flushing other tests' data
"no_sync": True,
# Look for gaps in WAL received from safekeepeers
"validate_wal_contiguity": True,
}
# Batching (https://github.com/neondatabase/neon/issues/9377):
@@ -1176,11 +1178,12 @@ class NeonEnv:
if config.test_may_use_compatibility_snapshot_binaries:
log.info(
"Skipping WAL contiguity validation to avoid forward-compatibility related test failures"
"Skipping prev heatmap settings to avoid forward-compatibility related test failures"
)
else:
# Look for gaps in WAL received from safekeepeers
ps_cfg["validate_wal_contiguity"] = True
ps_cfg["load_previous_heatmap"] = True
ps_cfg["generate_unarchival_heatmap"] = True
get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io
if get_vectored_concurrent_io is not None: