Merge pull request #11090 from neondatabase/vlad/release-gate-previous-heatmap

Storage release 2025-03-05
2026-01-13 16:32:56 +00:00 · 2025-03-05 14:37:24 +00:00
parent a1e67cfe86 c45d169527
commit 7430fb9836
4 changed files with 29 additions and 3 deletions
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -123,6 +123,10 @@ pub struct ConfigToml {
    pub enable_read_path_debugging: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub validate_wal_contiguity: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub load_previous_heatmap: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub generate_unarchival_heatmap: Option<bool>,
 }

 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -523,6 +527,8 @@ impl Default for ConfigToml {
                None
            },
            validate_wal_contiguity: None,
+            load_previous_heatmap: None,
+            generate_unarchival_heatmap: None,
        }
    }
 }
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -194,6 +194,13 @@ pub struct PageServerConf {
    /// Interpreted protocol feature: if enabled, validate that the logical WAL received from
    /// safekeepers does not have gaps.
    pub validate_wal_contiguity: bool,
+
+    /// When set, the heatmap previously written to disk is loaded on tenant attach and used
+    /// to avoid clobbering the heatmap from new, cold, attached locations.
+    pub load_previous_heatmap: bool,
+
+    /// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline.
+    pub generate_unarchival_heatmap: bool,
 }

 /// Token for authentication to safekeepers
@@ -358,6 +365,8 @@ impl PageServerConf {
            get_vectored_concurrent_io,
            enable_read_path_debugging,
            validate_wal_contiguity,
+            load_previous_heatmap,
+            generate_unarchival_heatmap,
        } = config_toml;

        let mut conf = PageServerConf {
@@ -447,6 +456,8 @@ impl PageServerConf {
            no_sync: no_sync.unwrap_or(false),
            enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),
            validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
+            load_previous_heatmap: load_previous_heatmap.unwrap_or(false),
+            generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(false),
        };

        // ------------------------------------------------------------
@@ -493,6 +504,8 @@ impl PageServerConf {
            metric_collection_interval: Duration::from_secs(60),
            synthetic_size_calculation_interval: Duration::from_secs(60),
            background_task_maximum_delay: Duration::ZERO,
+            load_previous_heatmap: Some(true),
+            generate_unarchival_heatmap: Some(true),
            ..Default::default()
        };
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -1149,7 +1149,7 @@ impl Tenant {
        // a previous heatmap which contains all visible layers in the layer map.
        // This previous heatmap will be used whenever a fresh heatmap is generated
        // for the timeline.
-        if matches!(cause, LoadTimelineCause::Unoffload) {
+        if self.conf.generate_unarchival_heatmap && matches!(cause, LoadTimelineCause::Unoffload) {
            let mut tline_ending_at = Some((&timeline, timeline.get_last_record_lsn()));
            while let Some((tline, end_lsn)) = tline_ending_at {
                let unarchival_heatmap = tline.generate_unarchival_heatmap(end_lsn).await;
@@ -1578,6 +1578,10 @@ impl Tenant {
    }

    async fn read_on_disk_heatmap(&self) -> Option<(HeatMapTenant, std::time::Instant)> {
+        if !self.conf.load_previous_heatmap {
+            return None;
+        }
+
        let on_disk_heatmap_path = self.conf.tenant_heatmap_path(&self.tenant_shard_id);
        match tokio::fs::read_to_string(on_disk_heatmap_path).await {
            Ok(heatmap) => match serde_json::from_str::<HeatMapTenant>(&heatmap) {
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1164,6 +1164,8 @@ class NeonEnv:
                # Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
                # the pageserver taking a long time to start up due to syncfs flushing other tests' data
                "no_sync": True,
+                # Look for gaps in WAL received from safekeepers
+                "validate_wal_contiguity": True,
            }

            # Batching (https://github.com/neondatabase/neon/issues/9377):
@@ -1176,11 +1178,12 @@ class NeonEnv:

            if config.test_may_use_compatibility_snapshot_binaries:
                log.info(
-                    "Skipping WAL contiguity validation to avoid forward-compatibility related test failures"
+                    "Skipping prev heatmap settings to avoid forward-compatibility related test failures"
                )
            else:
                # Look for gaps in WAL received from safekeepeers
-                ps_cfg["validate_wal_contiguity"] = True
+                ps_cfg["load_previous_heatmap"] = True
+                ps_cfg["generate_unarchival_heatmap"] = True

            get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io
            if get_vectored_concurrent_io is not None: