diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 6c67fb9cb6..4ff87f20f1 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -137,7 +137,7 @@ use self::layer_manager::LayerManager; use self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; -use super::{config::TenantConf, upload_queue::NotInitialized}; +use super::{config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized}; use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf}; use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; use super::{ @@ -2919,14 +2919,22 @@ impl Timeline { let guard = self.layers.read().await; - let resident = guard.likely_resident_layers().map(|layer| { - let last_activity_ts = layer.latest_activity(); - - HeatMapLayer::new( - layer.layer_desc().layer_name(), - layer.metadata(), - last_activity_ts, - ) + let resident = guard.likely_resident_layers().filter_map(|layer| { + match layer.visibility() { + LayerVisibilityHint::Visible => { + // Layer is visible to one or more read LSNs: elegible for inclusion in layer map + let last_activity_ts = layer.latest_activity(); + Some(HeatMapLayer::new( + layer.layer_desc().layer_name(), + layer.metadata(), + last_activity_ts, + )) + } + LayerVisibilityHint::Covered => { + // Layer is resident but unlikely to be read: not elegible for inclusion in heatmap. + None + } + } }); let layers = resident.collect(); diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 61e2204b23..5be59d3749 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -61,6 +61,7 @@ class HistoricLayerInfo: remote: bool # None for image layers, true if pageserver thinks this is an L0 delta layer l0: Optional[bool] + visible: bool @classmethod def from_json(cls, d: Dict[str, Any]) -> HistoricLayerInfo: @@ -79,6 +80,7 @@ class HistoricLayerInfo: lsn_end=d.get("lsn_end"), remote=d["remote"], l0=l0_ness, + visible=d["access_stats"]["visible"], ) diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 53f69b5b26..4b0af24480 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -2,10 +2,11 @@ import json import os import random import time -from typing import Any, Dict, Optional +from pathlib import Path +from typing import Any, Dict, Optional, Union import pytest -from fixtures.common_types import TenantId, TimelineId +from fixtures.common_types import TenantId, TenantShardId, TimelineId from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver from fixtures.pageserver.common_types import parse_layer_file_name @@ -437,6 +438,35 @@ def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder): validate_heatmap(heatmap_second) +def list_elegible_layers( + pageserver, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId +) -> list[Path]: + """ + The subset of layer filenames that are elegible for secondary download: at time of writing this + is all resident layers which are also visible. + """ + candidates = pageserver.list_layers(tenant_id, timeline_id) + + layer_map = pageserver.http_client().layer_map_info(tenant_id, timeline_id) + + # Map of layer filenames to their visibility the "layer name" is not the same as the filename: add suffix to resolve one to the other + visible_map = dict( + (f"{layer.layer_file_name}-v1-00000001", layer.visible) + for layer in layer_map.historic_layers + ) + + def is_visible(layer_file_name): + try: + return visible_map[str(layer_file_name)] + except KeyError: + # Unexpected: tests should call this when pageservers are in a quiet state such that the layer map + # matches what's on disk. + log.warn(f"Lookup {layer_file_name} from {list(visible_map.keys())}") + raise + + return list(c for c in candidates if is_visible(c)) + + def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): """ Test the overall data flow in secondary mode: @@ -491,7 +521,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): ps_secondary.http_client().tenant_secondary_download(tenant_id) - assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers( + assert list_elegible_layers(ps_attached, tenant_id, timeline_id) == ps_secondary.list_layers( tenant_id, timeline_id ) @@ -509,9 +539,9 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): ps_secondary.http_client().tenant_secondary_download(tenant_id) try: - assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers( - tenant_id, timeline_id - ) + assert list_elegible_layers( + ps_attached, tenant_id, timeline_id + ) == ps_secondary.list_layers(tenant_id, timeline_id) except: # Do a full listing of the secondary location on errors, to help debug of # https://github.com/neondatabase/neon/issues/6966 @@ -532,8 +562,8 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): # ================================================================== try: log.info("Evicting a layer...") - layer_to_evict = ps_attached.list_layers(tenant_id, timeline_id)[0] - some_other_layer = ps_attached.list_layers(tenant_id, timeline_id)[1] + layer_to_evict = list_elegible_layers(ps_attached, tenant_id, timeline_id)[0] + some_other_layer = list_elegible_layers(ps_attached, tenant_id, timeline_id)[1] log.info(f"Victim layer: {layer_to_evict.name}") ps_attached.http_client().evict_layer( tenant_id, timeline_id, layer_name=layer_to_evict.name @@ -551,9 +581,9 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): ps_secondary.http_client().tenant_secondary_download(tenant_id) assert layer_to_evict not in ps_attached.list_layers(tenant_id, timeline_id) - assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers( - tenant_id, timeline_id - ) + assert list_elegible_layers( + ps_attached, tenant_id, timeline_id + ) == ps_secondary.list_layers(tenant_id, timeline_id) except: # On assertion failures, log some details to help with debugging heatmap = env.pageserver_remote_storage.heatmap_content(tenant_id)