diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs
index b8af4c3d11..b4d73baa88 100644
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -411,6 +411,11 @@ pub async fn disk_usage_eviction_task_iteration_impl(
                     evictions_failed.file_sizes += file_size;
                     evictions_failed.count += 1;
                 }
+                Some(Err(EvictionError::MetadataInconsistency(detail))) => {
+                    warn!(%layer, "failed to evict layer: {detail}");
+                    evictions_failed.file_sizes += file_size;
+                    evictions_failed.count += 1;
+                }
                 None => {
                     assert!(cancel.is_cancelled());
                     return;
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 622f9b1b9e..06e2292156 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1419,6 +1419,15 @@ impl RemoteTimelineClient {
             }
         }
     }
+
+    /// Returns the [`LayerFileMetadata`] recorded for `name` in the latest
+    /// upload queue state (including the layer's remote generation), or
+    /// `None` if the layer is not known remotely. Errors if the queue is
+    /// not initialized.
+    pub(crate) fn get_layer_metadata(
+        &self,
+        name: &LayerFileName,
+    ) -> anyhow::Result<Option<LayerFileMetadata>> {
+        self.upload_queue.lock().unwrap().get_layer_metadata(name)
+    }
 }
 
 pub fn remote_timelines_path(tenant_id: &TenantId) -> RemotePath {
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 85b33b98b9..537f776176 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1294,7 +1294,23 @@ impl Timeline {
             Ok(delta) => Some(delta),
         };
 
-        let layer_metadata = LayerFileMetadata::new(layer_file_size, self.generation);
+        // RemoteTimelineClient holds the metadata on layers' remote generations, so
+        // query it to construct a RemoteLayer.
+        let layer_metadata = self
+            .remote_client
+            .as_ref()
+            .expect("Eviction is not called without remote storage")
+            .get_layer_metadata(&local_layer.filename())
+            .map_err(EvictionError::LayerNotFound)?
+            .ok_or_else(|| {
+                EvictionError::LayerNotFound(anyhow::anyhow!("Layer not in remote metadata"))
+            })?;
+        if layer_metadata.file_size() != layer_file_size {
+            return Err(EvictionError::MetadataInconsistency(format!(
+                "Layer size {layer_file_size} doesn't match remote metadata file size {}",
+                layer_metadata.file_size()
+            )));
+        }
 
         let new_remote_layer = Arc::new(match local_layer.filename() {
             LayerFileName::Image(image_name) => RemoteLayer::new_img(
@@ -1373,6 +1389,10 @@ pub(crate) enum EvictionError {
     /// different objects in memory.
     #[error("layer was no longer part of LayerMap")]
     LayerNotFound(#[source] anyhow::Error),
+
+    /// Local layer size disagreed with the remote metadata; should never happen.
+    #[error("metadata inconsistency: {0}")]
+    MetadataInconsistency(String),
 }
 
 /// Number of times we will compute partition within a checkpoint distance.
diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs
index 39f0d03a01..9bf31d85d4 100644
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -285,6 +285,10 @@ impl Timeline {
                     warn!(layer = %l, "failed to evict layer: {e}");
                     stats.not_evictable += 1;
                 }
+                Some(Err(EvictionError::MetadataInconsistency(detail))) => {
+                    warn!(layer = %l, "failed to evict layer: {detail}");
+                    stats.not_evictable += 1;
+                }
             }
         }
         if stats.candidates == stats.not_evictable {
diff --git a/pageserver/src/tenant/upload_queue.rs b/pageserver/src/tenant/upload_queue.rs
index 8150e71c95..2db67d071a 100644
--- a/pageserver/src/tenant/upload_queue.rs
+++ b/pageserver/src/tenant/upload_queue.rs
@@ -203,6 +203,19 @@ impl UploadQueue {
             UploadQueue::Stopped(stopped) => Ok(stopped),
         }
     }
+
+    /// Look up `name` in `latest_files`; errors if the queue is stopped or
+    /// uninitialized, since its contents would not reflect remote state.
+    pub(crate) fn get_layer_metadata(
+        &self,
+        name: &LayerFileName,
+    ) -> anyhow::Result<Option<LayerFileMetadata>> {
+        match self {
+            UploadQueue::Stopped(_) | UploadQueue::Uninitialized => {
+                anyhow::bail!("queue is in state {}", self.as_str())
+            }
+            UploadQueue::Initialized(inner) => Ok(inner.latest_files.get(name).cloned()),
+        }
+    }
 }
 
 /// An in-progress upload or delete task.
diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py
index 1b9f48706f..6dc8582755 100644
--- a/test_runner/regress/test_pageserver_generations.py
+++ b/test_runner/regress/test_pageserver_generations.py
@@ -104,6 +104,22 @@ def generate_uploads_and_deletions(
     assert gc_result["layers_removed"] > 0
 
 
+def read_all(
+    env: NeonEnv, tenant_id: Optional[TenantId] = None, timeline_id: Optional[TimelineId] = None
+):
+    if tenant_id is None:
+        tenant_id = env.initial_tenant
+    assert tenant_id is not None
+
+    if timeline_id is None:
+        timeline_id = env.initial_timeline
+    assert timeline_id is not None
+
+    env.pageserver.http_client()
+    with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
+        endpoint.safe_psql("SELECT SUM(LENGTH(val)) FROM foo;")
+
+
 def get_metric_or_0(ps_http, metric: str) -> int:
     v = ps_http.get_metric_value(metric)
     return 0 if v is None else int(v)
@@ -467,3 +483,48 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
     assert get_deletion_queue_depth(ps_http) == 0
     assert get_deletion_queue_validated(ps_http) > 0
     assert get_deletion_queue_executed(ps_http) > 0
+
+
+def evict_all_layers(env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
+    timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
+    initial_local_layers = sorted(
+        list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
+    )
+    client = env.pageserver.http_client()
+    for layer in initial_local_layers:
+        if "ephemeral" in layer.name:
+            continue
+        log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
+        client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
+
+
+def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
+    """
+    Eviction and on-demand downloads exercise a particular code path where RemoteLayer is constructed
+    and must be constructed using the proper generation for the layer, which may not be the same generation
+    that the tenant is running in.
+    """
+    neon_env_builder.enable_generations = True
+    neon_env_builder.enable_pageserver_remote_storage(
+        RemoteStorageKind.MOCK_S3,
+    )
+    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
+    env.pageserver.http_client()
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    generate_uploads_and_deletions(env)
+
+    read_all(env, tenant_id, timeline_id)
+    evict_all_layers(env, tenant_id, timeline_id)
+    read_all(env, tenant_id, timeline_id)
+
+    # This will cause the generation to increment
+    env.pageserver.stop()
+    env.pageserver.start()
+
+    # Now we are running as generation 2, but must still correctly remember that the layers
+    # we are evicting and downloading are from generation 1.
+    read_all(env, tenant_id, timeline_id)
+    evict_all_layers(env, tenant_id, timeline_id)
+    read_all(env, tenant_id, timeline_id)