pageserver: fix eviction across generations (#5538)

## Problem

The bug was introduced by me in commit 83ae2bd82c.

When eviction constructs a RemoteLayer to replace the layer it just
evicted, it is building a LayerFileMetadata using its _current_
generation, rather than the generation of the layer.

## Summary of changes

- Retrieve Generation from RemoteTimelineClient when evicting. This will
no longer be necessary when #4938 lands.
- Add a test for the scenario in question (this fails without the fix).
This commit is contained in:
John Spray
2023-10-15 20:23:18 +01:00
committed by GitHub
parent 99c15907c1
commit 44b1c4c456
6 changed files with 110 additions and 1 deletions

View File

@@ -411,6 +411,11 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
evictions_failed.file_sizes += file_size;
evictions_failed.count += 1;
}
Some(Err(EvictionError::MetadataInconsistency(detail))) => {
warn!(%layer, "failed to evict layer: {detail}");
evictions_failed.file_sizes += file_size;
evictions_failed.count += 1;
}
None => {
assert!(cancel.is_cancelled());
return;

View File

@@ -1419,6 +1419,13 @@ impl RemoteTimelineClient {
}
}
}
/// Look up the metadata that remote storage currently records for `name`.
///
/// Returns `Ok(None)` when the layer is not known to the upload queue, and an
/// error when the queue is in a state where the lookup cannot be answered.
pub(crate) fn get_layer_metadata(
    &self,
    name: &LayerFileName,
) -> anyhow::Result<Option<LayerFileMetadata>> {
    // Delegate to the upload queue, which holds the authoritative view of
    // layers' remote metadata.
    let queue = self.upload_queue.lock().unwrap();
    queue.get_layer_metadata(name)
}
}
pub fn remote_timelines_path(tenant_id: &TenantId) -> RemotePath {

View File

@@ -1294,7 +1294,23 @@ impl Timeline {
Ok(delta) => Some(delta),
};
let layer_metadata = LayerFileMetadata::new(layer_file_size, self.generation);
// RemoteTimelineClient holds the metadata on layers' remote generations, so
// query it to construct a RemoteLayer.
let layer_metadata = self
.remote_client
.as_ref()
.expect("Eviction is not called without remote storage")
.get_layer_metadata(&local_layer.filename())
.map_err(EvictionError::LayerNotFound)?
.ok_or_else(|| {
EvictionError::LayerNotFound(anyhow::anyhow!("Layer not in remote metadata"))
})?;
if layer_metadata.file_size() != layer_file_size {
return Err(EvictionError::MetadataInconsistency(format!(
"Layer size {layer_file_size} doesn't match remote metadata file size {}",
layer_metadata.file_size()
)));
}
let new_remote_layer = Arc::new(match local_layer.filename() {
LayerFileName::Image(image_name) => RemoteLayer::new_img(
@@ -1373,6 +1389,10 @@ pub(crate) enum EvictionError {
/// different objects in memory.
#[error("layer was no longer part of LayerMap")]
LayerNotFound(#[source] anyhow::Error),
/// This should never happen
#[error("Metadata inconsistency")]
MetadataInconsistency(String),
}
/// Number of times we will compute partition within a checkpoint distance.

View File

@@ -285,6 +285,10 @@ impl Timeline {
warn!(layer = %l, "failed to evict layer: {e}");
stats.not_evictable += 1;
}
Some(Err(EvictionError::MetadataInconsistency(detail))) => {
warn!(layer = %l, "failed to evict layer: {detail}");
stats.not_evictable += 1;
}
}
}
if stats.candidates == stats.not_evictable {

View File

@@ -203,6 +203,18 @@ impl UploadQueue {
UploadQueue::Stopped(stopped) => Ok(stopped),
}
}
/// Look up `name` in the latest known remote file set.
///
/// Fails unless the queue has been initialized; returns `Ok(None)` when the
/// layer name is not present in `latest_files`.
pub(crate) fn get_layer_metadata(
    &self,
    name: &LayerFileName,
) -> anyhow::Result<Option<LayerFileMetadata>> {
    match self {
        UploadQueue::Initialized(inner) => Ok(inner.latest_files.get(name).cloned()),
        UploadQueue::Uninitialized | UploadQueue::Stopped(_) => {
            anyhow::bail!("queue is in state {}", self.as_str())
        }
    }
}
}
/// An in-progress upload or delete task.

View File

@@ -104,6 +104,22 @@ def generate_uploads_and_deletions(
assert gc_result["layers_removed"] > 0
def read_all(
    env: NeonEnv, tenant_id: Optional[TenantId] = None, timeline_id: Optional[TimelineId] = None
):
    """Read back every row of the `foo` table through a compute endpoint.

    Falls back to the env's initial tenant/timeline when not given. Reading
    the whole table touches all layers, so this triggers on-demand download
    of any layers that were evicted.
    """
    tenant_id = env.initial_tenant if tenant_id is None else tenant_id
    assert tenant_id is not None
    timeline_id = env.initial_timeline if timeline_id is None else timeline_id
    assert timeline_id is not None
    # NOTE(review): return value unused — presumably kept for its side
    # effects when constructing the client; confirm before removing.
    env.pageserver.http_client()
    with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
        endpoint.safe_psql("SELECT SUM(LENGTH(val)) FROM foo;")
def get_metric_or_0(ps_http, metric: str) -> int:
    """Return the named pageserver metric as an int, or 0 when it is absent."""
    value = ps_http.get_metric_value(metric)
    if value is None:
        return 0
    return int(value)
@@ -467,3 +483,48 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
assert get_deletion_queue_depth(ps_http) == 0
assert get_deletion_queue_validated(ps_http) > 0
assert get_deletion_queue_executed(ps_http) > 0
def evict_all_layers(env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
    """Evict every non-ephemeral layer file currently on local disk for the timeline."""
    timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
    initial_local_layers = sorted(
        path for path in timeline_path.glob("*") if path.name != "metadata"
    )

    client = env.pageserver.http_client()
    for layer in initial_local_layers:
        # Ephemeral (in-memory) layer files cannot be evicted; skip them.
        if "ephemeral" in layer.name:
            continue

        log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
        client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
    """
    Exercise the code path where eviction and on-demand download construct a
    RemoteLayer: the layer must carry the generation it was written in, which
    can differ from the generation the tenant is currently running in.
    """
    neon_env_builder.enable_generations = True
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
    # NOTE(review): return value unused — presumably kept for its side
    # effects when constructing the client; confirm before removing.
    env.pageserver.http_client()
    tenant_id = env.initial_tenant
    timeline_id = env.initial_timeline

    generate_uploads_and_deletions(env)

    # Generation 1: evict and re-download all layers.
    read_all(env, tenant_id, timeline_id)
    evict_all_layers(env, tenant_id, timeline_id)
    read_all(env, tenant_id, timeline_id)

    # Restarting the pageserver increments the generation.
    env.pageserver.stop()
    env.pageserver.start()

    # Generation 2: the layers we evict and download still belong to
    # generation 1, and must be handled with their original generation.
    read_all(env, tenant_id, timeline_id)
    evict_all_layers(env, tenant_id, timeline_id)
    read_all(env, tenant_id, timeline_id)