mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-17 02:12:56 +00:00
pageserver: fix eviction across generations (#5538)
## Problem
Bug was introduced by me in 83ae2bd82c
When eviction constructs a RemoteLayer to replace the layer it just
evicted, it is building a LayerFileMetadata using its _current_
generation, rather than the generation of the layer.
## Summary of changes
- Retrieve Generation from RemoteTimelineClient when evicting. This will
no longer be necessary when #4938 lands.
- Add a test for the scenario in question (this fails without the fix).
This commit is contained in:
@@ -411,6 +411,11 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
||||
evictions_failed.file_sizes += file_size;
|
||||
evictions_failed.count += 1;
|
||||
}
|
||||
Some(Err(EvictionError::MetadataInconsistency(detail))) => {
|
||||
warn!(%layer, "failed to evict layer: {detail}");
|
||||
evictions_failed.file_sizes += file_size;
|
||||
evictions_failed.count += 1;
|
||||
}
|
||||
None => {
|
||||
assert!(cancel.is_cancelled());
|
||||
return;
|
||||
|
||||
@@ -1419,6 +1419,13 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_layer_metadata(
|
||||
&self,
|
||||
name: &LayerFileName,
|
||||
) -> anyhow::Result<Option<LayerFileMetadata>> {
|
||||
self.upload_queue.lock().unwrap().get_layer_metadata(name)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remote_timelines_path(tenant_id: &TenantId) -> RemotePath {
|
||||
|
||||
@@ -1294,7 +1294,23 @@ impl Timeline {
|
||||
Ok(delta) => Some(delta),
|
||||
};
|
||||
|
||||
let layer_metadata = LayerFileMetadata::new(layer_file_size, self.generation);
|
||||
// RemoteTimelineClient holds the metadata on layers' remote generations, so
|
||||
// query it to construct a RemoteLayer.
|
||||
let layer_metadata = self
|
||||
.remote_client
|
||||
.as_ref()
|
||||
.expect("Eviction is not called without remote storage")
|
||||
.get_layer_metadata(&local_layer.filename())
|
||||
.map_err(EvictionError::LayerNotFound)?
|
||||
.ok_or_else(|| {
|
||||
EvictionError::LayerNotFound(anyhow::anyhow!("Layer not in remote metadata"))
|
||||
})?;
|
||||
if layer_metadata.file_size() != layer_file_size {
|
||||
return Err(EvictionError::MetadataInconsistency(format!(
|
||||
"Layer size {layer_file_size} doesn't match remote metadata file size {}",
|
||||
layer_metadata.file_size()
|
||||
)));
|
||||
}
|
||||
|
||||
let new_remote_layer = Arc::new(match local_layer.filename() {
|
||||
LayerFileName::Image(image_name) => RemoteLayer::new_img(
|
||||
@@ -1373,6 +1389,10 @@ pub(crate) enum EvictionError {
|
||||
/// different objects in memory.
|
||||
#[error("layer was no longer part of LayerMap")]
|
||||
LayerNotFound(#[source] anyhow::Error),
|
||||
|
||||
/// This should never happen
|
||||
#[error("Metadata inconsistency")]
|
||||
MetadataInconsistency(String),
|
||||
}
|
||||
|
||||
/// Number of times we will compute partition within a checkpoint distance.
|
||||
|
||||
@@ -285,6 +285,10 @@ impl Timeline {
|
||||
warn!(layer = %l, "failed to evict layer: {e}");
|
||||
stats.not_evictable += 1;
|
||||
}
|
||||
Some(Err(EvictionError::MetadataInconsistency(detail))) => {
|
||||
warn!(layer = %l, "failed to evict layer: {detail}");
|
||||
stats.not_evictable += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if stats.candidates == stats.not_evictable {
|
||||
|
||||
@@ -203,6 +203,18 @@ impl UploadQueue {
|
||||
UploadQueue::Stopped(stopped) => Ok(stopped),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_layer_metadata(
|
||||
&self,
|
||||
name: &LayerFileName,
|
||||
) -> anyhow::Result<Option<LayerFileMetadata>> {
|
||||
match self {
|
||||
UploadQueue::Stopped(_) | UploadQueue::Uninitialized => {
|
||||
anyhow::bail!("queue is in state {}", self.as_str())
|
||||
}
|
||||
UploadQueue::Initialized(inner) => Ok(inner.latest_files.get(name).cloned()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An in-progress upload or delete task.
|
||||
|
||||
@@ -104,6 +104,22 @@ def generate_uploads_and_deletions(
|
||||
assert gc_result["layers_removed"] > 0
|
||||
|
||||
|
||||
def read_all(
|
||||
env: NeonEnv, tenant_id: Optional[TenantId] = None, timeline_id: Optional[TimelineId] = None
|
||||
):
|
||||
if tenant_id is None:
|
||||
tenant_id = env.initial_tenant
|
||||
assert tenant_id is not None
|
||||
|
||||
if timeline_id is None:
|
||||
timeline_id = env.initial_timeline
|
||||
assert timeline_id is not None
|
||||
|
||||
env.pageserver.http_client()
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
endpoint.safe_psql("SELECT SUM(LENGTH(val)) FROM foo;")
|
||||
|
||||
|
||||
def get_metric_or_0(ps_http, metric: str) -> int:
|
||||
v = ps_http.get_metric_value(metric)
|
||||
return 0 if v is None else int(v)
|
||||
@@ -467,3 +483,48 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
assert get_deletion_queue_depth(ps_http) == 0
|
||||
assert get_deletion_queue_validated(ps_http) > 0
|
||||
assert get_deletion_queue_executed(ps_http) > 0
|
||||
|
||||
|
||||
def evict_all_layers(env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
|
||||
timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
|
||||
initial_local_layers = sorted(
|
||||
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
|
||||
)
|
||||
client = env.pageserver.http_client()
|
||||
for layer in initial_local_layers:
|
||||
if "ephemeral" in layer.name:
|
||||
continue
|
||||
log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
|
||||
client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
|
||||
|
||||
|
||||
def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Eviction and on-demand downloads exercise a particular code path where RemoteLayer is constructed
|
||||
and must be constructed using the proper generation for the layer, which may not be the same generation
|
||||
that the tenant is running in.
|
||||
"""
|
||||
neon_env_builder.enable_generations = True
|
||||
neon_env_builder.enable_pageserver_remote_storage(
|
||||
RemoteStorageKind.MOCK_S3,
|
||||
)
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
|
||||
env.pageserver.http_client()
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
generate_uploads_and_deletions(env)
|
||||
|
||||
read_all(env, tenant_id, timeline_id)
|
||||
evict_all_layers(env, tenant_id, timeline_id)
|
||||
read_all(env, tenant_id, timeline_id)
|
||||
|
||||
# This will cause the generation to increment
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
# Now we are running as generation 2, but must still correctly remember that the layers
|
||||
# we are evicting and downloading are from generation 1.
|
||||
read_all(env, tenant_id, timeline_id)
|
||||
evict_all_layers(env, tenant_id, timeline_id)
|
||||
read_all(env, tenant_id, timeline_id)
|
||||
|
||||
Reference in New Issue
Block a user