mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
pageserver: assert that uploads don't modify indexed layers (#10228)
## Problem It's not legal to modify layers that are referenced by the current layer index. Assert this in the upload queue, as preparation for upload queue reordering. Touches #10096. ## Summary of changes Add a debug assertion that the upload queue does not modify layers referenced by the current index. I could be convinced that this should be a plain assertion, but will be conservative for now.
This commit is contained in:
@@ -1943,6 +1943,30 @@ impl RemoteTimelineClient {
|
||||
return;
|
||||
}
|
||||
|
||||
// Assert that we don't modify a layer that's referenced by the current index.
|
||||
if cfg!(debug_assertions) {
|
||||
let modified = match &task.op {
|
||||
UploadOp::UploadLayer(layer, layer_metadata, _) => {
|
||||
vec![(layer.layer_desc().layer_name(), layer_metadata)]
|
||||
}
|
||||
UploadOp::Delete(delete) => {
|
||||
delete.layers.iter().map(|(n, m)| (n.clone(), m)).collect()
|
||||
}
|
||||
// These don't modify layers.
|
||||
UploadOp::UploadMetadata { .. } => Vec::new(),
|
||||
UploadOp::Barrier(_) => Vec::new(),
|
||||
UploadOp::Shutdown => Vec::new(),
|
||||
};
|
||||
if let Ok(queue) = self.upload_queue.lock().unwrap().initialized_mut() {
|
||||
for (ref name, metadata) in modified {
|
||||
debug_assert!(
|
||||
!queue.clean.0.references(name, metadata),
|
||||
"layer {name} modified while referenced by index",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let upload_result: anyhow::Result<()> = match &task.op {
|
||||
UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
|
||||
if let Some(OpType::FlushDeletion) = mode {
|
||||
@@ -2509,6 +2533,21 @@ pub fn remote_layer_path(
|
||||
RemotePath::from_string(&path).expect("Failed to construct path")
|
||||
}
|
||||
|
||||
/// Returns true if a and b have the same layer path within a tenant/timeline. This is essentially
|
||||
/// remote_layer_path(a) == remote_layer_path(b) without the string allocations.
|
||||
///
|
||||
/// TODO: there should be a variant of LayerName for the physical path that contains information
|
||||
/// about the shard and generation, such that this could be replaced by a simple comparison.
|
||||
pub fn is_same_remote_layer_path(
|
||||
aname: &LayerName,
|
||||
ameta: &LayerFileMetadata,
|
||||
bname: &LayerName,
|
||||
bmeta: &LayerFileMetadata,
|
||||
) -> bool {
|
||||
// NB: don't assert remote_layer_path(a) == remote_layer_path(b); too expensive even for debug.
|
||||
aname == bname && ameta.shard == bmeta.shard && ameta.generation == bmeta.generation
|
||||
}
|
||||
|
||||
pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
|
||||
RemotePath::from_string(&format!(
|
||||
"tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}"
|
||||
|
||||
@@ -8,14 +8,14 @@ use std::collections::HashMap;
|
||||
use chrono::NaiveDateTime;
|
||||
use pageserver_api::models::AuxFilePolicy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use super::is_same_remote_layer_path;
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::import_pgdata;
|
||||
use crate::tenant::Generation;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// In-memory representation of an `index_part.json` file
|
||||
@@ -45,10 +45,8 @@ pub struct IndexPart {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub import_pgdata: Option<import_pgdata::index_part_format::Root>,
|
||||
|
||||
/// Per layer file name metadata, which can be present for a present or missing layer file.
|
||||
///
|
||||
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
|
||||
/// that latest version stores.
|
||||
/// Layer filenames and metadata. For an index persisted in remote storage, all layers must
|
||||
/// exist in remote storage.
|
||||
pub layer_metadata: HashMap<LayerName, LayerFileMetadata>,
|
||||
|
||||
/// Because of the trouble of eyeballing the legacy "metadata" field, we copied the
|
||||
@@ -143,6 +141,17 @@ impl IndexPart {
|
||||
pub(crate) fn example() -> Self {
|
||||
Self::empty(TimelineMetadata::example())
|
||||
}
|
||||
|
||||
/// Returns true if the index contains a reference to the given layer (i.e. file path).
|
||||
///
|
||||
/// TODO: there should be a variant of LayerName for the physical remote path that contains
|
||||
/// information about the shard and generation, to avoid passing in metadata.
|
||||
pub fn references(&self, name: &LayerName, metadata: &LayerFileMetadata) -> bool {
|
||||
let Some(index_metadata) = self.layer_metadata.get(name) else {
|
||||
return false;
|
||||
};
|
||||
is_same_remote_layer_path(name, metadata, name, index_metadata)
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata gathered for each of the layer files.
|
||||
|
||||
Reference in New Issue
Block a user