pageserver: do not read redundant timeline_layers from IndexPart, so that we can remove it later (#4972)

## Problem

`IndexPart` stores the layer list in two redundant forms: a set of layer names
(`timeline_layers`), and a map from layer name to metadata (`layer_metadata`).

We already require, in `initialize_with_current_remote_index_part`, that every
layer in `timeline_layers` is also present in `layer_metadata`, so any
`index_part.json` files in the field that relied on the two sets being
different would already be broken.
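
As an illustration, here is a minimal sketch of that redundancy and the invariant described above, using `String` names and a bare `u64` as stand-ins for the real `LayerFileName` and `IndexLayerMetadata` types (the `IndexPartSketch` type is hypothetical, not the real struct):

```rust
use std::collections::{HashMap, HashSet};

// Stand-in for IndexPart: the same layer names are stored twice.
struct IndexPartSketch {
    timeline_layers: HashSet<String>,     // redundant copy of the map's keys
    layer_metadata: HashMap<String, u64>, // layer name -> metadata (here just a file size)
}

impl IndexPartSketch {
    // The invariant the load path already enforces: every name in
    // `timeline_layers` must also have an entry in `layer_metadata`.
    fn invariant_holds(&self) -> bool {
        self.timeline_layers
            .iter()
            .all(|name| self.layer_metadata.contains_key(name))
    }
}

fn main() {
    let part = IndexPartSketch {
        timeline_layers: HashSet::from(["layer-1".to_string()]),
        layer_metadata: HashMap::from([("layer-1".to_string(), 25_600_000)]),
    };
    assert!(part.invariant_holds());
}
```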

## Summary of changes

`timeline_layers` is made private and no longer read at runtime. It is
still serialized, but not deserialized.
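
The mechanism is serde's write-only field pattern, visible in the struct diff below. A minimal self-contained sketch of how `#[serde(default, skip_deserializing)]` behaves, using simplified stand-in types (assuming `serde` with the `derive` feature and `serde_json` as dependencies):

```rust
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};

#[derive(Serialize, Deserialize)]
struct IndexPartSketch {
    // Written out for forward compatibility with older readers, but ignored
    // on read: deserialization fills it with Default::default() (an empty set).
    #[serde(default, skip_deserializing)]
    timeline_layers: HashSet<String>,
    layer_metadata: HashMap<String, u64>,
}

fn main() {
    // An index file that still contains `timeline_layers` parses cleanly:
    // serde treats the skipped field as unknown and ignores it by default.
    let json = r#"{"timeline_layers":["L1"],"layer_metadata":{"L1":42}}"#;
    let parsed: IndexPartSketch = serde_json::from_str(json).unwrap();
    assert!(parsed.timeline_layers.is_empty());
    assert_eq!(parsed.layer_metadata["L1"], 42);
}
```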

`disk_consistent_lsn` is also made private, as this field exists only for the
convenience of humans reading the serialized JSON.

This prepares us to remove `timeline_layers` entirely in a future release,
once this change is fully deployed and therefore no pageservers are trying to
read the field any more.
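
The rollout ordering matters: during a mixed-version deployment, new pageservers still serialize the legacy field, so old pageservers, which do deserialize it, keep working against newly written index files. A sketch of the write side, under the same simplified stand-in types as above:

```rust
use serde::Serialize;
use std::collections::{HashMap, HashSet};

#[derive(Serialize)]
struct IndexPartSketch {
    // Still written out during the transition so that not-yet-upgraded
    // pageservers, which do read this field, can parse new index files.
    timeline_layers: HashSet<String>,
    layer_metadata: HashMap<String, u64>,
}

fn main() {
    let layer_metadata = HashMap::from([("layer-1".to_string(), 25_600_000u64)]);
    let part = IndexPartSketch {
        // Derived from the map's keys at write time, never from independent state.
        timeline_layers: layer_metadata.keys().cloned().collect(),
        layer_metadata,
    };
    let json = serde_json::to_string(&part).unwrap();
    // The legacy field is still present in the serialized output.
    assert!(json.contains("\"timeline_layers\""));
}
```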
Author: John Spray
Date: 2023-08-21 12:29:36 +01:00 (committed by GitHub)
Parent: 130ccb4b67
Commit: b95addddd5

4 changed files with 27 additions and 30 deletions


@@ -1578,7 +1578,11 @@ mod tests {
         };
         assert_file_list(
-            &index_part.timeline_layers,
+            &index_part
+                .layer_metadata
+                .keys()
+                .map(|f| f.to_owned())
+                .collect(),
             &[
                 &layer_file_name_1.file_name(),
                 &layer_file_name_2.file_name(),


@@ -62,10 +62,9 @@ pub struct IndexPart {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub deleted_at: Option<NaiveDateTime>,

-    /// Layer names, which are stored on the remote storage.
-    ///
-    /// Additional metadata can might exist in `layer_metadata`.
-    pub timeline_layers: HashSet<LayerFileName>,
+    /// Legacy field: equal to the keys of `layer_metadata`, only written out for forward compat
+    #[serde(default, skip_deserializing)]
+    timeline_layers: HashSet<LayerFileName>,

     /// Per layer file name metadata, which can be present for a present or missing layer file.
     ///
@@ -74,9 +73,10 @@ pub struct IndexPart {
     pub layer_metadata: HashMap<LayerFileName, IndexLayerMetadata>,

     // 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
-    // It's duplicated here for convenience.
+    // It's duplicated for convenience when reading the serialized structure, but is
+    // private because internally we would read from metadata instead.
     #[serde_as(as = "DisplayFromStr")]
-    pub disk_consistent_lsn: Lsn,
+    disk_consistent_lsn: Lsn,

     metadata_bytes: Vec<u8>,
 }
@@ -85,7 +85,11 @@ impl IndexPart {
     /// used to understand later versions.
     ///
     /// Version is currently informative only.
-    const LATEST_VERSION: usize = 2;
+    /// Version history
+    /// - 2: added `deleted_at`
+    /// - 3: no longer deserialize `timeline_layers` (serialized format is the same, but timeline_layers
+    ///      is always generated from the keys of `layer_metadata`)
+    const LATEST_VERSION: usize = 3;

     pub const FILE_NAME: &'static str = "index_part.json";

     pub fn new(
@@ -166,7 +170,7 @@ mod tests {
         let expected = IndexPart {
             // note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
             version: 1,
-            timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap()]),
+            timeline_layers: HashSet::new(),
             layer_metadata: HashMap::from([
                 ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                     file_size: 25600000,
@@ -203,7 +207,7 @@ mod tests {
         let expected = IndexPart {
             // note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
             version: 1,
-            timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap()]),
+            timeline_layers: HashSet::new(),
             layer_metadata: HashMap::from([
                 ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                     file_size: 25600000,
@@ -241,7 +245,7 @@ mod tests {
         let expected = IndexPart {
             // note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
             version: 2,
-            timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap()]),
+            timeline_layers: HashSet::new(),
             layer_metadata: HashMap::from([
                 ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                     file_size: 25600000,


@@ -1730,7 +1730,7 @@ impl Timeline {
         let mut corrupted_local_layers = Vec::new();
         let mut added_remote_layers = Vec::new();

-        for remote_layer_name in &index_part.timeline_layers {
+        for remote_layer_name in index_part.layer_metadata.keys() {
             let local_layer = local_only_layers.remove(remote_layer_name);

             let remote_layer_metadata = index_part
@@ -1890,7 +1890,7 @@ impl Timeline {
             Some(index_part) => {
                 info!(
                     "initializing upload queue from remote index with {} layer files",
-                    index_part.timeline_layers.len()
+                    index_part.layer_metadata.len()
                 );
                 remote_client.init_upload_queue(index_part)?;
                 self.create_remote_layers(index_part, local_layers, disk_consistent_lsn)


@@ -140,23 +140,12 @@ impl UploadQueue {
             }
         }

-        let mut files = HashMap::with_capacity(index_part.timeline_layers.len());
-        for layer_name in &index_part.timeline_layers {
-            match index_part
-                .layer_metadata
-                .get(layer_name)
-                .map(LayerFileMetadata::from)
-            {
-                Some(layer_metadata) => {
-                    files.insert(layer_name.to_owned(), layer_metadata);
-                }
-                None => {
-                    anyhow::bail!(
-                        "No remote layer metadata found for layer {}",
-                        layer_name.file_name()
-                    );
-                }
-            }
-        }
+        let mut files = HashMap::with_capacity(index_part.layer_metadata.len());
+        for (layer_name, layer_metadata) in &index_part.layer_metadata {
+            files.insert(
+                layer_name.to_owned(),
+                LayerFileMetadata::from(layer_metadata),
+            );
+        }

         let index_part_metadata = index_part.parse_metadata()?;