merge_local_remote_metadata: bail out in case where local has metadata file but remote is newer than local

This commit is contained in:
Christian Schwarz
2022-11-25 14:54:34 -05:00
parent a8a6603e66
commit d36fd4f141

View File

@@ -382,22 +382,55 @@ impl Drop for TimelineUninitMark {
// i e delete local files which are missing on the remote. This will add extra work,
// wal for these layers needs to be reingested for example
//
// So the solution is to take remote metadata only when it has greater disk_consistent_lsn
// So the solution is to take remote metadata only when we're attaching.
pub fn merge_local_remote_metadata<'a>(
local: Option<&'a TimelineMetadata>,
remote: Option<&'a TimelineMetadata>,
) -> anyhow::Result<(&'a TimelineMetadata, bool)> {
match (local, remote) {
(None, None) => anyhow::bail!("we should have either local metadata or remote"),
(None, Some(remote)) => Ok((remote, false)),
(Some(local), None) => Ok((local, true)),
// happens if we crash during attach, before writing out the metadata file
(None, Some(remote)) => Ok((remote, false)),
// This is the regular case where we crash/exit before finishing queued uploads.
// Also, it happens if we crash during attach after writing the metadata file
// but before removing the attaching marker file.
(Some(local), Some(remote)) => {
// take metadata with highest disk_consistent_lsn.
// FIXME Do we need to merge something?
if local.disk_consistent_lsn() < remote.disk_consistent_lsn() {
Ok((remote, false))
} else {
Ok((local, true))
let consistent_lsn_cmp = local
.disk_consistent_lsn()
.cmp(&remote.disk_consistent_lsn());
let gc_cutoff_lsn_cmp = local
.latest_gc_cutoff_lsn()
.cmp(&remote.latest_gc_cutoff_lsn());
use std::cmp::Ordering::*;
match (consistent_lsn_cmp, gc_cutoff_lsn_cmp) {
// It wouldn't matter, but pick the local one so that we don't rewrite the metadata file.
(Equal, Equal) => Ok((local, true)),
// Local state is clearly ahead of the remote.
(Greater, Greater) => Ok((local, true)),
// We have local layer files that aren't on the remote, but GC horizon is on par.
(Greater, Equal) => Ok((local, true)),
// Local GC started running but we couldn't sync it to the remote.
(Equal, Greater) => Ok((local, true)),
// We always update the local value first, so something else must have
// updated the remote value, probably a different pageserver.
// The control plane is supposed to prevent this from happening.
// Bail out.
(Less, Less)
| (Less, Equal)
| (Equal, Less)
| (Less, Greater)
| (Greater, Less) => {
anyhow::bail!(
r#"remote metadata appears to be ahead of local metadata:
local:
{local:#?}
remote:
{remote:#?}
"#
);
}
}
}
}