diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index f01ab2bfe2..a911da2901 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -382,22 +382,55 @@ impl Drop for TimelineUninitMark { // i e delete local files which are missing on the remote. This will add extra work, // wal for these layers needs to be reingested for example // -// So the solution is to take remote metadata only when it has greater disk_consistent_lsn +// So the solution is to take remote metadata only when we're attaching. pub fn merge_local_remote_metadata<'a>( local: Option<&'a TimelineMetadata>, remote: Option<&'a TimelineMetadata>, ) -> anyhow::Result<(&'a TimelineMetadata, bool)> { match (local, remote) { (None, None) => anyhow::bail!("we should have either local metadata or remote"), - (None, Some(remote)) => Ok((remote, false)), (Some(local), None) => Ok((local, true)), + // happens if we crash during attach, before writing out the metadata file + (None, Some(remote)) => Ok((remote, false)), + // This is the regular case where we crash/exit before finishing queued uploads. + // Also, it happens if we crash during attach after writing the metadata file + // but before removing the attaching marker file. (Some(local), Some(remote)) => { - // take metadata with highest disk_consistent_lsn. - // FIXME Do we need to merge something? - if local.disk_consistent_lsn() < remote.disk_consistent_lsn() { - Ok((remote, false)) - } else { - Ok((local, true)) + let consistent_lsn_cmp = local + .disk_consistent_lsn() + .cmp(&remote.disk_consistent_lsn()); + let gc_cutoff_lsn_cmp = local + .latest_gc_cutoff_lsn() + .cmp(&remote.latest_gc_cutoff_lsn()); + use std::cmp::Ordering::*; + match (consistent_lsn_cmp, gc_cutoff_lsn_cmp) { + // It wouldn't matter, but pick the local one so that we don't rewrite the metadata file. + (Equal, Equal) => Ok((local, true)), + // Local state is clearly ahead of the remote. + (Greater, Greater) => Ok((local, true)), + // We have local layer files that aren't on the remote, but GC horizon is on par. + (Greater, Equal) => Ok((local, true)), + // Local GC started running but we couldn't sync it to the remote. + (Equal, Greater) => Ok((local, true)), + + // We always update the local value first, so something else must have + // updated the remote value, probably a different pageserver. + // The control plane is supposed to prevent this from happening. + // Bail out. + (Less, Less) + | (Less, Equal) + | (Equal, Less) + | (Less, Greater) + | (Greater, Less) => { + anyhow::bail!( + r#"remote metadata appears to be ahead of local metadata: +local: + {local:#?} +remote: + {remote:#?} +"# + ); + } } } }