diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index 1a7c0ad891..8b5e759eed 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -31,6 +31,7 @@ pub struct Basebackup<'a> { ar: Builder<&'a mut dyn Write>, timeline: &'a Arc, lsn: Lsn, + prev_record_lsn: Lsn, snappath: String, slru_buf: [u8; pg_constants::SLRU_SEG_SIZE], slru_segno: u32, @@ -43,12 +44,14 @@ impl<'a> Basebackup<'a> { timelineid: ZTimelineId, timeline: &'a Arc, lsn: Lsn, + prev_record_lsn: Lsn, snapshot_lsn: Lsn, ) -> Basebackup<'a> { Basebackup { ar: Builder::new(write), timeline, lsn, + prev_record_lsn, snappath: format!("timelines/{}/snapshots/{:016X}", timelineid, snapshot_lsn.0), slru_path: "", slru_segno: u32::MAX, @@ -239,8 +242,10 @@ impl<'a> Basebackup<'a> { pg_control.state = pg_constants::DB_SHUTDOWNED; // add zenith.signal file - self.ar - .append(&new_tar_header("zenith.signal", 0)?, &b""[..])?; + self.ar.append( + &new_tar_header("zenith.signal", 8)?, + &self.prev_record_lsn.0.to_le_bytes()[..], + )?; //send pg_control let pg_control_bytes = pg_control.encode(); diff --git a/pageserver/src/object_repository.rs b/pageserver/src/object_repository.rs index 3eeff9788e..15549b7444 100644 --- a/pageserver/src/object_repository.rs +++ b/pageserver/src/object_repository.rs @@ -110,6 +110,7 @@ impl Repository for ObjectRepository { let metadata = MetadataEntry { last_valid_lsn: start_lsn, last_record_lsn: start_lsn, + prev_record_lsn: Lsn(0), ancestor_timeline: None, ancestor_lsn: start_lsn, }; @@ -146,6 +147,7 @@ impl Repository for ObjectRepository { let metadata = MetadataEntry { last_valid_lsn: at_lsn, last_record_lsn: at_lsn, + prev_record_lsn: src_timeline.get_prev_record_lsn(), ancestor_timeline: Some(src), ancestor_lsn: at_lsn, }; @@ -216,6 +218,7 @@ pub struct ObjectTimeline { // last_valid_lsn: SeqWait, last_record_lsn: AtomicLsn, + prev_record_lsn: AtomicLsn, ancestor_timeline: Option, ancestor_lsn: Lsn, @@ -244,6 +247,7 @@ impl ObjectTimeline { walredo_mgr, last_valid_lsn: SeqWait::new(metadata.last_valid_lsn), last_record_lsn: AtomicLsn::new(metadata.last_record_lsn.0), + prev_record_lsn: AtomicLsn::new(metadata.prev_record_lsn.0), ancestor_timeline: metadata.ancestor_timeline, ancestor_lsn: metadata.ancestor_lsn, rel_meta: RwLock::new(BTreeMap::new()), @@ -469,7 +473,13 @@ impl Timeline for ObjectTimeline { /// /// Memorize a full image of a page version /// - fn put_page_image(&self, tag: ObjectTag, lsn: Lsn, img: Bytes, update_meta: bool) -> Result<()> { + fn put_page_image( + &self, + tag: ObjectTag, + lsn: Lsn, + img: Bytes, + update_meta: bool, + ) -> Result<()> { self.put_page_entry(&tag, lsn, PageEntry::Page(img))?; debug!("put_page_image rel {:?} at {}", tag, lsn); @@ -552,6 +562,7 @@ impl Timeline for ObjectTimeline { assert!(old == Lsn(0)); let old = self.last_record_lsn.fetch_max(lsn); assert!(old == Lsn(0)); + self.prev_record_lsn.store(Lsn(0)); } /// Like `advance_last_valid_lsn`, but this always points to the end of @@ -566,6 +577,8 @@ impl Timeline for ObjectTimeline { let old = self.last_record_lsn.fetch_max(lsn); assert!(old <= lsn); + self.prev_record_lsn.fetch_max(old); + // Also advance last_valid_lsn let old = self.last_valid_lsn.advance(lsn); // Can't move backwards. @@ -580,6 +593,10 @@ impl Timeline for ObjectTimeline { self.last_record_lsn.load() } + fn get_prev_record_lsn(&self) -> Lsn { + self.prev_record_lsn.load() + } + /// /// Flush to disk all data that was written with the put_* functions /// @@ -593,6 +610,7 @@ impl Timeline for ObjectTimeline { let metadata = MetadataEntry { last_valid_lsn: self.last_valid_lsn.load(), last_record_lsn: self.last_record_lsn.load(), + prev_record_lsn: self.prev_record_lsn.load(), ancestor_timeline: self.ancestor_timeline, ancestor_lsn: self.ancestor_lsn, }; @@ -1120,6 +1138,7 @@ const fn relation_size_key(timelineid: ZTimelineId, rel: RelTag) -> ObjectKey { pub struct MetadataEntry { last_valid_lsn: Lsn, last_record_lsn: Lsn, + prev_record_lsn: Lsn, ancestor_timeline: Option, ancestor_lsn: Lsn, } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index dd2cc3d1e1..dcea1c3de8 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -303,6 +303,7 @@ impl PageServerHandler { timelineid, &timeline, req_lsn, + timeline.get_prev_record_lsn(), snapshot_lsn, ); basebackup.send_tarball()?; diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index 6bc212d85f..ddcaabfa97 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -89,7 +89,8 @@ pub trait Timeline: Send + Sync { fn put_raw_data(&self, tag: ObjectTag, lsn: Lsn, data: &[u8]) -> Result<()>; /// Like put_wal_record, but with ready-made image of the page. - fn put_page_image(&self, tag: ObjectTag, lsn: Lsn, img: Bytes, update_meta: bool) -> Result<()>; + fn put_page_image(&self, tag: ObjectTag, lsn: Lsn, img: Bytes, update_meta: bool) + -> Result<()>; /// Truncate relation fn put_truncation(&self, rel: RelTag, lsn: Lsn, nblocks: u32) -> Result<()>; @@ -120,6 +121,9 @@ pub trait Timeline: Send + Sync { fn advance_last_record_lsn(&self, lsn: Lsn); fn get_last_record_lsn(&self) -> Lsn; + // Like `advance_last_record_lsn`, but points to the start position of last record + fn get_prev_record_lsn(&self) -> Lsn; + /// /// Flush to disk all data that was written with the put_* functions /// diff --git a/pageserver/src/restore_local_repo.rs b/pageserver/src/restore_local_repo.rs index fafc13f121..c3cdf273e2 100644 --- a/pageserver/src/restore_local_repo.rs +++ b/pageserver/src/restore_local_repo.rs @@ -256,7 +256,12 @@ fn import_slru_file( let r = file.read_exact(&mut buf); match r { Ok(_) => { - timeline.put_page_image(gen_tag(blknum), lsn, Bytes::copy_from_slice(&buf), false)?; + timeline.put_page_image( + gen_tag(blknum), + lsn, + Bytes::copy_from_slice(&buf), + false, + )?; } // TODO: UnexpectedEof is expected diff --git a/postgres_ffi/src/xlog_utils.rs b/postgres_ffi/src/xlog_utils.rs index d906ea625f..272815d153 100644 --- a/postgres_ffi/src/xlog_utils.rs +++ b/postgres_ffi/src/xlog_utils.rs @@ -96,8 +96,8 @@ pub fn get_current_timestamp() -> TimestampTz { match SystemTime::now().duration_since(SystemTime::UNIX_EPOCH) { Ok(n) => { ((n.as_secs() - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY)) - * USECS_PER_SEC - + n.subsec_micros() as u64) as i64 + * USECS_PER_SEC + + n.subsec_micros() as u64) as i64 } Err(_) => panic!("SystemTime before UNIX EPOCH!"), }