From 8f81ac064ef3785062fd2ba9ebc0c8563bbb094a Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sun, 11 Jul 2021 10:59:58 +0300 Subject: [PATCH] Use the 'bookfile' crate for the snapshot files. --- Cargo.lock | 53 ++++++++++++++ pageserver/Cargo.toml | 1 + .../src/layered_repository/snapshot_layer.rs | 73 +++++++++++-------- 3 files changed, 98 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c8b69e0a7a..7677e1273b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,6 +80,30 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "aversion" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25b49482974b90e9f36c5adcc50acde2e27e806ac269ff32758d700432782bc0" +dependencies = [ + "aversion-macros", + "byteorder", + "serde", + "serde_cbor", + "thiserror", +] + +[[package]] +name = "aversion-macros" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3009cf133dbd82459e96cf46bb24c8e6ad5c02c387ddb21d0f2c4c781a5394" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "aws-creds" version = "0.26.0" @@ -158,6 +182,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bookfile" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7db391acd99b8bdce5d5a66ca28530761affec9a407df91aee668fc318e3db71" +dependencies = [ + "aversion", + "byteorder", + "serde", + "thiserror", +] + [[package]] name = "boxfnonce" version = "0.1.1" @@ -638,6 +674,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" + [[package]] name = "hashbrown" version = "0.9.1" @@ -1106,6 +1148,7 @@ name = "pageserver" 
version = "0.1.0" dependencies = [ "anyhow", + "bookfile", "byteorder", "bytes", "chrono", @@ -1652,6 +1695,16 @@ dependencies = [ "xml-rs", ] +[[package]] +name = "serde_cbor" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.126" diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index f83723cac9..298b396a90 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bookfile = "^0.2" chrono = "0.4.19" rand = "0.8.3" regex = "1.4.5" diff --git a/pageserver/src/layered_repository/snapshot_layer.rs b/pageserver/src/layered_repository/snapshot_layer.rs index 6a80da61b0..96325938dc 100644 --- a/pageserver/src/layered_repository/snapshot_layer.rs +++ b/pageserver/src/layered_repository/snapshot_layer.rs @@ -17,29 +17,25 @@ //! When a snapshot file needs to be accessed, we slurp the whole file into memory, into //! a SnapshotLayer struct. //! -//! On disk, a snapshot file is actually two files: one containing all the page versions, -//! and another containing the relation size information. That's just for the convenience -//! of serializing the two objects. -//! -//! The files are stored in .zenith/timelines/<timelineid> directory. +//! On disk, the snapshot files are stored in .zenith/timelines/<timelineid> directory. //! Currently, there are no subdirectories, and each snapshot file is named like this: //! //! <spcnode>_<dbnode>_<relnode>_<forknum>_<start LSN>_<end LSN> //! -//! And the corresponding file containing the relation size information has _relsizes -//! suffix. For example: +//! For example: //! //! 1663_13990_2609_0_000000000169C348_000000000169C349 -//! 1663_13990_2609_0_000000000169C348_000000000169C349_relsizes //! - +//! 
A snapshot file is constructed using the 'bookfile' crate. Each file consists of two +//! parts: the page versions and the relation sizes. They are stored as separate chapters. +//! use crate::layered_repository::storage_layer::Layer; use crate::layered_repository::storage_layer::PageVersion; use crate::repository::{RelTag, WALRecord}; use crate::walredo::WalRedoManager; use crate::PageServerConf; use crate::ZTimelineId; -use anyhow::{bail, Result}; +use anyhow::{anyhow, bail, Result}; use bytes::Bytes; use log::*; use std::collections::{BTreeMap, HashSet}; @@ -47,14 +43,22 @@ use std::fs; use std::fs::File; use std::io::Write; use std::ops::Bound::Included; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::sync::Mutex; +use bookfile::{Book, BookWriter}; + use zenith_utils::bin_ser::BeSer; use zenith_utils::lsn::Lsn; static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]); +// Magic constant to identify a Zenith snapshot file +static SNAPSHOT_FILE_MAGIC: u32 = 0x5A616E01; + +static PAGE_VERSIONS_CHAPTER: u64 = 1; +static REL_SIZES_CHAPTER: u64 = 2; + /// /// SnapshotLayer is the in-memory data structure associated with an on-disk snapshot file. /// It is also used to accumulate new changes at the tip of a branch; end_lsn is u64::MAX @@ -297,13 +301,6 @@ impl SnapshotLayer { conf.timeline_path(timelineid).join(&fname) } - fn relsizes_path(path: &Path) -> PathBuf { - let mut fname = path.file_name().unwrap().to_os_string(); - fname.push("_relsizes"); - - path.with_file_name(fname) - } - /// Create a new snapshot file, using the given btreemaps containing the page versions and /// relsizes. /// @@ -343,15 +340,22 @@ impl SnapshotLayer { // Note: This overwrites any existing file. There shouldn't be any. // FIXME: throw an error instead? 
- // Write out page versions - let mut file = File::create(&path)?; - let buf = BTreeMap::ser(&page_versions)?; - file.write_all(&buf)?; + let file = File::create(&path)?; + let book = BookWriter::new(file, SNAPSHOT_FILE_MAGIC)?; - // and relsizes to separate file - let mut file = File::create(Self::relsizes_path(&path))?; + // Write out page versions + let mut chapter = book.new_chapter(PAGE_VERSIONS_CHAPTER); + let buf = BTreeMap::ser(&page_versions)?; + chapter.write_all(&buf)?; + let book = chapter.close()?; + + // and relsizes to separate chapter + let mut chapter = book.new_chapter(REL_SIZES_CHAPTER); let buf = BTreeMap::ser(&relsizes)?; - file.write_all(&buf)?; + chapter.write_all(&buf)?; + let book = chapter.close()?; + + book.close()?; debug!("saved {}", &path.display()); @@ -421,12 +425,23 @@ impl SnapshotLayer { ) -> Result { let path = Self::path_for(conf, timelineid, tag, start_lsn, end_lsn); - let content = std::fs::read(&path)?; - let page_versions = BTreeMap::des(&content)?; + let file = File::open(&path)?; + let mut book = Book::new(file)?; + + let chapter_index = book + .find_chapter(PAGE_VERSIONS_CHAPTER) + .ok_or_else(|| anyhow!("could not find page versions chapter in {}", path.display()))?; + let chapter = book.read_chapter(chapter_index)?; + let page_versions = BTreeMap::des(&chapter)?; + + let chapter_index = book + .find_chapter(REL_SIZES_CHAPTER) + .ok_or_else(|| anyhow!("could not find relsizes chapter in {}", path.display()))?; + let chapter = book.read_chapter(chapter_index)?; + let relsizes = BTreeMap::des(&chapter)?; + debug!("loaded from {}", &path.display()); - let content = std::fs::read(Self::relsizes_path(&path))?; - let relsizes = BTreeMap::des(&content)?; Ok(SnapshotLayer { conf, timelineid,