diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 85646406e2..bb4d805726 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -5,25 +5,19 @@ //! See also `settings.md` for better description on every parameter. use anyhow::{bail, ensure, Context, Result}; -use serde::{Deserialize, Serialize}; use toml_edit; use toml_edit::{Document, Item}; -use tracing::*; -use zenith_utils::bin_ser::BeSer; use zenith_utils::postgres_backend::AuthType; use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId}; use std::convert::TryInto; use std::env; -use std::fs::{File, OpenOptions}; -use std::io::Write; use std::num::{NonZeroU32, NonZeroUsize}; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::time::Duration; use crate::layered_repository::TIMELINES_SEGMENT_NAME; -use crate::virtual_file::VirtualFile; pub mod defaults { use const_format::formatcp; @@ -145,60 +139,6 @@ pub struct PageServerConf { pub remote_storage_config: Option, } -pub const TENANT_CONFIG_NAME: &str = "config"; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -pub struct TenantConf { - pub checkpoint_distance: u64, - pub compaction_target_size: u64, - pub compaction_period: Duration, - pub gc_horizon: u64, - pub gc_period: Duration, - pub pitr_interval: Duration, -} - -impl TenantConf { - pub fn from(conf: &PageServerConf) -> TenantConf { - TenantConf { - gc_period: conf.gc_period, - gc_horizon: conf.gc_horizon, - pitr_interval: conf.pitr_interval, - checkpoint_distance: conf.checkpoint_distance, - compaction_period: conf.compaction_period, - compaction_target_size: conf.compaction_target_size, - } - } - pub fn save(&self, conf: &'static PageServerConf, tenantid: ZTenantId) -> Result<()> { - let _enter = info_span!("saving tenant config").entered(); - let path = conf.tenant_path(&tenantid).join(TENANT_CONFIG_NAME); - let mut file = - VirtualFile::open_with_options(&path, OpenOptions::new().write(true).create_new(true))?; - let config_bytes = self.ser()?; - if file.write(&config_bytes)? != config_bytes.len() { - bail!("Could not write all the metadata bytes in a single call"); - } - file.sync_all()?; - let tenant_dir = File::open( - &path - .parent() - .expect("Tetant config should always have a parent dir"), - )?; - tenant_dir.sync_all()?; - Ok(()) - } - - pub fn load(conf: &'static PageServerConf, tenantid: ZTenantId) -> Result { - let _enter = info_span!("loading tenant config").entered(); - let path = conf.tenant_path(&tenantid).join(TENANT_CONFIG_NAME); - let content = std::fs::read(&path); - match content { - Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(Self::from(conf)), - Ok(config_bytes) => Ok(TenantConf::des(&config_bytes)?), - Err(err) => bail!(err), - } - } -} - // use dedicated enum for builder to better indicate the intention // and avoid possible confusion with nested options pub enum BuilderValue { diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 7196d659a0..fa8f405fed 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -23,8 +23,9 @@ use super::models::{ }; use crate::remote_storage::{schedule_timeline_download, RemoteIndex}; use crate::repository::Repository; +use crate::tenant_config::TenantConf; use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo}; -use crate::{config::PageServerConf, config::TenantConf, tenant_mgr, timelines, ZTenantId}; +use crate::{config::PageServerConf, tenant_mgr, timelines, ZTenantId}; struct State { conf: &'static PageServerConf, diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index 5a0947d806..362f883350 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -32,8 +32,9 @@ use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard, TryLockError}; use std::time::{Duration, Instant, SystemTime}; use self::metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME}; -use crate::config::{PageServerConf, TenantConf}; +use crate::config::PageServerConf; use crate::keyspace::KeySpace; +use crate::tenant_config::{TenantConf, TenantConfFile}; use crate::page_cache; use crate::remote_storage::{schedule_timeline_checkpoint_upload, RemoteIndex}; @@ -566,6 +567,58 @@ impl LayeredRepository { } } + /// Save tenant's config to file + pub fn save_tenantconf( + conf: &'static PageServerConf, + tenantid: ZTenantId, + tenant_conf: TenantConf, + first_save: bool, + ) -> Result<()> { + let _enter = info_span!("saving tenantconf").entered(); + let path = TenantConf::tenantconf_path(conf, tenantid); + info!("save tenantconf to {}", path.display()); + + // use OpenOptions to ensure file presence is consistent with first_save + let mut file = VirtualFile::open_with_options( + &path, + OpenOptions::new().write(true).create_new(first_save), + )?; + + let data = TenantConfFile::from(tenant_conf); + let tenantconf_bytes = data.to_bytes().context("Failed to get tenantconf bytes")?; + + if file.write(&tenantconf_bytes)? != tenantconf_bytes.len() { + bail!("Could not write all the tenantconf bytes in a single call"); + } + file.sync_all()?; + + // fsync the parent directory to ensure the directory entry is durable + if first_save { + let tenant_dir = File::open( + &path + .parent() + .expect("Tenantconf should always have a parent dir"), + )?; + tenant_dir.sync_all()?; + } + + Ok(()) + } + + pub fn load_tenantconf( + conf: &'static PageServerConf, + tenantid: ZTenantId, + ) -> Result { + let path = TenantConf::tenantconf_path(conf, tenantid); + info!("loading tenantconf from {}", path.display()); + let tenantconf_bytes = std::fs::read(&path)?; + let tenant_conf_file = TenantConfFile::from_bytes(&tenantconf_bytes); + match tenant_conf_file { + Ok(tenant_conf) => return Ok(tenant_conf.body), + Err(err) => return Err(err), + }; + } + /// Save timeline metadata to file fn save_metadata( conf: &'static PageServerConf, @@ -2254,10 +2307,6 @@ pub mod tests { } let cutoff = tline.get_last_record_lsn(); - let parts = keyspace - .clone() - .to_keyspace() - .partition(TEST_FILE_SIZE as u64); tline.update_gc_info(Vec::new(), cutoff, Duration::ZERO); tline.checkpoint(CheckpointConfig::Forced)?; diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 6dddef5f27..670edc5854 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -10,6 +10,7 @@ pub mod pgdatadir_mapping; pub mod reltag; pub mod remote_storage; pub mod repository; +pub mod tenant_config; pub mod tenant_mgr; pub mod tenant_threads; pub mod thread_mgr; diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index 348b2b91e0..5db9f7e4e7 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -1,6 +1,6 @@ -use crate::config::TenantConf; use crate::layered_repository::metadata::TimelineMetadata; use crate::remote_storage::RemoteIndex; +use crate::tenant_config::TenantConf; use crate::walrecord::ZenithWalRecord; use crate::CheckpointConfig; use anyhow::{bail, Result}; @@ -511,6 +511,7 @@ pub mod repo_harness { let repo = LayeredRepository::new( self.conf, + // Use default TenantConf in tests TenantConf::from(self.conf), walredo_mgr, self.tenant_id, diff --git a/pageserver/src/tenant_config.rs b/pageserver/src/tenant_config.rs new file mode 100644 index 0000000000..9d9f6a05f4 --- /dev/null +++ b/pageserver/src/tenant_config.rs @@ -0,0 +1,185 @@ +//! Functions for handling per-tenant configuration options +//! +//! If tenant is created with --config option, +//! the tenant-specific config will be stored in tenant's directory. +//! Otherwise, global pageserver's config is used. +//! +//! If the tenant config file is corrupted, the tenant will be disabled. +//! We cannot use global or default config instead, because wrong settings +//! may lead to a data loss. +//! +use crate::config::PageServerConf; +use crate::STORAGE_FORMAT_VERSION; +use anyhow::ensure; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use std::time::Duration; +use zenith_utils::bin_ser::BeSer; +use zenith_utils::zid::ZTenantId; + +pub const TENANT_CONFIG_NAME: &str = "config"; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct TenantConf { + pub checkpoint_distance: u64, + pub compaction_target_size: u64, + pub compaction_period: Duration, + pub gc_horizon: u64, + pub gc_period: Duration, + pub pitr_interval: Duration, +} + +/// We assume that a write of up to TENANTCONF_MAX_SIZE bytes is atomic. +/// +/// This is the same assumption that PostgreSQL makes with the control file, +/// see PG_CONTROL_MAX_SAFE_SIZE +const TENANTCONF_MAX_SIZE: usize = 512; + +/// TenantConfFile is stored on disk in tenant's directory +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TenantConfFile { + hdr: TenantConfHeader, + pub body: TenantConf, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct TenantConfHeader { + checksum: u32, // CRC of serialized tenantconf body + size: u16, // size of serialized tenantconf + format_version: u16, // storage format version (used for compatibility checks) +} +const TENANTCONF_HDR_SIZE: usize = std::mem::size_of::(); + +impl TenantConfFile { + pub fn new( + checkpoint_distance: u64, + compaction_target_size: u64, + compaction_period: Duration, + gc_horizon: u64, + gc_period: Duration, + pitr_interval: Duration, + ) -> Self { + Self { + hdr: TenantConfHeader { + checksum: 0, + size: 0, + format_version: STORAGE_FORMAT_VERSION, + }, + body: TenantConf { + gc_period, + gc_horizon, + pitr_interval, + checkpoint_distance, + compaction_period, + compaction_target_size, + }, + } + } + + pub fn from(tconf: TenantConf) -> Self { + Self { + hdr: TenantConfHeader { + checksum: 0, + size: 0, + format_version: STORAGE_FORMAT_VERSION, + }, + body: TenantConf { + gc_period: tconf.gc_period, + gc_horizon: tconf.gc_horizon, + pitr_interval: tconf.pitr_interval, + checkpoint_distance: tconf.checkpoint_distance, + compaction_period: tconf.compaction_period, + compaction_target_size: tconf.compaction_target_size, + }, + } + } + + pub fn from_bytes(tenantconf_bytes: &[u8]) -> anyhow::Result { + ensure!( + tenantconf_bytes.len() == TENANTCONF_MAX_SIZE, + "tenantconf bytes size is wrong" + ); + let hdr = TenantConfHeader::des(&tenantconf_bytes[0..TENANTCONF_HDR_SIZE])?; + ensure!( + hdr.format_version == STORAGE_FORMAT_VERSION, + "format version mismatch" + ); + let tenantconf_size = hdr.size as usize; + ensure!( + tenantconf_size <= TENANTCONF_MAX_SIZE, + "corrupted tenantconf file" + ); + let calculated_checksum = + crc32c::crc32c(&tenantconf_bytes[TENANTCONF_HDR_SIZE..tenantconf_size]); + ensure!( + hdr.checksum == calculated_checksum, + "tenantconf checksum mismatch" + ); + let body = TenantConf::des(&tenantconf_bytes[TENANTCONF_HDR_SIZE..tenantconf_size])?; + + Ok(TenantConfFile { hdr, body }) + } + + pub fn to_bytes(&self) -> anyhow::Result> { + let body_bytes = self.body.ser()?; + let tenantconf_size = TENANTCONF_HDR_SIZE + body_bytes.len(); + let hdr = TenantConfHeader { + size: tenantconf_size as u16, + format_version: STORAGE_FORMAT_VERSION, + checksum: crc32c::crc32c(&body_bytes), + }; + let hdr_bytes = hdr.ser()?; + let mut tenantconf_bytes = vec![0u8; TENANTCONF_MAX_SIZE]; + tenantconf_bytes[0..TENANTCONF_HDR_SIZE].copy_from_slice(&hdr_bytes); + tenantconf_bytes[TENANTCONF_HDR_SIZE..tenantconf_size].copy_from_slice(&body_bytes); + Ok(tenantconf_bytes) + } +} + +impl TenantConf { + pub fn from(conf: &PageServerConf) -> TenantConf { + TenantConf { + gc_period: conf.gc_period, + gc_horizon: conf.gc_horizon, + pitr_interval: conf.pitr_interval, + checkpoint_distance: conf.checkpoint_distance, + compaction_period: conf.compaction_period, + compaction_target_size: conf.compaction_target_size, + } + } + + /// Points to a place in pageserver's local directory, + /// where certain tenant's tenantconf file should be located. + pub fn tenantconf_path(conf: &'static PageServerConf, tenantid: ZTenantId) -> PathBuf { + conf.tenant_path(&tenantid).join(TENANT_CONFIG_NAME) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tenantconf_serializes_correctly() { + let original_tenantconf = TenantConfFile::new( + 111, + 111, + Duration::from_secs(111), + 222, + Duration::from_secs(111), + Duration::from_secs(60 * 60), + ); + + let tenantconf_bytes = original_tenantconf + .to_bytes() + .expect("Should serialize correct tenantconf to bytes"); + + let deserialized_tenantconf = TenantConfFile::from_bytes(&tenantconf_bytes) + .expect("Should deserialize its own bytes"); + + assert_eq!( + deserialized_tenantconf.body, original_tenantconf.body, + "Tenantconf that was serialized to bytes and deserialized back should not change" + ); + } +} diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index eae792d45b..9f729788ed 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -1,10 +1,11 @@ //! This module acts as a switchboard to access different repositories managed by this //! page server. -use crate::config::{PageServerConf, TenantConf}; +use crate::config::PageServerConf; use crate::layered_repository::LayeredRepository; use crate::remote_storage::RemoteIndex; use crate::repository::{Repository, TimelineSyncStatusUpdate}; +use crate::tenant_config::TenantConf; use crate::thread_mgr; use crate::thread_mgr::ThreadKind; use crate::timelines; @@ -63,7 +64,7 @@ fn access_tenants() -> MutexGuard<'static, HashMap> { TENANTS.lock().unwrap() } -// Sets up wal redo manager and repository for tenant. Reduces code duplocation. +// Sets up wal redo manager and repository for tenant. Reduces code duplication. // Used during pageserver startup, or when new tenant is attached to pageserver. pub fn load_local_repo( conf: &'static PageServerConf, @@ -76,13 +77,9 @@ pub fn load_local_repo( let walredo_mgr = PostgresRedoManager::new(conf, tenant_id); // Try to load config file - let tenant_conf = match TenantConf::load(conf, tenant_id) { - Ok(tenant_conf) => tenant_conf, - Err(e) => { - error!("Failed to load tenant state: {:?}", e); - TenantConf::from(conf) - } - }; + let tenant_conf = LayeredRepository::load_tenantconf(conf, tenant_id) + .with_context(|| format!("Failed to load tenant state for id {}", tenant_id)) + .unwrap(); // Set up an object repository, for actual data storage. let repo: Arc = Arc::new(LayeredRepository::new( diff --git a/pageserver/src/timelines.rs b/pageserver/src/timelines.rs index c6fab256fc..19d03495f8 100644 --- a/pageserver/src/timelines.rs +++ b/pageserver/src/timelines.rs @@ -20,10 +20,10 @@ use zenith_utils::{crashsafe_dir, logging}; use crate::{ config::PageServerConf, - config::TenantConf, layered_repository::metadata::TimelineMetadata, remote_storage::RemoteIndex, repository::{LocalTimelineState, Repository}, + tenant_config::TenantConf, DatadirTimeline, RepositoryImpl, }; use crate::{import_datadir, LOG_FILE_NAME}; @@ -211,6 +211,9 @@ pub fn create_repo( crashsafe_dir::create_dir(conf.timelines_path(&tenant_id))?; info!("created directory structure in {}", repo_dir.display()); + // Save tenant's config + LayeredRepository::save_tenantconf(conf, tenant_id, tenant_conf, true)?; + Ok(Arc::new(LayeredRepository::new( conf, tenant_conf,