From 8e7f336540102e1c1d9ac2e2b8a42fe208ee9bfd Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 11 Sep 2024 11:12:00 +0300 Subject: [PATCH] Cache initdb output to speed up tenant creation in tests initdb takes about 1 s. Our tests create and destroy a lot of tenants, so that adds up. Cache the initdb result to speed it up. This is currently only enabled in tests, mostly out of caution, but also because when you reuse the initdb result, all the PostgreSQL clusters end up having the same system_identifier, which is supposed to be unique. Uniqueness is not necessary for correctness: nothing critical relies on it, and you can easily end up with duplicate system_identifiers in standalone PostgreSQL too, if you e.g. create a backup and restore it on a different system. But it is used in various checks to reduce the chance that you e.g. accidentally apply WAL belonging to a different cluster. Because this is aimed at tests, there are a few things that might be surprising: - The initdb cache directory is configurable, and can be outside the pageserver's repo directory. This allows reuse across different pageservers running on the same host. In production use, that'd be pointless, but our tests create a lot of pageservers. - The cache is not automatically purged at start / shutdown. For production use, we'd probably want that, so that we'd pick up any changes in what an empty cluster looks like after a Postgres minor version upgrade, for example. But again, tests create and destroy a lot of pageservers, so it's important to retain the cache. - The locking on the cache directory relies purely on filesystem operations and atomic rename(). Using e.g. a Rust Mutex would be more straightforward, but that's not enough because the cache needs to be shared between different pageservers running on the same system. 
--- libs/pageserver_api/src/config.rs | 2 + pageserver/src/config.rs | 4 + pageserver/src/tenant.rs | 114 +++++++++++++++++++++++++- test_runner/fixtures/neon_fixtures.py | 22 +++++ 4 files changed, 141 insertions(+), 1 deletion(-) diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 1194ee93ef..321048a22e 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -64,6 +64,7 @@ pub struct ConfigToml { #[serde(with = "humantime_serde")] pub wal_redo_timeout: Duration, pub superuser: String, + pub initdb_cache_dir: Option, pub page_cache_size: usize, pub max_file_descriptors: usize, pub pg_distrib_dir: Option, @@ -358,6 +359,7 @@ impl Default for ConfigToml { wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT) .expect("cannot parse default wal redo timeout")), superuser: (DEFAULT_SUPERUSER.to_string()), + initdb_cache_dir: None, page_cache_size: (DEFAULT_PAGE_CACHE_SIZE), max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS), pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir() diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 29a98855d3..14fa8e4895 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -71,6 +71,8 @@ pub struct PageServerConf { pub superuser: String, + pub initdb_cache_dir: Option, + pub page_cache_size: usize, pub max_file_descriptors: usize, @@ -309,6 +311,7 @@ impl PageServerConf { wait_lsn_timeout, wal_redo_timeout, superuser, + initdb_cache_dir, page_cache_size, max_file_descriptors, pg_distrib_dir, @@ -356,6 +359,7 @@ impl PageServerConf { wait_lsn_timeout, wal_redo_timeout, superuser, + initdb_cache_dir, page_cache_size, max_file_descriptors, http_auth_type, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c6f0e48101..1b7e8facfb 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3491,7 +3491,7 @@ impl Tenant { 
.context("extract initdb tar")?; } else { // Init temporarily repo to get bootstrap data, this creates a directory in the `pgdata_path` path - run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?; + run_initdb_with_cache(self.conf, &pgdata_path, pg_version, &self.cancel).await?; // Upload the created data dir to S3 if self.tenant_shard_id().is_shard_zero() { @@ -3837,6 +3837,118 @@ impl Tenant { } } +fn cached_initdb_dirname(initial_superuser_name: &str, pg_version: u32) -> String +{ + use std::hash::Hash; + use std::hash::Hasher; + use std::collections::hash_map::DefaultHasher; + let mut hasher = DefaultHasher::new(); + initial_superuser_name.hash(&mut hasher); + let hash = hasher.finish(); + + format!("cached_initial_pgdata_{pg_version}_{:016}", hash) +} + +fn copy_dir_all(src: impl AsRef, dst: impl AsRef) -> std::io::Result<()> { + for entry in fs::read_dir(src.as_ref())? { + let entry = entry?; + let subsrc = entry.path(); + let subdst = dst.as_ref().join(&entry.file_name()); + + if entry.file_type()?.is_dir() { + std::fs::create_dir(&subdst)?; + copy_dir_all(&subsrc, &subdst)?; + } else { + std::fs::copy(&subsrc, &subdst)?; + } + } + Ok(()) +} + +fn restore_cached_initdb_dir( + cached_path: &Utf8Path, + target_path: &Utf8Path, +) -> anyhow::Result { + if !cached_path.exists() { + info!("cached initdb dir \"{cached_path}\" does not exist yet"); + return Ok(false); + } + + std::fs::create_dir(target_path)?; + copy_dir_all(cached_path, target_path)?; + info!("restored initdb result from cache dir \"{cached_path}\""); + Ok(true) +} + +fn save_cached_initdb_dir( + src_path: &Utf8Path, + cache_path: &Utf8Path, +) -> anyhow::Result<()> { + match std::fs::create_dir(cache_path) { + Ok(()) => { + info!("saving initdb result to cache dir \"{cache_path}\""); + }, + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { + info!("cache initdb dir \"{cache_path}\" already exists, not saving"); + return Ok(()) + }, + Err(err) => { return 
Err(anyhow::Error::from(err))}, + }; + let cache_dir_guard = scopeguard::guard(cache_path, |cp| { + if let Err(err) = std::fs::remove_dir_all(&cp) { + error!("could not remove cached initdb directory {cp}: {err}"); + } + }); + + let cache_parent_path = cache_path.parent().ok_or(anyhow::Error::msg("no cache parent path"))?; + + let tmp_dirpath = camino_tempfile::tempdir_in(cache_parent_path)?; + copy_dir_all(src_path, &tmp_dirpath)?; + std::fs::rename(tmp_dirpath, &*cache_dir_guard)?; + + // disarm the guard + scopeguard::ScopeGuard::into_inner(cache_dir_guard); + + Ok(()) +} + +async fn run_initdb_with_cache( + conf: &'static PageServerConf, + initdb_target_dir: &Utf8Path, + pg_version: u32, + cancel: &CancellationToken, +) -> Result<(), InitdbError> { + + let cache_dir = conf.initdb_cache_dir.as_ref().map(|initdb_cache_dir| { + initdb_cache_dir.join(cached_initdb_dirname(&conf.superuser, pg_version)) + }); + + if let Some(cache_dir) = &cache_dir { + match restore_cached_initdb_dir(&cache_dir, initdb_target_dir) { + Ok(true) => return Ok(()), + Ok(false) => {}, + Err(err) => { + warn!("Error restoring from cached initdb directory \"{cache_dir}\": {err}"); + if initdb_target_dir.exists() { + if let Err(err) = std::fs::remove_dir_all(&initdb_target_dir) { + error!("could not remove temporary initdb target directory {initdb_target_dir}: {err}"); + } + } + }, + } + } + + run_initdb(conf, initdb_target_dir, pg_version, cancel).await?; + + if let Some(cache_dir) = &cache_dir { + if let Err(err) = save_cached_initdb_dir(initdb_target_dir, &cache_dir) { + warn!("error saving initdb result to cache directory \"{cache_dir}\": {err}"); + } + } + + Ok(()) +} + /// Create the cluster temporarily in 'initdbpath' directory inside the repository /// to get bootstrap data for timeline initialization. 
async fn run_initdb( diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 60887b9aed..b289f28edb 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -181,6 +181,17 @@ def top_output_dir(base_dir: Path) -> Iterator[Path]: log.info(f"top_output_dir is {output_dir}") yield output_dir +@pytest.fixture(scope="session", autouse=True) +def shared_initdb_cache_dir(top_output_dir: Path) -> Iterator[Path]: + log.info("Creating shared initdb cache directory") + + cache_dir = top_output_dir / "shared_initdb_cache" + + shutil.rmtree(cache_dir, ignore_errors=True) + cache_dir.mkdir(exist_ok=True) + + yield cache_dir + @pytest.fixture(scope="function") def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]: @@ -484,6 +495,7 @@ class NeonEnvBuilder: safekeeper_extra_opts: Optional[list[str]] = None, storage_controller_port_override: Optional[int] = None, pageserver_io_buffer_alignment: Optional[int] = None, + shared_initdb_cache_dir: Optional[Path] = None, ): self.repo_dir = repo_dir self.rust_log_override = rust_log_override @@ -516,6 +528,7 @@ class NeonEnvBuilder: self.enable_scrub_on_exit = True self.test_output_dir = test_output_dir self.test_overlay_dir = test_overlay_dir + self.shared_initdb_cache_dir = shared_initdb_cache_dir self.overlay_mounts_created_by_us: List[Tuple[str, Path]] = [] self.config_init_force: Optional[str] = None self.top_output_dir = top_output_dir @@ -1052,6 +1065,7 @@ class NeonEnv: def __init__(self, config: NeonEnvBuilder): self.repo_dir = config.repo_dir + self.shared_initdb_cache_dir = config.shared_initdb_cache_dir self.rust_log_override = config.rust_log_override self.port_distributor = config.port_distributor self.s3_mock_server = config.mock_s3_server @@ -1157,6 +1171,10 @@ class NeonEnv: # Default which can be overriden with `NeonEnvBuilder.pageserver_config_override` "availability_zone": "us-east-2a", } + + if 
self.shared_initdb_cache_dir is not None: + ps_cfg["initdb_cache_dir"] = str(self.shared_initdb_cache_dir) + if self.pageserver_virtual_file_io_engine is not None: ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine if config.pageserver_default_tenant_config_compaction_algorithm is not None: @@ -1420,6 +1438,7 @@ def neon_simple_env( pageserver_aux_file_policy: Optional[AuxFileStore], pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]], pageserver_io_buffer_alignment: Optional[int], + shared_initdb_cache_dir: Optional[Path], ) -> Iterator[NeonEnv]: """ Simple Neon environment, with no authentication and no safekeepers. @@ -1447,6 +1466,7 @@ def neon_simple_env( pageserver_aux_file_policy=pageserver_aux_file_policy, pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm, pageserver_io_buffer_alignment=pageserver_io_buffer_alignment, + shared_initdb_cache_dir=shared_initdb_cache_dir ) as builder: env = builder.init_start() @@ -1472,6 +1492,7 @@ def neon_env_builder( pageserver_aux_file_policy: Optional[AuxFileStore], record_property: Callable[[str, object], None], pageserver_io_buffer_alignment: Optional[int], + shared_initdb_cache_dir: Optional[Path], ) -> Iterator[NeonEnvBuilder]: """ Fixture to create a Neon environment for test. @@ -1508,6 +1529,7 @@ def neon_env_builder( pageserver_aux_file_policy=pageserver_aux_file_policy, pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm, pageserver_io_buffer_alignment=pageserver_io_buffer_alignment, + shared_initdb_cache_dir=shared_initdb_cache_dir ) as builder: yield builder # Propogate `preserve_database_files` to make it possible to use in other fixtures,