diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 1194ee93ef..321048a22e 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -64,6 +64,7 @@ pub struct ConfigToml { #[serde(with = "humantime_serde")] pub wal_redo_timeout: Duration, pub superuser: String, + pub initdb_cache_dir: Option, pub page_cache_size: usize, pub max_file_descriptors: usize, pub pg_distrib_dir: Option, @@ -358,6 +359,7 @@ impl Default for ConfigToml { wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT) .expect("cannot parse default wal redo timeout")), superuser: (DEFAULT_SUPERUSER.to_string()), + initdb_cache_dir: None, page_cache_size: (DEFAULT_PAGE_CACHE_SIZE), max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS), pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir() diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 29a98855d3..14fa8e4895 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -71,6 +71,8 @@ pub struct PageServerConf { pub superuser: String, + pub initdb_cache_dir: Option, + pub page_cache_size: usize, pub max_file_descriptors: usize, @@ -309,6 +311,7 @@ impl PageServerConf { wait_lsn_timeout, wal_redo_timeout, superuser, + initdb_cache_dir, page_cache_size, max_file_descriptors, pg_distrib_dir, @@ -356,6 +359,7 @@ impl PageServerConf { wait_lsn_timeout, wal_redo_timeout, superuser, + initdb_cache_dir, page_cache_size, max_file_descriptors, http_auth_type, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c6f0e48101..1b7e8facfb 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3491,7 +3491,7 @@ impl Tenant { .context("extract initdb tar")?; } else { // Init temporarily repo to get bootstrap data, this creates a directory in the `pgdata_path` path - run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?; + 
run_initdb_with_cache(self.conf, &pgdata_path, pg_version, &self.cancel).await?;
 
             // Upload the created data dir to S3
             if self.tenant_shard_id().is_shard_zero() {
@@ -3837,6 +3837,160 @@ impl Tenant {
     }
 }
 
+/// Name of a file that `initdb` always places at the top level of a data directory.
+/// A cache entry is only considered usable when it contains this file, so that an empty or
+/// half-created directory is never mistaken for a complete `initdb` result.
+const CACHED_INITDB_MARKER_FILE: &str = "PG_VERSION";
+
+/// Returns the name of the cache subdirectory for the given superuser name and PostgreSQL
+/// version.
+///
+/// The superuser name is folded into a hash so that it contributes only hex digits to the
+/// path. `DefaultHasher` does not promise a stable algorithm across Rust releases, so a
+/// toolchain upgrade may change the name; that merely results in a cache miss.
+fn cached_initdb_dirname(initial_superuser_name: &str, pg_version: u32) -> String {
+    use std::collections::hash_map::DefaultHasher;
+    use std::hash::{Hash, Hasher};
+
+    let mut hasher = DefaultHasher::new();
+    initial_superuser_name.hash(&mut hasher);
+    let hash = hasher.finish();
+
+    // Fixed-width hex: a u64 always fits into 16 hex digits, whereas zero-padded decimal
+    // (`{:016}`) exceeds the pad width for most hash values.
+    format!("cached_initial_pgdata_{pg_version}_{hash:016x}")
+}
+
+/// Recursively copies the contents of the directory `src` into the existing directory `dst`.
+///
+/// Sub-directories are created with `std::fs::create_dir`; every other entry is copied with
+/// `std::fs::copy`. The first I/O error aborts the copy and is returned as is.
+fn copy_dir_all(
+    src: impl AsRef<std::path::Path>,
+    dst: impl AsRef<std::path::Path>,
+) -> std::io::Result<()> {
+    let dst = dst.as_ref();
+    for entry in std::fs::read_dir(src.as_ref())? {
+        let entry = entry?;
+        let subsrc = entry.path();
+        let subdst = dst.join(entry.file_name());
+
+        if entry.file_type()?.is_dir() {
+            std::fs::create_dir(&subdst)?;
+            copy_dir_all(&subsrc, &subdst)?;
+        } else {
+            std::fs::copy(&subsrc, &subdst)?;
+        }
+    }
+    Ok(())
+}
+
+/// Tries to populate `target_path` from the cache entry at `cached_path`.
+///
+/// Returns `Ok(true)` if `target_path` was created and filled from the cache, and `Ok(false)`
+/// if there is no complete cache entry. On `Err`, `target_path` may have been partially
+/// created; cleaning it up is left to the caller.
+fn restore_cached_initdb_dir(
+    cached_path: &Utf8Path,
+    target_path: &Utf8Path,
+) -> anyhow::Result<bool> {
+    // Checking for the marker file instead of the directory itself keeps us from "restoring"
+    // an empty directory that exists but was never filled.
+    if !cached_path.join(CACHED_INITDB_MARKER_FILE).exists() {
+        info!("cached initdb dir \"{cached_path}\" does not exist yet");
+        return Ok(false);
+    }
+
+    std::fs::create_dir(target_path)
+        .with_context(|| format!("create initdb target dir \"{target_path}\""))?;
+    copy_dir_all(cached_path, target_path)
+        .with_context(|| format!("copy \"{cached_path}\" to \"{target_path}\""))?;
+    info!("restored initdb result from cache dir \"{cached_path}\"");
+    Ok(true)
+}
+
+/// Publishes the `initdb` result in `src_path` as the cache entry `cache_path`.
+///
+/// The data is first copied into a temporary sibling directory and then moved into place
+/// with a single rename, so `cache_path` never holds a partially copied tree. If a complete
+/// entry already exists (before or after our copy), this is not an error and nothing is
+/// overwritten.
+fn save_cached_initdb_dir(src_path: &Utf8Path, cache_path: &Utf8Path) -> anyhow::Result<()> {
+    let is_cached = || cache_path.join(CACHED_INITDB_MARKER_FILE).exists();
+
+    if is_cached() {
+        info!("cache initdb dir \"{cache_path}\" already exists, not saving");
+        return Ok(());
+    }
+
+    let cache_parent_path = cache_path
+        .parent()
+        .ok_or_else(|| anyhow::Error::msg("no cache parent path"))?;
+
+    info!("saving initdb result to cache dir \"{cache_path}\"");
+    // The temporary directory is removed on drop, which cleans up after a failed copy or
+    // rename. After a successful rename there is nothing left at that path to remove.
+    let tmp_dir = camino_tempfile::tempdir_in(cache_parent_path)?;
+    copy_dir_all(src_path, tmp_dir.path())?;
+
+    match std::fs::rename(tmp_dir.path(), cache_path) {
+        Ok(()) => Ok(()),
+        // Renaming onto a non-empty directory fails: somebody else published first.
+        Err(_) if is_cached() => {
+            info!("cache initdb dir \"{cache_path}\" already exists, not saving");
+            Ok(())
+        }
+        Err(err) => Err(anyhow::Error::from(err)),
+    }
+}
+
+/// Runs `initdb` into `initdb_target_dir`, reusing a cached result when one is available.
+///
+/// Without `conf.initdb_cache_dir` this is equivalent to calling `run_initdb` directly.
+/// With it, a complete cache entry for (`conf.superuser`, `pg_version`) is copied into
+/// `initdb_target_dir` instead of running `initdb`; otherwise `initdb` runs and its output
+/// is saved to the cache afterwards. Cache failures are only logged: a failed restore falls
+/// back to `run_initdb`, and a failed save does not fail the call.
+async fn run_initdb_with_cache(
+    conf: &'static PageServerConf,
+    initdb_target_dir: &Utf8Path,
+    pg_version: u32,
+    cancel: &CancellationToken,
+) -> Result<(), InitdbError> {
+    let cache_dir = conf
+        .initdb_cache_dir
+        .as_ref()
+        .map(|dir| dir.join(cached_initdb_dirname(&conf.superuser, pg_version)));
+
+    // NOTE(review): the cache helpers do blocking filesystem I/O on the async task; consider
+    // moving them onto a blocking thread pool if the copies turn out to be slow.
+    if let Some(cache_dir) = &cache_dir {
+        match restore_cached_initdb_dir(cache_dir, initdb_target_dir) {
+            Ok(true) => return Ok(()),
+            Ok(false) => {}
+            Err(err) => {
+                warn!("Error restoring from cached initdb directory \"{cache_dir}\": {err:#}");
+                // Remove whatever a failed restore left behind before running initdb.
+                if initdb_target_dir.exists() {
+                    if let Err(err) = std::fs::remove_dir_all(initdb_target_dir) {
+                        error!("could not remove temporary initdb target directory {initdb_target_dir}: {err}");
+                    }
+                }
+            }
+        }
+    }
+
+    run_initdb(conf, initdb_target_dir, pg_version, cancel).await?;
+
+    if let Some(cache_dir) = &cache_dir {
+        if let Err(err) = save_cached_initdb_dir(initdb_target_dir, cache_dir) {
+            warn!("error saving initdb result to cache directory \"{cache_dir}\": {err:#}");
+        }
+    }
+
+    Ok(())
+}
+
 /// Create the cluster temporarily in 'initdbpath' directory inside the repository
 /// to get bootstrap data for timeline initialization.
async fn run_initdb(
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 60887b9aed..b289f28edb 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -181,6 +181,17 @@ def top_output_dir(base_dir: Path) -> Iterator[Path]:
     log.info(f"top_output_dir is {output_dir}")
     yield output_dir
 
 
+@pytest.fixture(scope="session", autouse=True)
+def shared_initdb_cache_dir(top_output_dir: Path) -> Iterator[Path]:
+    """Yield a freshly recreated directory handed to pageservers as `initdb_cache_dir`."""
+    log.info("Creating shared initdb cache directory")
+    cache_dir = top_output_dir / "shared_initdb_cache"
+    # Wipe whatever an earlier session left behind; the pageservers repopulate it on demand.
+    shutil.rmtree(cache_dir, ignore_errors=True)
+    cache_dir.mkdir(exist_ok=True)
+    yield cache_dir
+
+
 @pytest.fixture(scope="function")
 def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]:
@@ -484,6 +495,7 @@ class NeonEnvBuilder:
         safekeeper_extra_opts: Optional[list[str]] = None,
         storage_controller_port_override: Optional[int] = None,
         pageserver_io_buffer_alignment: Optional[int] = None,
+        shared_initdb_cache_dir: Optional[Path] = None,
     ):
         self.repo_dir = repo_dir
         self.rust_log_override = rust_log_override
@@ -516,6 +528,7 @@ class NeonEnvBuilder:
         self.enable_scrub_on_exit = True
         self.test_output_dir = test_output_dir
         self.test_overlay_dir = test_overlay_dir
+        self.shared_initdb_cache_dir = shared_initdb_cache_dir
         self.overlay_mounts_created_by_us: List[Tuple[str, Path]] = []
         self.config_init_force: Optional[str] = None
         self.top_output_dir = top_output_dir
@@ -1052,6 +1065,7 @@ class NeonEnv:
 
     def __init__(self, config: NeonEnvBuilder):
         self.repo_dir = config.repo_dir
+        self.shared_initdb_cache_dir = config.shared_initdb_cache_dir
         self.rust_log_override = config.rust_log_override
         self.port_distributor = config.port_distributor
         self.s3_mock_server = config.mock_s3_server
@@ -1157,6 +1171,10 @@ class NeonEnv:
                 # Default which can be overriden with `NeonEnvBuilder.pageserver_config_override`
                 "availability_zone": "us-east-2a",
             }
+
+            if self.shared_initdb_cache_dir is not None:
+                ps_cfg["initdb_cache_dir"] = str(self.shared_initdb_cache_dir)
+
             if self.pageserver_virtual_file_io_engine is not None:
                 ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine
             if config.pageserver_default_tenant_config_compaction_algorithm is not None:
@@ -1420,6 +1438,7 @@ def neon_simple_env(
     pageserver_aux_file_policy: Optional[AuxFileStore],
     pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]],
     pageserver_io_buffer_alignment: Optional[int],
+    shared_initdb_cache_dir: Optional[Path],
 ) -> Iterator[NeonEnv]:
     """
     Simple Neon environment, with no authentication and no safekeepers.
@@ -1447,6 +1466,7 @@ def neon_simple_env(
         pageserver_aux_file_policy=pageserver_aux_file_policy,
         pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm,
         pageserver_io_buffer_alignment=pageserver_io_buffer_alignment,
+        shared_initdb_cache_dir=shared_initdb_cache_dir,
     ) as builder:
         env = builder.init_start()
 
@@ -1472,6 +1492,7 @@ def neon_env_builder(
     pageserver_aux_file_policy: Optional[AuxFileStore],
     record_property: Callable[[str, object], None],
     pageserver_io_buffer_alignment: Optional[int],
+    shared_initdb_cache_dir: Optional[Path],
 ) -> Iterator[NeonEnvBuilder]:
     """
     Fixture to create a Neon environment for test.
@@ -1508,6 +1529,7 @@ def neon_env_builder(
         pageserver_aux_file_policy=pageserver_aux_file_policy,
         pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm,
         pageserver_io_buffer_alignment=pageserver_io_buffer_alignment,
+        shared_initdb_cache_dir=shared_initdb_cache_dir,
     ) as builder:
         yield builder
         # Propogate `preserve_database_files` to make it possible to use in other fixtures,