Cache initdb output to speed up tenant creation in tests

initdb takes about 1 s. Our tests create and destroy a lot of tenants,
so that adds up. Cache the initdb result to speed it up.

This is currently only enabled in tests, mostly out of caution, but
also because when you reuse the initdb result, all the Postgres
clusters end up with the same system_identifier, which is supposed to
be unique. Uniqueness isn't required for correctness: nothing critical
relies on it, and you can easily end up with duplicate
system_identifiers in standalone PostgreSQL too, e.g. if you create a
backup and restore it on a different system. But the identifier is
used in various checks to reduce the chance that you accidentally
apply WAL belonging to a different cluster, for example.

Because this is aimed at tests, there are a few things that might be
surprising:

- The initdb cache directory is configurable, and can be outside the
  pageserver's repo directory. This allows reuse across different
  pageservers running on the same host. In production use, that'd be
  pointless, but our tests create a lot of pageservers.

- The cache is not automatically purged at start / shutdown. For
  production use, we'd probably want that, so that we'd pick up any
  changes in what an empty cluster looks like after a Postgres minor
  version upgrade, for example. But again, tests create and destroy a
  lot of pageservers, so it's important to retain the cache across them.

- The locking on the cache directory relies purely on filesystem
  operations and atomic rename(). Using e.g. a Rust Mutex would be
  more straightforward, but it wouldn't be enough, because the cache
  needs to be shared between different pageservers running on the
  same system. (A condensed sketch of the scheme follows below.)
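
To make that last point concrete, here is a condensed sketch of how the
save path publishes an initdb result into the cache. It mirrors
save_cached_initdb_dir / copy_dir_all from the diff below, but with
simplified error handling, a made-up function name (publish_to_cache),
and a fixed ".tmp" suffix where the real code uses camino_tempfile and
a scopeguard-based cleanup:

    // Sketch only: std-only version of the cache-publish scheme.
    use std::fs;
    use std::io;
    use std::path::Path;

    /// Recursively copy a directory tree (mirrors the copy_dir_all
    /// helper added in this commit).
    fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
        for entry in fs::read_dir(src)? {
            let entry = entry?;
            let target = dst.join(entry.file_name());
            if entry.file_type()?.is_dir() {
                fs::create_dir(&target)?;
                copy_dir_all(&entry.path(), &target)?;
            } else {
                fs::copy(entry.path(), &target)?;
            }
        }
        Ok(())
    }

    /// Publish `initdb_result` as the cache entry at `cache_entry`.
    fn publish_to_cache(initdb_result: &Path, cache_entry: &Path) -> io::Result<()> {
        // Creating the cache entry directory doubles as taking the "lock":
        // whoever succeeds is responsible for populating it; everyone else
        // sees AlreadyExists and backs off.
        match fs::create_dir(cache_entry) {
            Ok(()) => {}
            Err(e) if e.kind() == io::ErrorKind::AlreadyExists => return Ok(()),
            Err(e) => return Err(e),
        }
        // Populate a sibling temp directory first, then rename() it over the
        // still-empty placeholder. On Unix, rename(2) replaces an empty
        // target directory atomically, so readers never see a half-copied
        // cache entry.
        let tmp = cache_entry.with_extension("tmp");
        fs::create_dir(&tmp)?;
        copy_dir_all(initdb_result, &tmp)?;
        fs::rename(&tmp, cache_entry)?;
        Ok(())
    }

The restore side just copies the cache entry into the tenant's pgdata
directory if it exists, and falls back to running initdb (and then
trying to populate the cache) if it doesn't.
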
Author: Heikki Linnakangas
Date:   2024-09-11 11:12:00 +03:00
parent cb060548fb
commit 8e7f336540

4 changed files with 141 additions and 1 deletion


@@ -64,6 +64,7 @@ pub struct ConfigToml {
     #[serde(with = "humantime_serde")]
     pub wal_redo_timeout: Duration,
     pub superuser: String,
+    pub initdb_cache_dir: Option<Utf8PathBuf>,
     pub page_cache_size: usize,
     pub max_file_descriptors: usize,
     pub pg_distrib_dir: Option<Utf8PathBuf>,
@@ -358,6 +359,7 @@ impl Default for ConfigToml {
             wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
                 .expect("cannot parse default wal redo timeout")),
             superuser: (DEFAULT_SUPERUSER.to_string()),
+            initdb_cache_dir: None,
             page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
             max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
             pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir()


@@ -71,6 +71,8 @@ pub struct PageServerConf {
     pub superuser: String,
+    pub initdb_cache_dir: Option<Utf8PathBuf>,
     pub page_cache_size: usize,
     pub max_file_descriptors: usize,
@@ -309,6 +311,7 @@ impl PageServerConf {
             wait_lsn_timeout,
             wal_redo_timeout,
             superuser,
+            initdb_cache_dir,
             page_cache_size,
             max_file_descriptors,
             pg_distrib_dir,
@@ -356,6 +359,7 @@ impl PageServerConf {
             wait_lsn_timeout,
             wal_redo_timeout,
             superuser,
+            initdb_cache_dir,
             page_cache_size,
             max_file_descriptors,
             http_auth_type,


@@ -3491,7 +3491,7 @@ impl Tenant {
                 .context("extract initdb tar")?;
         } else {
             // Init temporarily repo to get bootstrap data, this creates a directory in the `pgdata_path` path
-            run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?;
+            run_initdb_with_cache(self.conf, &pgdata_path, pg_version, &self.cancel).await?;
 
             // Upload the created data dir to S3
             if self.tenant_shard_id().is_shard_zero() {
@@ -3837,6 +3837,118 @@ impl Tenant {
     }
 }
 
+fn cached_initdb_dirname(initial_superuser_name: &str, pg_version: u32) -> String {
+    use std::collections::hash_map::DefaultHasher;
+    use std::hash::Hash;
+    use std::hash::Hasher;
+
+    let mut hasher = DefaultHasher::new();
+    initial_superuser_name.hash(&mut hasher);
+    let hash = hasher.finish();
+
+    format!("cached_initial_pgdata_{pg_version}_{:016}", hash)
+}
+
+fn copy_dir_all(src: impl AsRef<std::path::Path>, dst: impl AsRef<std::path::Path>) -> std::io::Result<()> {
+    for entry in fs::read_dir(src.as_ref())? {
+        let entry = entry?;
+        let subsrc = entry.path();
+        let subdst = dst.as_ref().join(&entry.file_name());
+        if entry.file_type()?.is_dir() {
+            std::fs::create_dir(&subdst)?;
+            copy_dir_all(&subsrc, &subdst)?;
+        } else {
+            std::fs::copy(&subsrc, &subdst)?;
+        }
+    }
+    Ok(())
+}
+
+fn restore_cached_initdb_dir(
+    cached_path: &Utf8Path,
+    target_path: &Utf8Path,
+) -> anyhow::Result<bool> {
+    if !cached_path.exists() {
+        info!("cached initdb dir \"{cached_path}\" does not exist yet");
+        return Ok(false);
+    }
+
+    std::fs::create_dir(target_path)?;
+    copy_dir_all(cached_path, target_path)?;
+    info!("restored initdb result from cache dir \"{cached_path}\"");
+    Ok(true)
+}
+
+fn save_cached_initdb_dir(
+    src_path: &Utf8Path,
+    cache_path: &Utf8Path,
+) -> anyhow::Result<()> {
+    match std::fs::create_dir(cache_path) {
+        Ok(()) => {
+            info!("saving initdb result to cache dir \"{cache_path}\"");
+        }
+        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
+            info!("cache initdb dir \"{cache_path}\" already exists, not saving");
+            return Ok(());
+        }
+        Err(err) => return Err(anyhow::Error::from(err)),
+    };
+
+    let cache_dir_guard = scopeguard::guard(cache_path, |cp| {
+        if let Err(err) = std::fs::remove_dir_all(&cp) {
+            error!("could not remove cached initdb directory {cp}: {err}");
+        }
+    });
+
+    let cache_parent_path = cache_path
+        .parent()
+        .ok_or(anyhow::Error::msg("no cache parent path"))?;
+    let tmp_dirpath = camino_tempfile::tempdir_in(cache_parent_path)?;
+    copy_dir_all(src_path, &tmp_dirpath)?;
+    std::fs::rename(tmp_dirpath, &*cache_dir_guard)?;
+
+    // disarm the guard
+    scopeguard::ScopeGuard::into_inner(cache_dir_guard);
+
+    Ok(())
+}
+
+async fn run_initdb_with_cache(
+    conf: &'static PageServerConf,
+    initdb_target_dir: &Utf8Path,
+    pg_version: u32,
+    cancel: &CancellationToken,
+) -> Result<(), InitdbError> {
+    let cache_dir = conf.initdb_cache_dir.as_ref().map(|initdb_cache_dir| {
+        initdb_cache_dir.join(cached_initdb_dirname(&conf.superuser, pg_version))
+    });
+
+    if let Some(cache_dir) = &cache_dir {
+        match restore_cached_initdb_dir(&cache_dir, initdb_target_dir) {
+            Ok(true) => return Ok(()),
+            Ok(false) => {}
+            Err(err) => {
+                warn!("Error restoring from cached initdb directory \"{cache_dir}\": {err}");
+                if initdb_target_dir.exists() {
+                    if let Err(err) = std::fs::remove_dir_all(&initdb_target_dir) {
+                        error!("could not remove temporary initdb target directory {initdb_target_dir}: {err}");
+                    }
+                }
+            }
+        }
+    }
+
+    run_initdb(conf, initdb_target_dir, pg_version, cancel).await?;
+
+    if let Some(cache_dir) = &cache_dir {
+        if let Err(err) = save_cached_initdb_dir(initdb_target_dir, &cache_dir) {
+            warn!("error saving initdb result to cache directory \"{cache_dir}\": {err}");
+        }
+    }
+
+    Ok(())
+}
+
 /// Create the cluster temporarily in 'initdbpath' directory inside the repository
 /// to get bootstrap data for timeline initialization.
 async fn run_initdb(


@@ -181,6 +181,17 @@ def top_output_dir(base_dir: Path) -> Iterator[Path]:
     log.info(f"top_output_dir is {output_dir}")
     yield output_dir
 
 
+@pytest.fixture(scope="session", autouse=True)
+def shared_initdb_cache_dir(top_output_dir: Path) -> Iterator[Path]:
+    log.info("Creating shared initdb cache directory")
+    cache_dir = top_output_dir / "shared_initdb_cache"
+    shutil.rmtree(cache_dir, ignore_errors=True)
+    cache_dir.mkdir(exist_ok=True)
+    yield cache_dir
+
+
 @pytest.fixture(scope="function")
 def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]:
@@ -484,6 +495,7 @@ class NeonEnvBuilder:
         safekeeper_extra_opts: Optional[list[str]] = None,
         storage_controller_port_override: Optional[int] = None,
         pageserver_io_buffer_alignment: Optional[int] = None,
+        shared_initdb_cache_dir: Optional[Path] = None,
     ):
         self.repo_dir = repo_dir
         self.rust_log_override = rust_log_override
@@ -516,6 +528,7 @@ class NeonEnvBuilder:
         self.enable_scrub_on_exit = True
         self.test_output_dir = test_output_dir
         self.test_overlay_dir = test_overlay_dir
+        self.shared_initdb_cache_dir = shared_initdb_cache_dir
         self.overlay_mounts_created_by_us: List[Tuple[str, Path]] = []
         self.config_init_force: Optional[str] = None
         self.top_output_dir = top_output_dir
@@ -1052,6 +1065,7 @@ class NeonEnv:
     def __init__(self, config: NeonEnvBuilder):
         self.repo_dir = config.repo_dir
+        self.shared_initdb_cache_dir = config.shared_initdb_cache_dir
         self.rust_log_override = config.rust_log_override
         self.port_distributor = config.port_distributor
         self.s3_mock_server = config.mock_s3_server
@@ -1157,6 +1171,10 @@ class NeonEnv:
             # Default which can be overriden with `NeonEnvBuilder.pageserver_config_override`
             "availability_zone": "us-east-2a",
         }
+
+        if self.shared_initdb_cache_dir is not None:
+            ps_cfg["initdb_cache_dir"] = str(self.shared_initdb_cache_dir)
+
         if self.pageserver_virtual_file_io_engine is not None:
             ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine
         if config.pageserver_default_tenant_config_compaction_algorithm is not None:
@@ -1420,6 +1438,7 @@ def neon_simple_env(
     pageserver_aux_file_policy: Optional[AuxFileStore],
     pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]],
     pageserver_io_buffer_alignment: Optional[int],
+    shared_initdb_cache_dir: Optional[Path],
 ) -> Iterator[NeonEnv]:
     """
     Simple Neon environment, with no authentication and no safekeepers.
@@ -1447,6 +1466,7 @@ def neon_simple_env(
         pageserver_aux_file_policy=pageserver_aux_file_policy,
         pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm,
         pageserver_io_buffer_alignment=pageserver_io_buffer_alignment,
+        shared_initdb_cache_dir=shared_initdb_cache_dir
     ) as builder:
         env = builder.init_start()
@@ -1472,6 +1492,7 @@ def neon_env_builder(
     pageserver_aux_file_policy: Optional[AuxFileStore],
     record_property: Callable[[str, object], None],
     pageserver_io_buffer_alignment: Optional[int],
+    shared_initdb_cache_dir: Optional[Path],
 ) -> Iterator[NeonEnvBuilder]:
     """
     Fixture to create a Neon environment for test.
@@ -1508,6 +1529,7 @@ def neon_env_builder(
         pageserver_aux_file_policy=pageserver_aux_file_policy,
         pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm,
         pageserver_io_buffer_alignment=pageserver_io_buffer_alignment,
+        shared_initdb_cache_dir=shared_initdb_cache_dir
     ) as builder:
         yield builder
         # Propogate `preserve_database_files` to make it possible to use in other fixtures,