diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 72865ad74d..98730a7637 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -23,13 +23,11 @@ use bytes::{BufMut, Bytes, BytesMut}; use nix::poll::*; use serde::Serialize; use std::collections::VecDeque; -use std::fs::OpenOptions; use std::io::prelude::*; use std::io::{Error, ErrorKind}; use std::ops::{Deref, DerefMut}; use std::os::unix::io::{AsRawFd, RawFd}; use std::os::unix::prelude::CommandExt; -use std::path::PathBuf; use std::process::Stdio; use std::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command}; use std::sync::{Mutex, MutexGuard}; @@ -639,26 +637,26 @@ impl PostgresRedoManager { input: &mut MutexGuard>, pg_version: u32, ) -> Result<(), Error> { - // FIXME: We need a dummy Postgres cluster to run the process in. Currently, we - // just create one with constant name. That fails if you try to launch more than - // one WAL redo manager concurrently. - let datadir = path_with_suffix_extension( + // Previous versions of wal-redo required a data directory, and those directories + // occupied some space on disk. Remove the legacy directory if we find one. + // + // This code can be dropped after one release cycle. + let legacy_datadir = path_with_suffix_extension( self.conf .tenant_path(&self.tenant_id) .join("wal-redo-datadir"), TEMP_FILE_SUFFIX, ); - - // Create empty data directory for wal-redo postgres, deleting old one first. 
- if datadir.exists() { - info!("old temporary datadir {datadir:?} exists, removing"); - fs::remove_dir_all(&datadir).map_err(|e| { + if legacy_datadir.exists() { + info!("legacy wal-redo datadir {legacy_datadir:?} exists, removing"); + fs::remove_dir_all(&legacy_datadir).map_err(|e| { Error::new( e.kind(), - format!("Old temporary dir {datadir:?} removal failure: {e}"), + format!("legacy wal-redo datadir {legacy_datadir:?} removal failure: {e}"), ) })?; } + let pg_bin_dir_path = self .conf .pg_bin_dir(pg_version) @@ -668,35 +666,6 @@ impl PostgresRedoManager { .pg_lib_dir(pg_version) .map_err(|e| Error::new(ErrorKind::Other, format!("incorrect pg_lib_dir path: {e}")))?; - info!("running initdb in {}", datadir.display()); - let initdb = Command::new(pg_bin_dir_path.join("initdb")) - .args(["-D", &datadir.to_string_lossy()]) - .arg("-N") - .env_clear() - .env("LD_LIBRARY_PATH", &pg_lib_dir_path) - .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path) // macOS - .close_fds() - .output() - .map_err(|e| Error::new(e.kind(), format!("failed to execute initdb: {e}")))?; - - if !initdb.status.success() { - return Err(Error::new( - ErrorKind::Other, - format!( - "initdb failed\nstdout: {}\nstderr:\n{}", - String::from_utf8_lossy(&initdb.stdout), - String::from_utf8_lossy(&initdb.stderr) - ), - )); - } else { - // Limit shared cache for wal-redo-postgres - let mut config = OpenOptions::new() - .append(true) - .open(PathBuf::from(&datadir).join("postgresql.conf"))?; - config.write_all(b"shared_buffers=128kB\n")?; - config.write_all(b"fsync=off\n")?; - } - // Start postgres itself let child = Command::new(pg_bin_dir_path.join("postgres")) .arg("--wal-redo") @@ -706,7 +675,6 @@ impl PostgresRedoManager { .env_clear() .env("LD_LIBRARY_PATH", &pg_lib_dir_path) .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path) - .env("PGDATA", &datadir) // The redo process is not trusted, and runs in seccomp mode that // doesn't allow it to open any files. 
We have to also make sure it // doesn't inherit any file descriptors from the pageserver, that diff --git a/pgxn/neon_walredo/walredoproc.c b/pgxn/neon_walredo/walredoproc.c index ffbfca5a40..9cce9b2a67 100644 --- a/pgxn/neon_walredo/walredoproc.c +++ b/pgxn/neon_walredo/walredoproc.c @@ -65,6 +65,14 @@ #include "rusagestub.h" #endif +#include "access/clog.h" +#include "access/commit_ts.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "access/nbtree.h" +#include "access/subtrans.h" +#include "access/syncscan.h" +#include "access/twophase.h" #include "access/xlog.h" #include "access/xlog_internal.h" #if PG_VERSION_NUM >= 150000 @@ -72,18 +80,36 @@ #endif #include "access/xlogutils.h" #include "catalog/pg_class.h" -#include "libpq/libpq.h" +#include "commands/async.h" #include "libpq/pqformat.h" #include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" +#include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" +#include "replication/logicallauncher.h" +#include "replication/origin.h" +#include "replication/slot.h" +#include "replication/walreceiver.h" +#include "replication/walsender.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "storage/dsm.h" #include "storage/ipc.h" +#include "storage/pg_shmem.h" +#include "storage/pmsignal.h" +#include "storage/predicate.h" #include "storage/proc.h" +#include "storage/procarray.h" +#include "storage/procsignal.h" +#include "storage/sinvaladt.h" #include "storage/smgr.h" +#include "storage/spin.h" #include "tcop/tcopprot.h" #include "utils/memutils.h" #include "utils/ps_status.h" +#include "utils/snapmgr.h" #include "inmem_smgr.h" @@ -101,6 +127,7 @@ static void apply_error_callback(void *arg); static bool redo_block_filter(XLogReaderState *record, uint8 block_id); static void GetPage(StringInfo input_message); static ssize_t buffered_read(void *buf, size_t count); +static void 
CreateFakeSharedMemoryAndSemaphores(); static BufferTag target_redo_tag; @@ -141,7 +168,7 @@ enter_seccomp_mode(void) PG_SCMP_ALLOW(shmctl), PG_SCMP_ALLOW(shmdt), PG_SCMP_ALLOW(unlink), // shm_unlink - */ + */ }; #ifdef MALLOC_NO_MMAP @@ -177,6 +204,7 @@ WalRedoMain(int argc, char *argv[]) * buffers. So let's keep it small (default value is 1024) */ num_temp_buffers = 4; + NBuffers = 4; /* * install the simple in-memory smgr @@ -184,49 +212,33 @@ WalRedoMain(int argc, char *argv[]) smgr_hook = smgr_inmem; smgr_init_hook = smgr_init_inmem; - /* - * Validate we have been given a reasonable-looking DataDir and change into it. - */ - checkDataDir(); - ChangeToDataDir(); - - /* - * Create lockfile for data directory. - */ - CreateDataDirLockFile(false); - - /* read control file (error checking and contains config ) */ - LocalProcessControlFile(false); - - /* - * process any libraries that should be preloaded at postmaster start - */ - process_shared_preload_libraries(); /* Initialize MaxBackends (if under postmaster, was done already) */ + MaxConnections = 1; + max_worker_processes = 0; + max_parallel_workers = 0; + max_wal_senders = 0; InitializeMaxBackends(); -#if PG_VERSION_NUM >= 150000 - /* - * Give preloaded libraries a chance to request additional shared memory. - */ - process_shmem_requests(); + /* Disable lastWrittenLsnCache */ + lastWrittenLsnCacheSize = 0; - /* - * Now that loadable modules have had their chance to request additional - * shared memory, determine the value of any runtime-computed GUCs that - * depend on the amount of shared memory required. - */ +#if PG_VERSION_NUM >= 150000 + process_shmem_requests(); InitializeShmemGUCs(); /* - * Now that modules have been loaded, we can process any custom resource - * managers specified in the wal_consistency_checking GUC. + * This will try to access data directory which we do not set. + * Seems to be pretty safe to disable. 
+ */ + /* InitializeWalConsistencyChecking(); */ +#endif - CreateSharedMemoryAndSemaphores(); + /* + * We have our own version of CreateSharedMemoryAndSemaphores() that + * sets up local memory instead of a shared one. + */ + CreateFakeSharedMemoryAndSemaphores(); /* * Remember stand-alone backend startup time,roughly at the same point @@ -354,6 +366,172 @@ WalRedoMain(int argc, char *argv[]) } +/* + * Initialize dummy shmem. + * + * This code follows CreateSharedMemoryAndSemaphores() but manually sets up + * the shmem header and skips a few initialization steps that are not needed for + * WAL redo. + * + * I've also tried removing most of the initialization functions that request some + * memory (like ApplyLauncherShmemInit and friends), but in reality that did not have + * any sizeable effect on RSS, so such a cleanup is probably not worth the risk of + * ending up with a half-initialized postgres. + */ +static void +CreateFakeSharedMemoryAndSemaphores() +{ + PGShmemHeader *shim = NULL; + PGShmemHeader *hdr; + Size size; + int numSemas; + char cwd[MAXPGPATH]; + +#if PG_VERSION_NUM >= 150000 + size = CalculateShmemSize(&numSemas); +#else + /* + * Postgres v14 doesn't have a separate CalculateShmemSize(). 
Use result of the + * corresponging calculation in CreateSharedMemoryAndSemaphores() + */ + size = 1409024; + numSemas = 10; +#endif + + /* Dummy implementation of PGSharedMemoryCreate() */ + { + hdr = (PGShmemHeader *) malloc(size); + if (!hdr) + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[neon-wal-redo] can not allocate (pseudo-) shared memory"))); + + hdr->creatorPID = getpid(); + hdr->magic = PGShmemMagic; + hdr->dsm_control = 0; + hdr->device = 42; /* not relevant for non-shared memory */ + hdr->inode = 43; /* not relevant for non-shared memory */ + hdr->totalsize = size; + hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); + + shim = hdr; + UsedShmemSegAddr = hdr; + UsedShmemSegID = (unsigned long) 42; /* not relevant for non-shared memory */ + } + + InitShmemAccess(hdr); + + /* + * Reserve semaphores uses dir name as a source of entropy. Set it to cwd(). Rest + * of the code does not need DataDir access so nullify DataDir after + * PGReserveSemaphores() to error out if something will try to access it. + */ + if (!getcwd(cwd, MAXPGPATH)) + ereport(FATAL, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("[neon-wal-redo] can not read current directory name"))); + DataDir = cwd; + PGReserveSemaphores(numSemas); + DataDir = NULL; + + /* + * The rest of function follows CreateSharedMemoryAndSemaphores() closely, + * skipped parts are marked with comments. + */ + InitShmemAllocation(); + + /* + * Now initialize LWLocks, which do shared memory allocation and are + * needed for InitShmemIndex. 
+ */ + CreateLWLocks(); + + /* + * Set up shmem.c index hashtable + */ + InitShmemIndex(); + + dsm_shmem_init(); + + /* + * Set up xlog, clog, and buffers + */ + XLOGShmemInit(); + CLOGShmemInit(); + CommitTsShmemInit(); + SUBTRANSShmemInit(); + MultiXactShmemInit(); + InitBufferPool(); + + /* + * Set up lock manager + */ + InitLocks(); + + /* + * Set up predicate lock manager + */ + InitPredicateLocks(); + + /* + * Set up process table + */ + if (!IsUnderPostmaster) + InitProcGlobal(); + CreateSharedProcArray(); + CreateSharedBackendStatus(); + TwoPhaseShmemInit(); + BackgroundWorkerShmemInit(); + + /* + * Set up shared-inval messaging + */ + CreateSharedInvalidationState(); + + /* + * Set up interprocess signaling mechanisms + */ + PMSignalShmemInit(); + ProcSignalShmemInit(); + CheckpointerShmemInit(); + AutoVacuumShmemInit(); + ReplicationSlotsShmemInit(); + ReplicationOriginShmemInit(); + WalSndShmemInit(); + WalRcvShmemInit(); + PgArchShmemInit(); + ApplyLauncherShmemInit(); + + /* + * Set up other modules that need some shared memory space + */ + SnapMgrInit(); + BTreeShmemInit(); + SyncScanShmemInit(); + /* Skip due to the 'pg_notify' directory check */ + /* AsyncShmemInit(); */ + +#ifdef EXEC_BACKEND + + /* + * Alloc the win32 shared backend array + */ + if (!IsUnderPostmaster) + ShmemBackendArrayAllocation(); +#endif + + /* Initialize dynamic shared memory facilities. 
*/ + if (!IsUnderPostmaster) + dsm_postmaster_startup(shim); + + /* + * Now give loadable modules a chance to set up their shmem allocations + */ + if (shmem_startup_hook) + shmem_startup_hook(); +} + + /* Version compatility wrapper for ReadBufferWithoutRelcache */ static inline Buffer NeonRedoReadBuffer(RelFileNode rnode, diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 332e2f2519..731e78a3e3 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -220,9 +220,12 @@ def prepare_snapshot( for tenant in (repo_dir / "pgdatadirs" / "tenants").glob("*"): shutil.rmtree(tenant) - # Remove wal-redo temp directory + # Remove wal-redo temp directory if it exists. Newer pageserver versions don't create + # them anymore, but old versions did. for tenant in (repo_dir / "tenants").glob("*"): - shutil.rmtree(tenant / "wal-redo-datadir.___temp") + wal_redo_dir = tenant / "wal-redo-datadir.___temp" + if wal_redo_dir.exists() and wal_redo_dir.is_dir(): + shutil.rmtree(wal_redo_dir) # Update paths and ports in config files pageserver_toml = repo_dir / "pageserver.toml" diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index 5fb2e0bba0..9fd9794436 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit 5fb2e0bba06cc018ee2506f337c91751ab695454 +Subproject commit 9fd9794436d02fbfe68f8fca5beab218907cec41 diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 919851e781..257aaefb25 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 919851e7811fcb2ecfc67f35bfd63a35639c73b5 +Subproject commit 257aaefb251c5c85c44652c01bf68c43db62748a