drive by typo fix

chore: remove no longer needed empty rel fix
this seems to have been fixed long enough ago.
2026-02-05 03:30:36 +00:00 · 2022-11-02 21:11:05 +02:00 · 2022-11-02 21:10:44 +02:00 · 2022-11-02 18:37:48 +00:00 · 2022-11-02 12:30:09 -04:00 · 2022-11-02 16:22:58 +01:00
71 changed files with 2452 additions and 1278 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -127,8 +127,8 @@ jobs:
            target/
          # Fall back to older versions of the key, if no cache for current Cargo.lock was found
          key: |
-            v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
-            v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-
+            v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
+            v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-

      - name: Cache postgres v14 build
        id: cache_pg_14
@@ -389,7 +389,7 @@ jobs:
            !~/.cargo/registry/src
            ~/.cargo/git/
            target/
-          key: v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
+          key: v10-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}

      - name: Get Neon artifact
        uses: ./.github/actions/download
--- a/.github/workflows/codestyle.yml
+++ b/.github/workflows/codestyle.yml
@@ -106,7 +106,7 @@ jobs:
            !~/.cargo/registry/src
            ~/.cargo/git
            target
-          key: v5-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
+          key: v6-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust

      - name: Run cargo clippy
        run: ./run_clippy.sh
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -317,12 +317,6 @@ dependencies = [
 "generic-array",
 ]

-[[package]]
-name = "boxfnonce"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5988cb1d626264ac94100be357308f29ff7cbdd3b36bda27f450a4ee3f713426"
-
 [[package]]
 name = "bstr"
 version = "1.0.1"
@@ -600,6 +594,7 @@ dependencies = [
 "tar",
 "thiserror",
 "toml",
+ "url",
 "utils",
 "workspace_hack",
 ]
@@ -849,16 +844,6 @@ dependencies = [
 "syn",
 ]

-[[package]]
-name = "daemonize"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70c24513e34f53b640819f0ac9f705b673fcf4006d7aab8778bee72ebfc89815"
-dependencies = [
- "boxfnonce",
- "libc",
-]
-
 [[package]]
 name = "darling"
 version = "0.14.1"
@@ -2140,7 +2125,6 @@ dependencies = [
 "crc32c",
 "criterion",
 "crossbeam-utils",
- "daemonize",
 "etcd_broker",
 "fail",
 "futures",
@@ -3087,7 +3071,6 @@ dependencies = [
 "clap 4.0.15",
 "const_format",
 "crc32c",
- "daemonize",
 "etcd_broker",
 "fs2",
 "git-version",
@@ -3095,6 +3078,7 @@ dependencies = [
 "humantime",
 "hyper",
 "metrics",
+ "nix 0.25.0",
 "once_cell",
 "parking_lot 0.12.1",
 "postgres",
--- a/14
+++ b/14
@@ -111,8 +111,6 @@ postgres-v14: postgres-v14-configure \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
 	+@echo "Compiling libpq v14"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
-	+@echo "Compiling pg_prewarm v14"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
 	+@echo "Compiling pg_buffercache v14"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect v14"
@@ -125,8 +123,6 @@ postgres-v15: postgres-v15-configure \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
 	+@echo "Compiling libpq v15"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
-	+@echo "Compiling pg_prewarm v15"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
 	+@echo "Compiling pg_buffercache v15"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect v15"
@@ -155,6 +151,11 @@ neon-pg-ext-v14: postgres-v14
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-v14 && \
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+	+@echo "Compiling neon_walredo v14"
+	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v14
+	(cd $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v14 && \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install)
 	+@echo "Compiling neon_test_utils" v14
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14 && \
@@ -167,6 +168,11 @@ neon-pg-ext-v15: postgres-v15
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-v15 && \
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
+	+@echo "Compiling neon_walredo v15"
+	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v15
+	(cd $(POSTGRES_INSTALL_DIR)/build/neon-walredo-v15 && \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install)
 	+@echo "Compiling neon_test_utils" v15
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15
 	(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15 && \
--- a/README.md
+++ b/README.md
@@ -223,10 +223,7 @@ Ensure your dependencies are installed as described [here](https://github.com/ne
 ```sh
 git clone --recursive https://github.com/neondatabase/neon.git

-# either:
 CARGO_BUILD_FLAGS="--features=testing" make
-# or:
-make debug

 ./scripts/pytest
 ```
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -4,20 +4,21 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
+anyhow = "1.0"
 clap = "4.0"
 comfy-table = "6.1"
 git-version = "0.3.5"
-tar = "0.4.38"
+nix = "0.25"
+once_cell = "1.13.0"
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+regex = "1"
+reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
 serde = { version = "1.0", features = ["derive"] }
 serde_with = "2.0"
-toml = "0.5"
-once_cell = "1.13.0"
-regex = "1"
-anyhow = "1.0"
+tar = "0.4.38"
 thiserror = "1"
-nix = "0.25"
-reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
+toml = "0.5"
+url = "2.2.2"

 # Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
 # instead, so that recompile times are better.
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -0,0 +1,264 @@
+//! Spawns and kills background processes that are needed by Neon CLI.
+//! Applies common set-up such as log and pid files (if needed) to every process.
+//!
+//! Neon CLI does not run in background, so it needs to store the information about
+//! spawned processes, which it does in this module.
+//! We do that by storing the pid of the process in the "${process_name}.pid" file.
+//! The pid file can be created by the process itself
+//! (Neon storage binaries do that and also ensure that a lock is taken onto that file)
+//! or we create such file after starting the process
+//! (non-Neon binaries don't necessarily follow our pidfile conventions).
+//! The pid stored in the file is later used to stop the service.
+//!
+//! See [`lock_file`] module for more info.
+
+use std::ffi::OsStr;
+use std::io::Write;
+use std::path::Path;
+use std::process::{Child, Command};
+use std::time::Duration;
+use std::{fs, io, thread};
+
+use anyhow::{anyhow, bail, Context, Result};
+use nix::errno::Errno;
+use nix::sys::signal::{kill, Signal};
+use nix::unistd::Pid;
+
+use utils::lock_file;
+
+/// Maximum number of polling attempts made by `start_process` (while waiting for the
+/// process to report itself started) and by `stop_process` (while waiting for it to exit).
+const RETRIES: u32 = 15;
+/// Pause between two start-up polls in `start_process`, in milliseconds.
+/// (`stop_process` sleeps a fixed one second between its polls instead.)
+const RETRY_TIMEOUT_MILLIS: u64 = 500;
+
+/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
+/// it itself.
+///
+/// Both variants carry the path of the pid file.
+pub enum InitialPidFile<'t> {
+    /// Create a pidfile, to allow future CLI invocations to manipulate the process.
+    /// `start_process` writes the child's pid into it right after spawning.
+    Create(&'t Path),
+    /// The process will create the pidfile itself, need to wait for that event.
+    /// `start_process` keeps polling until the file exists and contains the child's pid.
+    Expect(&'t Path),
+}
+
+/// Start a background child process using the parameters given.
+///
+/// The child's stdout and stderr are both appended to `{datadir}/{process_name}.log`.
+/// Its environment is cleared and then refilled by `fill_rust_env_vars` and
+/// `fill_aws_secrets_vars`.
+///
+/// After spawning, `process_status_check` is polled up to `RETRIES` times, sleeping
+/// `RETRY_TIMEOUT_MILLIS` milliseconds between attempts:
+/// * `Ok(true)`: the process is up (for `InitialPidFile::Expect`, the pid file must also
+///   exist and contain the child's pid); the `Child` handle is returned.
+/// * `Ok(false)`: not ready yet, retry.
+/// * `Err(_)`: start-up failed; the child is killed and the error is returned.
+///
+/// # Errors
+///
+/// Fails if the log file cannot be opened, the command cannot be spawned, the pid file
+/// cannot be created (`InitialPidFile::Create`; the child is killed in that case so that it
+/// is not left running without a pid file), the status check reports an error, or the
+/// process does not report itself started within `RETRIES` attempts. In the last case the
+/// child is NOT killed by this function.
+pub fn start_process<F, S: AsRef<OsStr>>(
+    process_name: &str,
+    datadir: &Path,
+    command: &Path,
+    args: &[S],
+    initial_pid_file: InitialPidFile,
+    process_status_check: F,
+) -> anyhow::Result<Child>
+where
+    F: Fn() -> anyhow::Result<bool>,
+{
+    let log_path = datadir.join(format!("{process_name}.log"));
+    let process_log_file = fs::OpenOptions::new()
+        .create(true)
+        .write(true)
+        .append(true)
+        .open(&log_path)
+        .with_context(|| {
+            format!("Could not open {process_name} log file {log_path:?} for writing")
+        })?;
+    // stdout and stderr share one log file, so a second handle to it is needed.
+    let same_file_for_stderr = process_log_file.try_clone().with_context(|| {
+        format!("Could not reuse {process_name} log file {log_path:?} for writing stderr")
+    })?;
+
+    let mut command = Command::new(command);
+    let background_command = command
+        .stdout(process_log_file)
+        .stderr(same_file_for_stderr)
+        .args(args);
+    let filled_cmd = fill_aws_secrets_vars(fill_rust_env_vars(background_command));
+
+    let mut spawned_process = filled_cmd.spawn().with_context(|| {
+        format!("Could not spawn {process_name}, see console output and log files for details.")
+    })?;
+    let pid = spawned_process.id();
+    let pid = Pid::from_raw(
+        i32::try_from(pid)
+            .with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
+    );
+
+    let pid_file_to_check = match initial_pid_file {
+        InitialPidFile::Create(target_pid_file_path) => {
+            match lock_file::create_lock_file(target_pid_file_path, pid.to_string()) {
+                lock_file::LockCreationResult::Created { .. } => {
+                    // We use "lock" file here only to create the pid file. The lock on the pidfile will be dropped as soon
+                    // as this CLI invocation exits, so it's a bit useless, but doesn't do any harm either.
+                }
+                lock_file::LockCreationResult::AlreadyLocked { .. } => {
+                    // Without a pid file the child cannot be managed later: do not leave it running.
+                    kill_spawned_process(process_name, &mut spawned_process);
+                    anyhow::bail!("Cannot write pid file for {process_name} at path {target_pid_file_path:?}: file is already locked by another process")
+                }
+                lock_file::LockCreationResult::CreationFailed(e) => {
+                    // Same as above: an untracked child must not outlive this failure.
+                    kill_spawned_process(process_name, &mut spawned_process);
+                    return Err(e.context(format!(
+                        "Failed to create pid file for {process_name} at path {target_pid_file_path:?}"
+                    )));
+                }
+            }
+            // We wrote the pid file ourselves, nothing to wait for.
+            None
+        }
+        InitialPidFile::Expect(pid_file_path) => Some(pid_file_path),
+    };
+
+    for retries in 0..RETRIES {
+        match process_started(pid, pid_file_to_check, &process_status_check) {
+            Ok(true) => {
+                println!("\n{process_name} started, pid: {pid}");
+                return Ok(spawned_process);
+            }
+            Ok(false) => {
+                // The first few attempts only print progress dots; later ones print full messages.
+                if retries < 5 {
+                    print!(".");
+                    io::stdout().flush().unwrap();
+                } else {
+                    if retries == 5 {
+                        println!() // put a line break after dots for second message
+                    }
+                    println!("{process_name} has not started yet, retrying ({retries})...");
+                }
+                thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
+            }
+            Err(e) => {
+                println!("{process_name} failed to start: {e:#}");
+                kill_spawned_process(process_name, &mut spawned_process);
+                return Err(e);
+            }
+        }
+    }
+    anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
+}
+
+/// Best-effort kill of a freshly spawned child; a failure is only reported on stdout.
+fn kill_spawned_process(process_name: &str, spawned_process: &mut Child) {
+    if let Err(e) = spawned_process.kill() {
+        println!("Could not stop {process_name} subprocess: {e:#}")
+    }
+}
+
+/// Stops the process whose pid is stored in `pid_file`. Returns Ok also if the process is already not running.
+///
+/// With `immediate` set, SIGQUIT is sent; otherwise SIGTERM. Afterwards the process is
+/// polled up to `RETRIES` times, one second apart, until it is gone; the pid file is then
+/// removed (a failure to remove it is only reported on stderr).
+///
+/// A missing pid file, or a pid that no longer exists, both count as "already stopped".
+/// In the latter case the stale pid file is left in place.
+///
+/// # Errors
+///
+/// Fails if the pid file cannot be read or parsed, the signal cannot be sent, probing the
+/// process fails, or the process is still alive after `RETRIES` attempts.
+pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
+    if !pid_file.exists() {
+        println!("{process_name} is already stopped: no pid file {pid_file:?} is present");
+        return Ok(());
+    }
+    let pid = read_pidfile(pid_file)?;
+
+    let sig = if immediate {
+        print!("Stopping {process_name} with pid {pid} immediately..");
+        Signal::SIGQUIT
+    } else {
+        print!("Stopping {process_name} with pid {pid} gracefully..");
+        Signal::SIGTERM
+    };
+    io::stdout().flush().unwrap();
+
+    if let Err(errno) = kill(pid, sig) {
+        if errno == Errno::ESRCH {
+            println!(
+                "{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found"
+            );
+            return Ok(());
+        }
+        anyhow::bail!("Failed to send signal to {process_name} with pid {pid}: {errno}");
+    }
+
+    // Wait until process is gone
+    for _attempt in 0..RETRIES {
+        let is_gone = match process_has_stopped(pid) {
+            Ok(is_gone) => is_gone,
+            Err(e) => {
+                println!("{process_name} with pid {pid} failed to stop: {e:#}");
+                return Err(e);
+            }
+        };
+
+        if !is_gone {
+            print!(".");
+            io::stdout().flush().unwrap();
+            thread::sleep(Duration::from_secs(1));
+            continue;
+        }
+
+        println!("\n{process_name} stopped");
+        match fs::remove_file(pid_file) {
+            // A pid file that is already gone is fine; anything else is worth reporting.
+            Err(e) if e.kind() != io::ErrorKind::NotFound => {
+                eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
+            }
+            _ => {}
+        }
+        return Ok(());
+    }
+
+    anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
+}
+
+/// Resets the command's environment and fills in the Rust-related variables.
+///
+/// The environment is cleared first, then `RUST_BACKTRACE=1` is set. `LLVM_PROFILE_FILE`
+/// and `RUST_LOG` are copied over from the current process when they are set there
+/// (`RUST_LOG` only when its value is valid unicode).
+fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
+    cmd.env_clear();
+    cmd.env("RUST_BACKTRACE", "1");
+
+    if let Some(profile_file) = std::env::var_os("LLVM_PROFILE_FILE") {
+        cmd.env("LLVM_PROFILE_FILE", profile_file);
+    }
+
+    if let Ok(rust_log) = std::env::var("RUST_LOG") {
+        cmd.env("RUST_LOG", rust_log);
+    }
+
+    cmd
+}
+
+/// Copies the AWS credential variables that are set (with valid unicode values) in the
+/// current process environment into the command's environment; absent ones are skipped.
+fn fill_aws_secrets_vars(cmd: &mut Command) -> &mut Command {
+    const AWS_SECRET_KEYS: [&str; 3] = [
+        "AWS_ACCESS_KEY_ID",
+        "AWS_SECRET_ACCESS_KEY",
+        "AWS_SESSION_TOKEN",
+    ];
+
+    let present_secrets = AWS_SECRET_KEYS
+        .into_iter()
+        .filter_map(|key| std::env::var(key).ok().map(|value| (key, value)));
+    for (key, value) in present_secrets {
+        cmd.env(key, value);
+    }
+    cmd
+}
+
+/// Performs one start-up poll for the process with the given `pid`.
+///
+/// Returns `Ok(true)` when `status_check` reports success and, if `pid_file_to_check` is
+/// given, that file exists and contains exactly `pid`. Returns `Ok(false)` when the
+/// process is not ready yet (status check negative, pid file missing, or pid mismatch).
+///
+/// # Errors
+///
+/// Fails if `status_check` fails (its error is kept as the cause, so `{:#}` formatting
+/// shows the whole chain) or if an existing pid file cannot be read or parsed.
+fn process_started<F>(
+    pid: Pid,
+    pid_file_to_check: Option<&Path>,
+    status_check: &F,
+) -> anyhow::Result<bool>
+where
+    F: Fn() -> anyhow::Result<bool>,
+{
+    // Attach context instead of re-formatting the error, which would drop its causes.
+    if !status_check().context("process failed to start")? {
+        return Ok(false);
+    }
+
+    match pid_file_to_check {
+        Some(pid_file_path) if pid_file_path.exists() => {
+            // NOTE(review): a pid file that exists but is not fully written yet would make
+            // `read_pidfile` fail here and abort the start-up; confirm writers are atomic.
+            let pid_in_file = read_pidfile(pid_file_path)?;
+            Ok(pid_in_file == pid)
+        }
+        // The process has not created its pid file yet.
+        Some(_) => Ok(false),
+        None => Ok(true),
+    }
+}
+
+/// Read a PID file
+///
+/// We expect a file that contains a single integer, optionally surrounded by whitespace
+/// (e.g. a trailing newline).
+///
+/// # Errors
+///
+/// Fails if the file cannot be read, its content is not an `i32`, or the value is below 1.
+fn read_pidfile(pidfile: &Path) -> Result<Pid> {
+    let pid_str = fs::read_to_string(pidfile)
+        .with_context(|| format!("failed to read pidfile {pidfile:?}"))?;
+    // Trim first: pid files written by other tools commonly end with a newline.
+    let pid: i32 = pid_str
+        .trim()
+        .parse()
+        .map_err(|e| anyhow!("failed to parse pidfile {pidfile:?}: {e}"))?;
+    if pid < 1 {
+        bail!("pidfile {pidfile:?} contained bad value '{pid}'");
+    }
+    Ok(Pid::from_raw(pid))
+}
+
+/// Probes the process with the given `pid` by calling `kill` with no signal.
+///
+/// Returns `Ok(false)` while the process still exists and `Ok(true)` once the probe
+/// reports `ESRCH` (no such process). Any other errno is returned as an error.
+fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
+    match kill(pid, None).err() {
+        // Probe succeeded: the process exists, keep waiting
+        None => Ok(false),
+        // Process not found, we're done
+        Some(Errno::ESRCH) => Ok(true),
+        Some(err) => anyhow::bail!("Failed to send signal to process with pid {pid}: {err}"),
+    }
+}
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -9,8 +9,8 @@ use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
 use control_plane::compute::ComputeControlPlane;
 use control_plane::local_env::{EtcdBroker, LocalEnv};
+use control_plane::pageserver::PageServerNode;
 use control_plane::safekeeper::SafekeeperNode;
-use control_plane::storage::PageServerNode;
 use control_plane::{etcd, local_env};
 use pageserver_api::models::TimelineInfo;
 use pageserver_api::{
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -12,15 +12,14 @@ use std::time::Duration;

 use anyhow::{Context, Result};
 use utils::{
-    connstring::connection_host_port,
    id::{TenantId, TimelineId},
    lsn::Lsn,
    postgres_backend::AuthType,
 };

 use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
+use crate::pageserver::PageServerNode;
 use crate::postgresql_conf::PostgresConf;
-use crate::storage::PageServerNode;

 //
 // ComputeControlPlane
@@ -300,7 +299,8 @@ impl PostgresNode {

        // Configure the node to fetch pages from pageserver
        let pageserver_connstr = {
-            let (host, port) = connection_host_port(&self.pageserver.pg_connection_config);
+            let config = &self.pageserver.pg_connection_config;
+            let (host, port) = (config.host(), config.port());

            // Set up authentication
            //
@@ -319,7 +319,7 @@ impl PostgresNode {
            // uses only needed variables namely host, port, user, password.
            format!("postgresql://no_user:{password}@{host}:{port}")
        };
-        conf.append("shared_preload_libraries", "neon,pg_prewarm");
+        conf.append("shared_preload_libraries", "neon");
        conf.append_line("");
        conf.append("neon.pageserver_connstring", &pageserver_connstr);
        conf.append("neon.tenant_id", &self.tenant_id.to_string());
--- a/control_plane/src/connection.rs
+++ b/control_plane/src/connection.rs
@@ -0,0 +1,57 @@
+use url::Url;
+
+/// Connection settings for a postgres-protocol endpoint, stored as a URL.
+///
+/// Values built through the `FromStr` impl always have a host and an explicit port
+/// (5432 is filled in when the input has none).
+// NOTE(review): the derived `Debug` prints the inner URL, which may include a password;
+// confirm that is acceptable wherever this type is logged.
+#[derive(Debug)]
+pub struct PgConnectionConfig {
+    url: Url,
+}
+
+impl PgConnectionConfig {
+    /// Host part of the connection URL.
+    ///
+    /// Panics if the URL has no host.
+    pub fn host(&self) -> &str {
+        self.url.host_str().expect("BUG: no host")
+    }
+
+    /// Port of the connection URL.
+    ///
+    /// Panics if the URL has no port.
+    pub fn port(&self) -> u16 {
+        self.url.port().expect("BUG: no port")
+    }
+
+    /// Return a `<host>:<port>` string.
+    pub fn raw_address(&self) -> String {
+        format!("{}:{}", self.host(), self.port())
+    }
+
+    /// Connect using postgres protocol with TLS disabled.
+    pub fn connect_no_tls(&self) -> Result<postgres::Client, postgres::Error> {
+        postgres::Client::connect(self.url.as_str(), postgres::NoTls)
+    }
+}
+
+impl std::str::FromStr for PgConnectionConfig {
+    type Err = anyhow::Error;
+
+    /// Parses a `postgres://` or `postgresql://` URL into a connection config.
+    ///
+    /// The input is rejected if it is not a valid URL, uses another scheme, has no host,
+    /// or cannot be a base. A missing port is replaced with 5432.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let mut url = s.parse::<Url>()?;
+
+        let scheme = url.scheme();
+        if !matches!(scheme, "postgres" | "postgresql") {
+            anyhow::bail!("invalid scheme: {scheme}");
+        }
+
+        // A connection url without a host is of no use to us.
+        if url.host().is_none() {
+            anyhow::bail!(url::ParseError::EmptyHost);
+        }
+
+        // Rejects inputs such as `postgres:bar`.
+        if url.cannot_be_a_base() {
+            anyhow::bail!("URL cannot be a base");
+        }
+
+        // Fall back to the default PG port when none was given.
+        match url.port() {
+            Some(_) => {}
+            None => url
+                .set_port(Some(5432))
+                .expect("BUG: couldn't set the default port"),
+        }
+
+        Ok(Self { url })
+    }
+}
--- a/control_plane/src/etcd.rs
+++ b/control_plane/src/etcd.rs
@@ -1,99 +1,75 @@
-use std::{
-    fs,
-    path::PathBuf,
-    process::{Command, Stdio},
-};
+use std::{fs, path::PathBuf};

 use anyhow::Context;
-use nix::{
-    sys::signal::{kill, Signal},
-    unistd::Pid,
-};

-use crate::{local_env, read_pidfile};
+use crate::{background_process, local_env};

 pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let etcd_broker = &env.etcd_broker;
    println!(
-        "Starting etcd broker using {}",
-        etcd_broker.etcd_binary_path.display()
+        "Starting etcd broker using {:?}",
+        etcd_broker.etcd_binary_path
    );

    let etcd_data_dir = env.base_data_dir.join("etcd");
-    fs::create_dir_all(&etcd_data_dir).with_context(|| {
-        format!(
-            "Failed to create etcd data dir: {}",
-            etcd_data_dir.display()
-        )
-    })?;
+    fs::create_dir_all(&etcd_data_dir)
+        .with_context(|| format!("Failed to create etcd data dir {etcd_data_dir:?}"))?;

-    let etcd_stdout_file =
-        fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
-            format!(
-                "Failed to create etcd stout file in directory {}",
-                etcd_data_dir.display()
-            )
-        })?;
-    let etcd_stderr_file =
-        fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
-            format!(
-                "Failed to create etcd stderr file in directory {}",
-                etcd_data_dir.display()
-            )
-        })?;
    let client_urls = etcd_broker.comma_separated_endpoints();
+    let args = [
+        format!("--data-dir={}", etcd_data_dir.display()),
+        format!("--listen-client-urls={client_urls}"),
+        format!("--advertise-client-urls={client_urls}"),
+        // Set --quota-backend-bytes to keep the etcd virtual memory
+        // size smaller. Our test etcd clusters are very small.
+        // See https://github.com/etcd-io/etcd/issues/7910
+        "--quota-backend-bytes=100000000".to_string(),
+        // etcd doesn't compact (vacuum) with default settings,
+        // enable it to prevent space exhaustion.
+        "--auto-compaction-mode=revision".to_string(),
+        "--auto-compaction-retention=1".to_string(),
+    ];

-    let etcd_process = Command::new(&etcd_broker.etcd_binary_path)
-        .args(&[
-            format!("--data-dir={}", etcd_data_dir.display()),
-            format!("--listen-client-urls={client_urls}"),
-            format!("--advertise-client-urls={client_urls}"),
-            // Set --quota-backend-bytes to keep the etcd virtual memory
-            // size smaller. Our test etcd clusters are very small.
-            // See https://github.com/etcd-io/etcd/issues/7910
-            "--quota-backend-bytes=100000000".to_string(),
-            // etcd doesn't compact (vacuum) with default settings,
-            // enable it to prevent space exhaustion.
-            "--auto-compaction-mode=revision".to_string(),
-            "--auto-compaction-retention=1".to_string(),
-        ])
-        .stdout(Stdio::from(etcd_stdout_file))
-        .stderr(Stdio::from(etcd_stderr_file))
-        .spawn()
-        .context("Failed to spawn etcd subprocess")?;
-    let pid = etcd_process.id();
+    let pid_file_path = etcd_pid_file_path(env);

-    let etcd_pid_file_path = etcd_pid_file_path(env);
-    fs::write(&etcd_pid_file_path, pid.to_string()).with_context(|| {
-        format!(
-            "Failed to create etcd pid file at {}",
-            etcd_pid_file_path.display()
-        )
-    })?;
+    let client = reqwest::blocking::Client::new();
+
+    background_process::start_process(
+        "etcd",
+        &etcd_data_dir,
+        &etcd_broker.etcd_binary_path,
+        &args,
+        background_process::InitialPidFile::Create(&pid_file_path),
+        // Status check for `start_process`: succeeds once any broker endpoint answers `health`.
+        || {
+            for broker_endpoint in &etcd_broker.broker_endpoints {
+                let request = broker_endpoint
+                    .join("health")
+                    .with_context(|| {
+                        format!(
+                            "Failed to append /health path to broker endpoint {broker_endpoint}"
+                        )
+                    })
+                    .and_then(|url| {
+                        client.get(&url.to_string()).build().with_context(|| {
+                            format!("Failed to construct request to etcd endpoint {url}")
+                        })
+                    })?;
+                if client.execute(request).is_ok() {
+                    return Ok(true);
+                }
+            }
+
+            Ok(false)
+        },
+    )
+    .context("Failed to spawn etcd subprocess")?;

    Ok(())
 }

 pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
-    let etcd_path = &env.etcd_broker.etcd_binary_path;
-    println!("Stopping etcd broker at {}", etcd_path.display());
-
-    let etcd_pid_file_path = etcd_pid_file_path(env);
-    let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
-        format!(
-            "Failed to read etcd pid file at {}",
-            etcd_pid_file_path.display()
-        )
-    })?);
-
-    kill(pid, Signal::SIGTERM).with_context(|| {
-        format!(
-            "Failed to stop etcd with pid {pid} at {}",
-            etcd_pid_file_path.display()
-        )
-    })?;
-
-    Ok(())
+    background_process::stop_process(true, "etcd", &etcd_pid_file_path(env))
 }

 fn etcd_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
--- a/control_plane/src/lib.rs
+++ b/control_plane/src/lib.rs
@@ -6,59 +6,12 @@
 // Intended to be used in integration tests and in CLI tools for
 // local installations.
 //
-use anyhow::{anyhow, bail, Context, Result};
-use std::fs;
-use std::path::Path;
-use std::process::Command;

+mod background_process;
 pub mod compute;
+pub mod connection;
 pub mod etcd;
 pub mod local_env;
+pub mod pageserver;
 pub mod postgresql_conf;
 pub mod safekeeper;
-pub mod storage;
-
-/// Read a PID file
-///
-/// We expect a file that contains a single integer.
-/// We return an i32 for compatibility with libc and nix.
-pub fn read_pidfile(pidfile: &Path) -> Result<i32> {
-    let pid_str = fs::read_to_string(pidfile)
-        .with_context(|| format!("failed to read pidfile {:?}", pidfile))?;
-    let pid: i32 = pid_str
-        .parse()
-        .map_err(|_| anyhow!("failed to parse pidfile {:?}", pidfile))?;
-    if pid < 1 {
-        bail!("pidfile {:?} contained bad value '{}'", pidfile, pid);
-    }
-    Ok(pid)
-}
-
-fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
-    let cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
-
-    let var = "LLVM_PROFILE_FILE";
-    if let Some(val) = std::env::var_os(var) {
-        cmd.env(var, val);
-    }
-
-    const RUST_LOG_KEY: &str = "RUST_LOG";
-    if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
-        cmd.env(RUST_LOG_KEY, rust_log_value)
-    } else {
-        cmd
-    }
-}
-
-fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
-    for env_key in [
-        "AWS_ACCESS_KEY_ID",
-        "AWS_SECRET_ACCESS_KEY",
-        "AWS_SESSION_TOKEN",
-    ] {
-        if let Ok(value) = std::env::var(env_key) {
-            cmd = cmd.env(env_key, value);
-        }
-    }
-    cmd
-}
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -226,12 +226,12 @@ impl LocalEnv {
        }
    }

-    pub fn pageserver_bin(&self) -> anyhow::Result<PathBuf> {
-        Ok(self.neon_distrib_dir.join("pageserver"))
+    pub fn pageserver_bin(&self) -> PathBuf {
+        self.neon_distrib_dir.join("pageserver")
    }

-    pub fn safekeeper_bin(&self) -> anyhow::Result<PathBuf> {
-        Ok(self.neon_distrib_dir.join("safekeeper"))
+    pub fn safekeeper_bin(&self) -> PathBuf {
+        self.neon_distrib_dir.join("safekeeper")
    }

    pub fn pg_data_dirs_path(&self) -> PathBuf {
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -1,33 +1,27 @@
 use std::collections::HashMap;
-use std::fs::File;
+use std::fs::{self, File};
 use std::io::{BufReader, Write};
 use std::num::NonZeroU64;
 use std::path::{Path, PathBuf};
-use std::process::Command;
-use std::time::Duration;
-use std::{io, result, thread};
+use std::process::Child;
+use std::{io, result};

+use crate::connection::PgConnectionConfig;
 use anyhow::{bail, Context};
-use nix::errno::Errno;
-use nix::sys::signal::{kill, Signal};
-use nix::unistd::Pid;
 use pageserver_api::models::{
    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
 };
-use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
 use utils::{
-    connstring::connection_address,
    http::error::HttpErrorBody,
    id::{TenantId, TimelineId},
    lsn::Lsn,
    postgres_backend::AuthType,
 };

-use crate::local_env::LocalEnv;
-use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
+use crate::{background_process, local_env::LocalEnv};

 #[derive(Error, Debug)]
 pub enum PageserverHttpError {
@@ -75,7 +69,7 @@ impl ResponseErrorMessageExt for Response {
 //
 #[derive(Debug)]
 pub struct PageServerNode {
-    pub pg_connection_config: Config,
+    pub pg_connection_config: PgConnectionConfig,
    pub env: LocalEnv,
    pub http_client: Client,
    pub http_base_url: String,
@@ -101,7 +95,7 @@ impl PageServerNode {
    }

    /// Construct libpq connection string for connecting to the pageserver.
-    fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
+    fn pageserver_connection_config(password: &str, listen_addr: &str) -> PgConnectionConfig {
        format!("postgresql://no_user:{password}@{listen_addr}/no_db")
            .parse()
            .unwrap()
@@ -161,7 +155,15 @@ impl PageServerNode {
            init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
        }

-        self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
+        let mut pageserver_process = self
+            .start_node(&init_config_overrides, &self.env.base_data_dir, true)
+            .with_context(|| {
+                format!(
+                    "Failed to start a process for pageserver {}",
+                    self.env.pageserver.id,
+                )
+            })?;
+
        let init_result = self
            .try_init_timeline(create_tenant, initial_timeline_id, pg_version)
            .context("Failed to create initial tenant and timeline for pageserver");
@@ -171,7 +173,29 @@ impl PageServerNode {
            }
            Err(e) => eprintln!("{e:#}"),
        }
-        self.stop(false)?;
+        match pageserver_process.kill() {
+            Err(e) => {
+                eprintln!(
+                    "Failed to stop pageserver {} process with pid {}: {e:#}",
+                    self.env.pageserver.id,
+                    pageserver_process.id(),
+                )
+            }
+            Ok(()) => {
+                println!(
+                    "Stopped pageserver {} process with pid {}",
+                    self.env.pageserver.id,
+                    pageserver_process.id(),
+                );
+                // cleanup after pageserver startup, since we do not call regular `stop_process` during init
+                let pid_file = self.pid_file();
+                if let Err(e) = fs::remove_file(&pid_file) {
+                    if e.kind() != io::ErrorKind::NotFound {
+                        eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
+                    }
+                }
+            }
+        }
        init_result
    }

@@ -196,11 +220,14 @@ impl PageServerNode {
        self.env.pageserver_data_dir()
    }

-    pub fn pid_file(&self) -> PathBuf {
+    /// The pid file is created by the pageserver process, with its pid stored inside.
+    /// Other pageservers cannot lock the same file and overwrite it for as long as the current
+    /// pageserver runs. (Unless someone removes the file manually; never do that!)
+    fn pid_file(&self) -> PathBuf {
        self.repo_path().join("pageserver.pid")
    }

-    pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
+    pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<Child> {
        self.start_node(config_overrides, &self.repo_path(), false)
    }

@@ -209,10 +236,10 @@ impl PageServerNode {
        config_overrides: &[&str],
        datadir: &Path,
        update_config: bool,
-    ) -> anyhow::Result<()> {
+    ) -> anyhow::Result<Child> {
        println!(
            "Starting pageserver at '{}' in '{}'",
-            connection_address(&self.pg_connection_config),
+            self.pg_connection_config.raw_address(),
            datadir.display()
        );
        io::stdout().flush()?;
@@ -220,10 +247,7 @@ impl PageServerNode {
        let mut args = vec![
            "-D",
            datadir.to_str().with_context(|| {
-                format!(
-                    "Datadir path '{}' cannot be represented as a unicode string",
-                    datadir.display()
-                )
+                format!("Datadir path {datadir:?} cannot be represented as a unicode string")
            })?,
        ];

@@ -235,48 +259,18 @@ impl PageServerNode {
            args.extend(["-c", config_override]);
        }

-        let mut cmd = Command::new(self.env.pageserver_bin()?);
-        let mut filled_cmd = fill_rust_env_vars(cmd.args(&args).arg("--daemonize"));
-        filled_cmd = fill_aws_secrets_vars(filled_cmd);
-
-        if !filled_cmd.status()?.success() {
-            bail!(
-                "Pageserver failed to start. See console output and '{}' for details.",
-                datadir.join("pageserver.log").display()
-            );
-        }
-
-        // It takes a while for the page server to start up. Wait until it is
-        // open for business.
-        const RETRIES: i8 = 15;
-        for retries in 1..RETRIES {
-            match self.check_status() {
-                Ok(()) => {
-                    println!("\nPageserver started");
-                    return Ok(());
-                }
-                Err(err) => {
-                    match err {
-                        PageserverHttpError::Transport(err) => {
-                            if err.is_connect() && retries < 5 {
-                                print!(".");
-                                io::stdout().flush().unwrap();
-                            } else {
-                                if retries == 5 {
-                                    println!() // put a line break after dots for second message
-                                }
-                                println!("Pageserver not responding yet, err {err} retrying ({retries})...");
-                            }
-                        }
-                        PageserverHttpError::Response(msg) => {
-                            bail!("pageserver failed to start: {msg} ")
-                        }
-                    }
-                    thread::sleep(Duration::from_secs(1));
-                }
-            }
-        }
-        bail!("pageserver failed to start in {RETRIES} seconds");
+        background_process::start_process(
+            "pageserver",
+            datadir,
+            &self.env.pageserver_bin(),
+            &args,
+            background_process::InitialPidFile::Expect(&self.pid_file()),
+            || match self.check_status() {
+                Ok(()) => Ok(true),
+                Err(PageserverHttpError::Transport(_)) => Ok(false),
+                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+            },
+        )
    }

    ///
@@ -288,69 +282,18 @@ impl PageServerNode {
    /// If the server is not running, returns success
    ///
    pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
-        let pid_file = self.pid_file();
-        if !pid_file.exists() {
-            println!("Pageserver is already stopped");
-            return Ok(());
-        }
-        let pid = Pid::from_raw(read_pidfile(&pid_file)?);
-
-        let sig = if immediate {
-            print!("Stopping pageserver immediately..");
-            Signal::SIGQUIT
-        } else {
-            print!("Stopping pageserver gracefully..");
-            Signal::SIGTERM
-        };
-        io::stdout().flush().unwrap();
-        match kill(pid, sig) {
-            Ok(_) => (),
-            Err(Errno::ESRCH) => {
-                println!("Pageserver with pid {pid} does not exist, but a PID file was found");
-                return Ok(());
-            }
-            Err(err) => bail!(
-                "Failed to send signal to pageserver with pid {pid}: {}",
-                err.desc()
-            ),
-        }
-
-        // Wait until process is gone
-        for i in 0..600 {
-            let signal = None; // Send no signal, just get the error code
-            match kill(pid, signal) {
-                Ok(_) => (), // Process exists, keep waiting
-                Err(Errno::ESRCH) => {
-                    // Process not found, we're done
-                    println!("done!");
-                    return Ok(());
-                }
-                Err(err) => bail!(
-                    "Failed to send signal to pageserver with pid {}: {}",
-                    pid,
-                    err.desc()
-                ),
-            };
-
-            if i % 10 == 0 {
-                print!(".");
-                io::stdout().flush().unwrap();
-            }
-            thread::sleep(Duration::from_millis(100));
-        }
-
-        bail!("Failed to stop pageserver with pid {pid}");
+        background_process::stop_process(immediate, "pageserver", &self.pid_file())
    }

    pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
-        let mut client = self.pg_connection_config.connect(NoTls).unwrap();
+        let mut client = self.pg_connection_config.connect_no_tls().unwrap();

        println!("Pageserver query: '{sql}'");
        client.simple_query(sql).unwrap()
    }

    pub fn page_server_psql_client(&self) -> result::Result<postgres::Client, postgres::Error> {
-        self.pg_connection_config.connect(NoTls)
+        self.pg_connection_config.connect_no_tls()
    }

    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
@@ -549,7 +492,7 @@ impl PageServerNode {
        pg_wal: Option<(Lsn, PathBuf)>,
        pg_version: u32,
    ) -> anyhow::Result<()> {
-        let mut client = self.pg_connection_config.connect(NoTls).unwrap();
+        let mut client = self.pg_connection_config.connect_no_tls().unwrap();

        // Init base reader
        let (start_lsn, base_tarfile_path) = base;
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -1,23 +1,21 @@
 use std::io::Write;
 use std::path::PathBuf;
-use std::process::Command;
+use std::process::Child;
 use std::sync::Arc;
-use std::time::Duration;
-use std::{io, result, thread};
+use std::{io, result};

-use anyhow::bail;
-use nix::errno::Errno;
-use nix::sys::signal::{kill, Signal};
-use nix::unistd::Pid;
-use postgres::Config;
+use anyhow::Context;
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
-use utils::{connstring::connection_address, http::error::HttpErrorBody, id::NodeId};
+use utils::{http::error::HttpErrorBody, id::NodeId};

-use crate::local_env::{LocalEnv, SafekeeperConf};
-use crate::storage::PageServerNode;
-use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
+use crate::connection::PgConnectionConfig;
+use crate::pageserver::PageServerNode;
+use crate::{
+    background_process,
+    local_env::{LocalEnv, SafekeeperConf},
+};

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
@@ -63,7 +61,7 @@ pub struct SafekeeperNode {

    pub conf: SafekeeperConf,

-    pub pg_connection_config: Config,
+    pub pg_connection_config: PgConnectionConfig,
    pub env: LocalEnv,
    pub http_client: Client,
    pub http_base_url: String,
@@ -87,15 +85,15 @@ impl SafekeeperNode {
    }

    /// Construct libpq connection string for connecting to this safekeeper.
-    fn safekeeper_connection_config(port: u16) -> Config {
+    fn safekeeper_connection_config(port: u16) -> PgConnectionConfig {
        // TODO safekeeper authentication not implemented yet
-        format!("postgresql://no_user@127.0.0.1:{}/no_db", port)
+        format!("postgresql://no_user@127.0.0.1:{port}/no_db")
            .parse()
            .unwrap()
    }

    pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
-        env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
+        env.safekeeper_data_dir(&format!("sk{sk_id}"))
    }

    pub fn datadir_path(&self) -> PathBuf {
@@ -106,91 +104,78 @@ impl SafekeeperNode {
        self.datadir_path().join("safekeeper.pid")
    }

-    pub fn start(&self) -> anyhow::Result<()> {
+    pub fn start(&self) -> anyhow::Result<Child> {
        print!(
            "Starting safekeeper at '{}' in '{}'",
-            connection_address(&self.pg_connection_config),
+            self.pg_connection_config.raw_address(),
            self.datadir_path().display()
        );
        io::stdout().flush().unwrap();

        let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
        let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
+        let id = self.id;
+        let datadir = self.datadir_path();

-        let mut cmd = Command::new(self.env.safekeeper_bin()?);
-        fill_rust_env_vars(
-            cmd.args(&["-D", self.datadir_path().to_str().unwrap()])
-                .args(&["--id", self.id.to_string().as_ref()])
-                .args(&["--listen-pg", &listen_pg])
-                .args(&["--listen-http", &listen_http])
-                .arg("--daemonize"),
-        );
+        let id_string = id.to_string();
+        let mut args = vec![
+            "-D",
+            datadir.to_str().with_context(|| {
+                format!("Datadir path {datadir:?} cannot be represented as a unicode string")
+            })?,
+            "--id",
+            &id_string,
+            "--listen-pg",
+            &listen_pg,
+            "--listen-http",
+            &listen_http,
+        ];
        if !self.conf.sync {
-            cmd.arg("--no-sync");
+            args.push("--no-sync");
        }

        let comma_separated_endpoints = self.env.etcd_broker.comma_separated_endpoints();
        if !comma_separated_endpoints.is_empty() {
-            cmd.args(&["--broker-endpoints", &comma_separated_endpoints]);
+            args.extend(["--broker-endpoints", &comma_separated_endpoints]);
        }
        if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
-            cmd.args(&["--broker-etcd-prefix", prefix]);
+            args.extend(["--broker-etcd-prefix", prefix]);
        }
+
+        let mut backup_threads = String::new();
        if let Some(threads) = self.conf.backup_threads {
-            cmd.args(&["--backup-threads", threads.to_string().as_ref()]);
+            backup_threads = threads.to_string();
+            args.extend(["--backup-threads", &backup_threads]);
+        } else {
+            drop(backup_threads);
        }
+
        if let Some(ref remote_storage) = self.conf.remote_storage {
-            cmd.args(&["--remote-storage", remote_storage]);
+            args.extend(["--remote-storage", remote_storage]);
        }
+
+        let key_path = self.env.base_data_dir.join("auth_public_key.pem");
        if self.conf.auth_enabled {
-            cmd.arg("--auth-validation-public-key-path");
-            // PathBuf is better be passed as is, not via `String`.
-            cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
+            args.extend([
+                "--auth-validation-public-key-path",
+                key_path.to_str().with_context(|| {
+                    format!("Key path {key_path:?} cannot be represented as a unicode string")
+                })?,
+            ]);
        }

-        fill_aws_secrets_vars(&mut cmd);
-
-        if !cmd.status()?.success() {
-            bail!(
-                "Safekeeper failed to start. See '{}' for details.",
-                self.datadir_path().join("safekeeper.log").display()
-            );
-        }
-
-        // It takes a while for the safekeeper to start up. Wait until it is
-        // open for business.
-        const RETRIES: i8 = 15;
-        for retries in 1..RETRIES {
-            match self.check_status() {
-                Ok(_) => {
-                    println!("\nSafekeeper started");
-                    return Ok(());
-                }
-                Err(err) => {
-                    match err {
-                        SafekeeperHttpError::Transport(err) => {
-                            if err.is_connect() && retries < 5 {
-                                print!(".");
-                                io::stdout().flush().unwrap();
-                            } else {
-                                if retries == 5 {
-                                    println!() // put a line break after dots for second message
-                                }
-                                println!(
-                                    "Safekeeper not responding yet, err {} retrying ({})...",
-                                    err, retries
-                                );
-                            }
-                        }
-                        SafekeeperHttpError::Response(msg) => {
-                            bail!("safekeeper failed to start: {} ", msg)
-                        }
-                    }
-                    thread::sleep(Duration::from_secs(1));
-                }
-            }
-        }
-        bail!("safekeeper failed to start in {} seconds", RETRIES);
+        background_process::start_process(
+            &format!("safekeeper {id}"),
+            &datadir,
+            &self.env.safekeeper_bin(),
+            &args,
+            background_process::InitialPidFile::Expect(&self.pid_file()),
+            || match self.check_status() {
+                Ok(()) => Ok(true),
+                Err(SafekeeperHttpError::Transport(_)) => Ok(false),
+                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+            },
+        )
    }

    ///
@@ -202,63 +187,11 @@ impl SafekeeperNode {
    /// If the server is not running, returns success
    ///
    pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
-        let pid_file = self.pid_file();
-        if !pid_file.exists() {
-            println!("Safekeeper {} is already stopped", self.id);
-            return Ok(());
-        }
-        let pid = read_pidfile(&pid_file)?;
-        let pid = Pid::from_raw(pid);
-
-        let sig = if immediate {
-            print!("Stopping safekeeper {} immediately..", self.id);
-            Signal::SIGQUIT
-        } else {
-            print!("Stopping safekeeper {} gracefully..", self.id);
-            Signal::SIGTERM
-        };
-        io::stdout().flush().unwrap();
-        match kill(pid, sig) {
-            Ok(_) => (),
-            Err(Errno::ESRCH) => {
-                println!(
-                    "Safekeeper with pid {} does not exist, but a PID file was found",
-                    pid
-                );
-                return Ok(());
-            }
-            Err(err) => bail!(
-                "Failed to send signal to safekeeper with pid {}: {}",
-                pid,
-                err.desc()
-            ),
-        }
-
-        // Wait until process is gone
-        for i in 0..600 {
-            let signal = None; // Send no signal, just get the error code
-            match kill(pid, signal) {
-                Ok(_) => (), // Process exists, keep waiting
-                Err(Errno::ESRCH) => {
-                    // Process not found, we're done
-                    println!("done!");
-                    return Ok(());
-                }
-                Err(err) => bail!(
-                    "Failed to send signal to pageserver with pid {}: {}",
-                    pid,
-                    err.desc()
-                ),
-            };
-
-            if i % 10 == 0 {
-                print!(".");
-                io::stdout().flush().unwrap();
-            }
-            thread::sleep(Duration::from_millis(100));
-        }
-
-        bail!("Failed to stop safekeeper with pid {}", pid);
+        background_process::stop_process(
+            immediate,
+            &format!("safekeeper {}", self.id),
+            &self.pid_file(),
+        )
    }

    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
--- a/docs/rfcs/020-pageserver-s3-coordination.md
+++ b/docs/rfcs/020-pageserver-s3-coordination.md
@@ -0,0 +1,246 @@
+# Coordinating access of multiple pageservers to the same s3 data
+
+## Motivation
+
+There are some blind spots around coordinating access of multiple pageservers
+to the same s3 data. Currently this is applicable only to tenant relocation
+case, but in the future we'll need to solve similar problems for
+replica/standby pageservers.
+
+## Impacted components (e.g. pageserver, safekeeper, console, etc)
+
+Pageserver
+
+## The problem
+
+### Relocation
+
+During relocation both pageservers can write to s3. This should be ok for all
+data except the `index_part.json`. For index part it causes problems during
+compaction/gc because they remove files from index/s3.
+
+Imagine this case:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant PS1
+    participant S3
+    participant PS2
+
+    PS1->>S3: Uploads L1, L2 <br/> Index contains L1 L2
+    PS2->>S3: Attach called, sees L1, L2
+    PS1->>S3: Compaction comes <br/> Removes L1, adds L3
+    note over S3: Index now L2, L3
+    PS2->>S3: Uploads new layer L4 <br/> (added to previous view of the index)
+    note over S3: Index now L1, L2, L4
+```
+
+At this point it is not possible to restore from the index: it contains L1, which
+is no longer available in s3, and doesn't contain L3 added by compaction by the
+first pageserver. So if any of the pageservers restarts, initial sync will fail
+(or in the on-demand world it will fail a bit later, during a page request from
+the missing layer).
+
+### Standby pageserver
+
+Another related case is standby pageserver. In this case second pageserver can
+be used as a replica to scale reads and serve as a failover target in case
+first one fails.
+
+In this mode second pageserver needs to have the same picture of s3 files to
+be able to load layers on-demand. To accomplish that second pageserver
+cannot run gc/compaction jobs. Instead it needs to receive updates for index
+contents. (There is no need to run walreceiver on the second pageserver then).
+
+## Observations
+
+- If both pageservers ingest wal then their layer set diverges, because layer
+  file generation is not deterministic
+- If one of the pageservers does not ingest wal (and just picks up layer
+  updates) then it lags behind and cannot really answer queries in the same
+  pace as the primary one
+- Can compaction help make layers deterministic? E.g. we do not upload level
+  zero layers and construction of higher levels should be deterministic.
+  This way we can guarantee that layer creation by timeout won't mess things up.
+  This way one pageserver uploads data and the second one can just ingest it.
+  But we still need some form of election.
+
+## Solutions
+
+### Manual orchestration
+
+One possible solution for relocation case is to orchestrate background jobs
+from outside. The oracle who runs migration can turn off background jobs on
+PS1 before migration and then run migration -> enable them on PS2. The problem
+comes if migration fails. In this case, in order to resume background jobs, the
+oracle needs to guarantee that PS2 doesn't run background jobs, and if it doesn't
+respond then PS1 is stuck unable to run compaction/gc. This cannot be solved
+without a human ensuring that no upload from PS2 can happen. In order to be able
+to resolve this automatically, CAS is required on the S3 side so a pageserver can
+avoid overwriting the index part if it is no longer the leading one.
+
+Note that the flag that disables background jobs needs to be persistent, because
+otherwise a pageserver restart will clear it.
+
+### Avoid index_part.json
+
+Index part consists of two parts, list of layers and metadata. List of layers
+can be easily obtained by `ListObjects` S3 API method. But what to do with
+metadata? Create metadata instance for each checkpoint and add some counter
+to the file name?
+
+Back to potentially long s3 ls.
+
+### Coordination based approach
+
+Do it like safekeepers choose a leader for WAL upload. Ping each other and decide
+based on some heuristics, e.g. smallest node id. During relocation PS1 sends a
+"resign" ping message so others can start an election without waiting for a timeout.
+
+This still leaves the metadata question open, and non-deterministic layers are a
+problem as well.
+
+### Avoid metadata file
+
+One way to eliminate metadata file is to store it in layer files under some
+special key. This may resonate with intention to keep all relation sizes in
+some special segment to avoid initial download during size calculation.
+Maybe with that we can even store pre calculated value.
+
+As a downside each checkpoint gets 512 bytes larger.
+
+If we entirely avoid metadata file this opens up many approaches
+
+* * *
+
+During discussion it seems that we converged on the approach consisting of:
+
+- index files stored per pageserver in the same timeline directory. With that
+  index file name starts to look like: `<pageserver_node_id>_index_part.json`.
+  In such set up there are no concurrent overwrites of index file by different
+  pageservers.
+- For replica pageservers the solution would be for primary to broadcast index
+  changes to any followers with an ability to check index files in s3 and
+  restore the full state. To properly merge changes with index files we can use
+  a counter that is persisted in an index file, is incremented on every change
+  to it and passed along with broadcasted change. This way we can determine
+  whether we need to apply change to the index state or not.
+- Responsibility for running background jobs is assigned externally. Pageserver
+  keeps a locally persistent flag for each tenant that indicates whether this
+  pageserver is considered the primary one or not. TODO: what happens if we
+  crash and cannot start for some extended period of time? Control plane can
+  assign ownership to some other pageserver. Pageserver needs some way to check
+  if it's still the blessed one. Maybe by explicit request to control plane on
+  start.
+
+Requirement for deterministic layer generation was considered overly strict
+because of two reasons:
+
+- It can limit possible optimizations, e.g. when a pageserver wants to reshuffle
+  some data locally and doesn't want to coordinate this
+- The deterministic algorithm itself can change, so during deployments for some
+  time there will be two different versions running at the same time, which can
+  cause non-determinism
+
+### External elections
+
+The above case with lost state in this schema with externally managed
+leadership is represented like this:
+
+Note that here we keep objects list in the index file.
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant PS1
+    participant CP as Control Plane
+    participant S3
+    participant PS2
+
+    note over PS1,PS2: PS1 starts up and is still a leader
+    PS1->>CP: Am I still the leader for Tenant X?
+    activate CP
+    CP->>PS1: Yes
+    deactivate CP
+    PS1->>S3: Fetch PS1 index.
+    note over PS1: Continue operations, start background jobs
+    note over PS1,PS2: PS1 starts up and is not a leader anymore
+    PS1->>CP: Am I still the leader for Tenant X?
+    CP->>PS1: No
+    PS1->>PS2: Subscribe to index changes
+    PS1->>S3: Fetch PS1 and PS2 indexes
+    note over PS1: Combine index file to include layers <br> from both indexes to be able <br> to see newer files from leader (PS2)
+    note over PS1: Continue operations, do not start background jobs
+```
+
+### Internal elections
+
+To manage leadership internally we can use broker to exchange pings so nodes
+can decide on the leader roles. In case multiple pageservers are active leader
+is the one with lowest node id.
+
+Operations with internally managed elections:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant PS1
+    participant S3
+
+    note over PS1: Starts up
+    note over PS1: Subscribes to changes, waits for two ping <br> timeouts to see if there is a leader
+    PS1->>S3: Fetch indexes from s3
+    alt there is a leader
+        note over PS1: do not start background jobs, <br> continue applying index updates
+    else there is no leader
+        note over PS1: start background jobs, <br> broadcast index changes
+    end
+
+    note over PS1,S3: Then the picture is similar to external elections <br> the difference is that follower can become a leader <br> if there are no pings after some timeout new leader gets elected
+```
+
+### Eviction
+
+When two pageservers operate on a tenant for an extended period of time, the follower
+doesn't perform write operations in s3. When a layer is evicted, the follower relies
+on updates from the primary to get info about layers it needs to cover the range of
+the evicted layer.
+
+Note that it won't match the evicted layer exactly, so layers will overlap and
+lookup code needs to correctly handle that.
+
+### Relocation flow
+
+Actions become:
+
+- Attach tenant to new pageserver
+- New pageserver becomes follower since previous one is still leading
+- New pageserver starts replicating from safekeepers but does not upload layers
+- Detach is called on the old one
+- New pageserver becomes leader after it realizes that old one disappeared
+
+### Index File
+
+Using `s3 ls` on startup simplifies things, but we still need metadata, so we
+need to fetch index files anyway. If they contain list of files we can combine
+them and avoid costly `s3 ls`
+
+### Remaining issues
+
+- More than one remote consistent lsn for safekeepers to know
+
+Anything else?
+
+### Proposed solution
+
+To recap: at the meeting we converged on the approach with external elections, but I
+think it will be overall harder to manage and will introduce a dependency on the
+control plane for the pageserver. Using separate index files for each pageserver,
+consisting of a log of operations and a metadata snapshot, should be enough.
+
+### What we need to get there?
+
+- Change index file structure to contain log of changes instead of just the
+  file list
+- Implement pinging/elections for pageservers
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -52,6 +52,10 @@ PostgreSQL extension that implements storage manager API and network communicati

 PostgreSQL extension that contains functions needed for testing and debugging.

+`/pgxn/neon_walredo`:
+
+Library to run Postgres as a "WAL redo process" in the pageserver.
+
 `/safekeeper`:

 The neon WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -230,7 +230,6 @@ pub enum PagestreamFeMessage {
    Nblocks(PagestreamNblocksRequest),
    GetPage(PagestreamGetPageRequest),
    DbSize(PagestreamDbSizeRequest),
-    Fcntl(PagestreamFcntlRequest),
 }

 // Wrapped in libpq CopyData
@@ -271,12 +270,6 @@ pub struct PagestreamDbSizeRequest {
    pub dbnode: u32,
 }

-#[derive(Debug)]
-pub struct PagestreamFcntlRequest {
-    pub cmd: u32,
-    pub data: Bytes,
-}
-
 #[derive(Debug)]
 pub struct PagestreamExistsResponse {
    pub exists: bool,
@@ -348,14 +341,6 @@ impl PagestreamFeMessage {
                lsn: Lsn::from(body.get_u64()),
                dbnode: body.get_u32(),
            })),
-            4 => {
-                let cmd = body.get_u32();
-                let size = body.get_u32() as usize;
-                Ok(PagestreamFeMessage::Fcntl(PagestreamFcntlRequest {
-                    cmd,
-                    data: body.copy_to_bytes(size),
-                }))
-            }
            _ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
        }
    }
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
@@ -232,6 +232,3 @@ pub const PGDATA_SPECIAL_FILES: [&str; 3] =
    ["pg_hba.conf", "pg_ident.conf", "postgresql.auto.conf"];

 pub static PG_HBA: &str = include_str!("../samples/pg_hba.conf");
-
-pub static AUTOPREWARM_FILE_NAME: &str = "autoprewarm.blocks";
-pub const SMGR_FCNTL_CACHE_SNAPSHOT: u32 = 1;
--- a/libs/utils/src/connstring.rs
+++ b/libs/utils/src/connstring.rs
@@ -1,52 +0,0 @@
-use postgres::Config;
-
-pub fn connection_host_port(config: &Config) -> (String, u16) {
-    assert_eq!(
-        config.get_hosts().len(),
-        1,
-        "only one pair of host and port is supported in connection string"
-    );
-    assert_eq!(
-        config.get_ports().len(),
-        1,
-        "only one pair of host and port is supported in connection string"
-    );
-    let host = match &config.get_hosts()[0] {
-        postgres::config::Host::Tcp(host) => host.as_ref(),
-        postgres::config::Host::Unix(host) => host.to_str().unwrap(),
-    };
-    (host.to_owned(), config.get_ports()[0])
-}
-
-pub fn connection_address(config: &Config) -> String {
-    let (host, port) = connection_host_port(config);
-    format!("{}:{}", host, port)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_connection_host_port() {
-        let config: Config = "postgresql://no_user@localhost:64000/no_db"
-            .parse()
-            .unwrap();
-        assert_eq!(
-            connection_host_port(&config),
-            ("localhost".to_owned(), 64000)
-        );
-    }
-
-    #[test]
-    #[should_panic(expected = "only one pair of host and port is supported in connection string")]
-    fn test_connection_host_port_multiple_ports() {
-        let config: Config = "postgresql://no_user@localhost:64000,localhost:64001/no_db"
-            .parse()
-            .unwrap();
-        assert_eq!(
-            connection_host_port(&config),
-            ("localhost".to_owned(), 64000)
-        );
-    }
-}
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -19,9 +19,6 @@ pub mod postgres_backend;
 pub mod postgres_backend_async;
 pub mod pq_proto;

-// dealing with connstring parsing and handy access to it's parts
-pub mod connstring;
-
 // helper functions for creating and fsyncing
 pub mod crashsafe;

@@ -39,6 +36,8 @@ pub mod sock_split;
 // common log initialisation routine
 pub mod logging;

+pub mod lock_file;
+
 // Misc
 pub mod accum;
 pub mod shutdown;
--- a/libs/utils/src/lock_file.rs
+++ b/libs/utils/src/lock_file.rs
@@ -0,0 +1,100 @@
+//! A module to create and read lock files. A lock file ensures that only one
+//! process is running at a time, in a particular directory.
+//!
+//! File locking is done using [`fcntl::flock`], which means that holding the
+//! lock on file only prevents acquiring another lock on it; all other
+//! operations are still possible on files. Other processes can still open, read,
+//! write, or remove the file, for example.
+//! If the file is removed while a process is holding a lock on it,
+//! the process that holds the lock does not get any error or notification.
+//! Furthermore, you can create a new file with the same name and lock the new file,
+//! while the old process is still running.
+//! Deleting the lock file while the locking process is still running is a bad idea!
+
+use std::{fs, io::Write, os::unix::prelude::AsRawFd, path::Path};
+
+use anyhow::Context;
+use nix::fcntl;
+
+use crate::crashsafe;
+
+/// Outcome of [`create_lock_file`].
+pub enum LockCreationResult {
+    /// The lock was acquired and the contents were written into the lock file.
+    Created {
+        /// The contents that were written into the lock file.
+        new_lock_contents: String,
+        /// Handle the lock was taken on; the lock is released when it is closed.
+        file: fs::File,
+    },
+    /// `flock` reported `EAGAIN`: the file is already locked.
+    AlreadyLocked {
+        /// Contents read back from the lock file.
+        existing_lock_contents: String,
+    },
+    /// Opening, locking, truncating, writing, syncing or reading the lock file failed.
+    CreationFailed(anyhow::Error),
+}
+
+/// Creates a lock file in the path given and writes the given contents into the file.
+///
+/// The file is opened (created if missing) and locked with a non-blocking exclusive
+/// `flock`. Once the lock is held the file is truncated, `contents` is written through
+/// the locked handle, and `crashsafe::fsync_file_and_parent` is called on the path.
+///
+/// Returns [`LockCreationResult::AlreadyLocked`] with the file's current contents when
+/// `flock` fails with `EAGAIN`, and [`LockCreationResult::CreationFailed`] on any error.
+///
+/// Note: The lock is automatically released when the file is closed. You might want to
+/// use Box::leak to make sure it lives until the end of the program.
+pub fn create_lock_file(lock_file_path: &Path, contents: String) -> LockCreationResult {
+    let lock_file = match fs::OpenOptions::new()
+        .create(true) // O_CREAT
+        .write(true)
+        .open(lock_file_path)
+        .context("Failed to open lock file")
+    {
+        Ok(file) => file,
+        Err(e) => return LockCreationResult::CreationFailed(e),
+    };
+
+    match fcntl::flock(
+        lock_file.as_raw_fd(),
+        fcntl::FlockArg::LockExclusiveNonblock,
+    ) {
+        Ok(()) => {
+            match lock_file
+                .set_len(0)
+                .context("Failed to truncate lockfile")
+                .and_then(|()| {
+                    // Write through the handle that holds the lock. Reopening the path
+                    // (as `fs::write` does) could create or modify a different file if
+                    // the path was removed or replaced after we opened it.
+                    (&lock_file).write_all(contents.as_bytes()).with_context(|| {
+                        format!("Failed to write '{contents}' contents into lockfile")
+                    })
+                })
+                .and_then(|()| {
+                    crashsafe::fsync_file_and_parent(lock_file_path)
+                        .context("Failed to fsync lockfile")
+                }) {
+                Ok(()) => LockCreationResult::Created {
+                    new_lock_contents: contents,
+                    file: lock_file,
+                },
+                Err(e) => LockCreationResult::CreationFailed(e),
+            }
+        }
+        Err(nix::errno::Errno::EAGAIN) => {
+            match fs::read_to_string(lock_file_path).context("Failed to read lockfile contents") {
+                Ok(existing_lock_contents) => LockCreationResult::AlreadyLocked {
+                    existing_lock_contents,
+                },
+                Err(e) => LockCreationResult::CreationFailed(e),
+            }
+        }
+        Err(e) => {
+            LockCreationResult::CreationFailed(anyhow::anyhow!("Failed to lock lockfile: {e}"))
+        }
+    }
+}
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -1,10 +1,6 @@
-use std::{
-    fs::{File, OpenOptions},
-    path::Path,
-    str::FromStr,
-};
+use std::str::FromStr;

-use anyhow::{Context, Result};
+use anyhow::Context;
 use strum_macros::{EnumString, EnumVariantNames};

 #[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
@@ -25,19 +21,8 @@ impl LogFormat {
        })
    }
 }
-pub fn init(
-    log_filename: impl AsRef<Path>,
-    daemonize: bool,
-    log_format: LogFormat,
-) -> Result<File> {
-    // Don't open the same file for output multiple times;
-    // the different fds could overwrite each other's output.
-    let log_file = OpenOptions::new()
-        .create(true)
-        .append(true)
-        .open(&log_filename)
-        .with_context(|| format!("failed to open {:?}", log_filename.as_ref()))?;

+pub fn init(log_format: LogFormat) -> anyhow::Result<()> {
    let default_filter_str = "info";

    // We fall back to printing all spans at info-level or above if
@@ -45,50 +30,16 @@ pub fn init(
    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str));

-    let x: File = log_file.try_clone().unwrap();
    let base_logger = tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_target(false)
        .with_ansi(false)
-        .with_writer(move || -> Box<dyn std::io::Write> {
-            // we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
-            // if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
-            // for example to be in line with docker log command which expects logs comimg from stdout
-            if daemonize {
-                Box::new(x.try_clone().unwrap())
-            } else {
-                Box::new(std::io::stdout())
-            }
-        });
+        .with_writer(std::io::stdout);

    match log_format {
        LogFormat::Json => base_logger.json().init(),
        LogFormat::Plain => base_logger.init(),
    }

-    Ok(log_file)
-}
-
-// #[cfg(test)]
-// Due to global logger, can't run tests in same process.
-// So until there's a non-global one, the tests are in ../tests/ as separate files.
-#[macro_export(local_inner_macros)]
-macro_rules! test_init_file_logger {
-    ($log_level:expr, $log_format:expr) => {{
-        use std::str::FromStr;
-        std::env::set_var("RUST_LOG", $log_level);
-
-        let tmp_dir = tempfile::TempDir::new().unwrap();
-        let log_file_path = tmp_dir.path().join("logfile");
-
-        let log_format = $crate::logging::LogFormat::from_str($log_format).unwrap();
-        let _log_file = $crate::logging::init(&log_file_path, true, log_format).unwrap();
-
-        let log_file = std::fs::OpenOptions::new()
-            .read(true)
-            .open(&log_file_path)
-            .unwrap();
-
-        log_file
-    }};
+    Ok(())
 }
--- a/libs/utils/tests/logger_json_test.rs
+++ b/libs/utils/tests/logger_json_test.rs
@@ -1,36 +0,0 @@
-// This could be in ../src/logging.rs but since the logger is global, these
-// can't be run in threads of the same process
-use std::fs::File;
-use std::io::{BufRead, BufReader, Lines};
-use tracing::*;
-use utils::test_init_file_logger;
-
-fn read_lines(file: File) -> Lines<BufReader<File>> {
-    BufReader::new(file).lines()
-}
-
-#[test]
-fn test_json_format_has_message_and_custom_field() {
-    std::env::set_var("RUST_LOG", "info");
-
-    let log_file = test_init_file_logger!("info", "json");
-
-    let custom_field: &str = "hi";
-    trace!(custom = %custom_field, "test log message");
-    debug!(custom = %custom_field, "test log message");
-    info!(custom = %custom_field, "test log message");
-    warn!(custom = %custom_field, "test log message");
-    error!(custom = %custom_field, "test log message");
-
-    let lines = read_lines(log_file);
-    for line in lines {
-        let content = line.unwrap();
-        let json_object = serde_json::from_str::<serde_json::Value>(&content).unwrap();
-
-        assert_eq!(json_object["fields"]["custom"], "hi");
-        assert_eq!(json_object["fields"]["message"], "test log message");
-
-        assert_ne!(json_object["level"], "TRACE");
-        assert_ne!(json_object["level"], "DEBUG");
-    }
-}
--- a/libs/utils/tests/logger_plain_test.rs
+++ b/libs/utils/tests/logger_plain_test.rs
@@ -1,36 +0,0 @@
-// This could be in ../src/logging.rs but since the logger is global, these
-// can't be run in threads of the same process
-use std::fs::File;
-use std::io::{BufRead, BufReader, Lines};
-use tracing::*;
-use utils::test_init_file_logger;
-
-fn read_lines(file: File) -> Lines<BufReader<File>> {
-    BufReader::new(file).lines()
-}
-
-#[test]
-fn test_plain_format_has_message_and_custom_field() {
-    std::env::set_var("RUST_LOG", "warn");
-
-    let log_file = test_init_file_logger!("warn", "plain");
-
-    let custom_field: &str = "hi";
-    trace!(custom = %custom_field, "test log message");
-    debug!(custom = %custom_field, "test log message");
-    info!(custom = %custom_field, "test log message");
-    warn!(custom = %custom_field, "test log message");
-    error!(custom = %custom_field, "test log message");
-
-    let lines = read_lines(log_file);
-    for line in lines {
-        let content = line.unwrap();
-        serde_json::from_str::<serde_json::Value>(&content).unwrap_err();
-        assert!(content.contains("custom=hi"));
-        assert!(content.contains("test log message"));
-
-        assert!(!content.contains("TRACE"));
-        assert!(!content.contains("DEBUG"));
-        assert!(!content.contains("INFO"));
-    }
-}
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -24,7 +24,6 @@ hex = "0.4.3"
 hyper = "0.14"
 itertools = "0.10.3"
 clap = { version = "4.0", features = ["string"] }
-daemonize = "0.4.1"
 tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
 tokio-util = { version = "0.7.3", features = ["io", "io-util"] }
 postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -17,7 +17,6 @@ use itertools::Itertools;
 use std::fmt::Write as FmtWrite;
 use std::io;
 use std::io::Write;
-use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::SystemTime;
 use tar::{Builder, EntryType, Header};
@@ -26,10 +25,8 @@ use tracing::*;
 use crate::tenant::Timeline;
 use pageserver_api::reltag::{RelTag, SlruKind};

-use postgres_ffi::pg_constants::{
-    AUTOPREWARM_FILE_NAME, PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA,
-};
 use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
+use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
 use postgres_ffi::TransactionId;
 use postgres_ffi::XLogFileName;
 use postgres_ffi::PG_TLI;
@@ -148,7 +145,6 @@ where
                self.ar.append(&header, &mut io::empty())?;
            }
        }
-        self.add_prewarm_file()?;

        // Gather non-relational files from object storage pages.
        for kind in [
@@ -222,21 +218,6 @@ where
        Ok(())
    }

-    //
-    // Include "autoprewarm-bin.blocks" in archive (if exists)
-    //
-    fn add_prewarm_file(&mut self) -> anyhow::Result<()> {
-        let path = self
-            .timeline
-            .conf
-            .timeline_path(&self.timeline.timeline_id, &self.timeline.tenant_id)
-            .join(AUTOPREWARM_FILE_NAME);
-        if PathBuf::from(&path).exists() {
-            self.ar.append_path_with_name(path, AUTOPREWARM_FILE_NAME)?;
-        }
-        Ok(())
-    }
-
    //
    // Generate SLRU segment files from repository.
    //
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -1,17 +1,14 @@
 //! Main entry point for the Page Server executable.

-use remote_storage::GenericRemoteStorage;
 use std::{env, ops::ControlFlow, path::Path, str::FromStr};
+
+use anyhow::{anyhow, Context};
+use clap::{Arg, ArgAction, Command};
+use fail::FailScenario;
+use nix::unistd::Pid;
 use tracing::*;

-use anyhow::{anyhow, bail, Context, Result};
-
-use clap::{Arg, ArgAction, Command};
-use daemonize::Daemonize;
-
-use fail::FailScenario;
 use metrics::set_build_info_metric;
-
 use pageserver::{
    config::{defaults::*, PageServerConf},
    http, page_cache, page_service, profiling, task_mgr,
@@ -19,20 +16,22 @@ use pageserver::{
    task_mgr::{
        BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
    },
-    tenant_mgr, virtual_file, LOG_FILE_NAME,
+    tenant_mgr, virtual_file,
 };
+use remote_storage::GenericRemoteStorage;
 use utils::{
    auth::JwtAuth,
-    logging,
+    lock_file, logging,
    postgres_backend::AuthType,
    project_git_version,
-    shutdown::exit_now,
    signals::{self, Signal},
    tcp_listener,
 };

 project_git_version!(GIT_VERSION);

+const PID_FILE_NAME: &str = "pageserver.pid";
+
 const FEATURES: &[&str] = &[
    #[cfg(feature = "testing")]
    "testing",
@@ -65,6 +64,7 @@ fn main() -> anyhow::Result<()> {
    let workdir = workdir
        .canonicalize()
        .with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
+
    let cfg_file_path = workdir.join("pageserver.toml");

    // Set CWD to workdir for non-daemon modes
@@ -75,8 +75,6 @@ fn main() -> anyhow::Result<()> {
        )
    })?;

-    let daemonize = arg_matches.get_flag("daemonize");
-
    let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
        ControlFlow::Continue(conf) => conf,
        ControlFlow::Break(()) => {
@@ -102,7 +100,7 @@ fn main() -> anyhow::Result<()> {
    virtual_file::init(conf.max_file_descriptors);
    page_cache::init(conf.page_cache_size);

-    start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
+    start_pageserver(conf).context("Failed to start pageserver")?;

    scenario.teardown();
    Ok(())
@@ -197,12 +195,34 @@ fn initialize_config(
    })
 }

-fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
-    // Initialize logger
-    let log_file = logging::init(LOG_FILE_NAME, daemonize, conf.log_format)?;
-
+fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
+    logging::init(conf.log_format)?;
    info!("version: {}", version());

+    let lock_file_path = conf.workdir.join(PID_FILE_NAME);
+    let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
+        lock_file::LockCreationResult::Created {
+            new_lock_contents,
+            file,
+        } => {
+            info!("Created lock file at {lock_file_path:?} with contents {new_lock_contents}");
+            file
+        }
+        lock_file::LockCreationResult::AlreadyLocked {
+            existing_lock_contents,
+        } => anyhow::bail!(
+            "Could not lock pid file; pageserver is already running in {:?} with PID {}",
+            conf.workdir,
+            existing_lock_contents
+        ),
+        lock_file::LockCreationResult::CreationFailed(e) => {
+            return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
+        }
+    };
+    // ensure that the lock file is held even if the main thread of the process panics
+    // we need to release the lock file only when the current process is gone
+    let _ = Box::leak(Box::new(lock_file));
+
    // TODO: Check that it looks like a valid repository before going further

    // bind sockets before daemonizing so we report errors early and do not return until we are listening
@@ -218,33 +238,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
    );
    let pageserver_listener = tcp_listener::bind(conf.listen_pg_addr.clone())?;

-    // NB: Don't spawn any threads before daemonizing!
-    if daemonize {
-        info!("daemonizing...");
-
-        // There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
-        // that we will see any accidental manual fprintf's or backtraces.
-        let stdout = log_file
-            .try_clone()
-            .with_context(|| format!("Failed to clone log file '{:?}'", log_file))?;
-        let stderr = log_file;
-
-        let daemonize = Daemonize::new()
-            .pid_file("pageserver.pid")
-            .working_directory(".")
-            .stdout(stdout)
-            .stderr(stderr);
-
-        // XXX: The parent process should exit abruptly right after
-        // it has spawned a child to prevent coverage machinery from
-        // dumping stats into a `profraw` file now owned by the child.
-        // Otherwise, the coverage data will be damaged.
-        match daemonize.exit_action(|| exit_now(0)).start() {
-            Ok(_) => info!("Success, daemonized"),
-            Err(err) => bail!("{err}. could not daemonize. bailing."),
-        }
-    }
-
    let signals = signals::install_shutdown_handlers()?;

    // start profiler (if enabled)
@@ -347,14 +340,6 @@ fn cli() -> Command {
    Command::new("Neon page server")
        .about("Materializes WAL stream to pages and serves them to the postgres")
        .version(version())
-        .arg(
-
-            Arg::new("daemonize")
-                .short('d')
-                .long("daemonize")
-                .action(ArgAction::SetTrue)
-                .help("Run in the background"),
-        )
        .arg(
            Arg::new("init")
                .long("init")
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -227,13 +227,10 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,

    let state = get_state(&request);

-    let timelines = tokio::task::spawn_blocking(move || {
-        let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
+    let timelines = info_span!("timeline_list", tenant = %tenant_id).in_scope(|| {
        let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
        Ok(tenant.list_timelines())
-    })
-    .await
-    .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
+    })?;

    let mut response_data = Vec::with_capacity(timelines.len());
    for timeline in timelines {
@@ -523,9 +520,7 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
    check_permission(&request, Some(tenant_id))?;

    // if tenant is in progress of downloading it can be absent in global tenant map
-    let tenant = tokio::task::spawn_blocking(move || tenant_mgr::get_tenant(tenant_id, false))
-        .await
-        .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
+    let tenant = tenant_mgr::get_tenant(tenant_id, false);

    let state = get_state(&request);
    let remote_index = &state.remote_index;
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -43,8 +43,6 @@ pub const DEFAULT_PG_VERSION: u32 = 14;
 pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
 pub const DELTA_FILE_MAGIC: u16 = 0x5A61;

-pub const LOG_FILE_NAME: &str = "pageserver.log";
-
 static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);

 /// Config for the Repository checkpointer
@@ -81,7 +79,6 @@ pub async fn shutdown_pageserver(exit_code: i32) {

    // There should be nothing left, but let's be sure
    task_mgr::shutdown_tasks(None, None, None).await;
-
    info!("Shut down successfully completed");
    std::process::exit(exit_code);
 }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -15,11 +15,10 @@ use futures::{Stream, StreamExt};
 use pageserver_api::models::{
    PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
    PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
-    PagestreamFcntlRequest, PagestreamFeMessage, PagestreamGetPageRequest,
-    PagestreamGetPageResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
+    PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
+    PagestreamNblocksRequest, PagestreamNblocksResponse,
 };
 use std::io;
-use std::io::Write;
 use std::net::TcpListener;
 use std::str;
 use std::str::FromStr;
@@ -46,12 +45,9 @@ use crate::task_mgr;
 use crate::task_mgr::TaskKind;
 use crate::tenant::Timeline;
 use crate::tenant_mgr;
-use crate::virtual_file::VirtualFile;
 use crate::CheckpointConfig;
-use crate::TEMP_FILE_SUFFIX;
-use postgres_ffi::pg_constants::{
-    AUTOPREWARM_FILE_NAME, DEFAULTTABLESPACE_OID, SMGR_FCNTL_CACHE_SNAPSHOT,
-};
+
+use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
 use postgres_ffi::BLCKSZ;

 fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Bytes>> + '_ {
@@ -304,6 +300,7 @@ impl PageServerHandler {
            trace!("query: {copy_data_bytes:?}");

            let neon_fe_msg = PagestreamFeMessage::parse(copy_data_bytes)?;
+
            let response = match neon_fe_msg {
                PagestreamFeMessage::Exists(req) => {
                    let _timer = metrics.get_rel_exists.start_timer();
@@ -321,10 +318,6 @@ impl PageServerHandler {
                    let _timer = metrics.get_db_size.start_timer();
                    self.handle_db_size_request(&timeline, &req).await
                }
-                PagestreamFeMessage::Fcntl(req) => {
-                    self.handle_fcntl_request(&timeline, &req).await?;
-                    continue;
-                }
            };

            let response = response.unwrap_or_else(|e| {
@@ -594,33 +587,6 @@ impl PageServerHandler {
        }))
    }

-    async fn handle_fcntl_request(
-        &self,
-        timeline: &Timeline,
-        req: &PagestreamFcntlRequest,
-    ) -> Result<()> {
-        if req.cmd == SMGR_FCNTL_CACHE_SNAPSHOT {
-            let temp_path = self
-                .conf
-                .timeline_path(&timeline.timeline_id, &timeline.tenant_id)
-                .join(format!("{AUTOPREWARM_FILE_NAME}.{TEMP_FILE_SUFFIX}"));
-            let mut file = VirtualFile::open_with_options(
-                &temp_path,
-                std::fs::OpenOptions::new().write(true).create_new(true),
-            )?;
-            file.write_all(&req.data)?;
-            drop(file);
-            let final_path = self
-                .conf
-                .timeline_path(&timeline.timeline_id, &timeline.tenant_id)
-                .join(AUTOPREWARM_FILE_NAME);
-            std::fs::rename(temp_path, &final_path)?;
-        } else {
-            warn!("Fcntl request {} is not supported", req.cmd);
-        }
-        Ok(())
-    }
-
    #[instrument(skip(self, pgb))]
    async fn handle_basebackup_request(
        &self,
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -523,7 +523,6 @@ impl Tenant {
        let timelines = self.timelines.lock().unwrap();
        let timelines_to_compact = timelines
            .iter()
-            .filter(|(_, timeline)| timeline.is_active())
            .map(|(timeline_id, timeline)| (*timeline_id, timeline.clone()))
            .collect::<Vec<_>>();
        drop(timelines);
@@ -995,7 +994,6 @@ impl Tenant {

            timelines
                .iter()
-                .filter(|(_, timeline)| timeline.is_active())
                .map(|(timeline_id, timeline_entry)| {
                    // This is unresolved question for now, how to do gc in presence of remote timelines
                    // especially when this is combined with branching.
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -62,8 +62,8 @@ use crate::{
 };

 pub struct Timeline {
-    pub conf: &'static PageServerConf,
-    pub tenant_conf: Arc<RwLock<TenantConfOpt>>,
+    conf: &'static PageServerConf,
+    tenant_conf: Arc<RwLock<TenantConfOpt>>,

    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -155,22 +155,19 @@ impl<E: Clone> TaskHandle<E> {

    /// Aborts current task, waiting for it to finish.
    pub async fn shutdown(self) {
-        match self.join_handle {
-            Some(jh) => {
-                self.cancellation.send(()).ok();
-                match jh.await {
-                    Ok(Ok(())) => debug!("Shutdown success"),
-                    Ok(Err(e)) => error!("Shutdown task error: {e:?}"),
-                    Err(join_error) => {
-                        if join_error.is_cancelled() {
-                            error!("Shutdown task was cancelled");
-                        } else {
-                            error!("Shutdown task join error: {join_error}")
-                        }
+        if let Some(jh) = self.join_handle {
+            self.cancellation.send(()).ok();
+            match jh.await {
+                Ok(Ok(())) => debug!("Shutdown success"),
+                Ok(Err(e)) => error!("Shutdown task error: {e:?}"),
+                Err(join_error) => {
+                    if join_error.is_cancelled() {
+                        error!("Shutdown task was cancelled");
+                    } else {
+                        error!("Shutdown task join error: {join_error}")
                    }
                }
            }
-            None => {}
        }
    }
 }
--- a/pageserver/src/walreceiver/connection_manager.rs
+++ b/pageserver/src/walreceiver/connection_manager.rs
@@ -93,7 +93,7 @@ pub fn spawn_connection_manager_task(
            }
        }
        .instrument(
-            info_span!("wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id),
+            info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id),
        ),
    );
 }
@@ -836,15 +836,20 @@ fn wal_stream_connection_string(
    listen_pg_addr_str: &str,
 ) -> anyhow::Result<String> {
    let sk_connstr = format!("postgresql://no_user@{listen_pg_addr_str}/no_db");
-    let me_conf = sk_connstr
-        .parse::<postgres::config::Config>()
-        .with_context(|| {
-            format!("Failed to parse pageserver connection string '{sk_connstr}' as a postgres one")
-        })?;
-    let (host, port) = utils::connstring::connection_host_port(&me_conf);
-    Ok(format!(
-        "host={host} port={port} options='-c timeline_id={timeline_id} tenant_id={tenant_id}'"
-    ))
+    sk_connstr
+        .parse()
+        .context("bad url")
+        .and_then(|url: url::Url| {
+            let host = url.host_str().context("host is missing")?;
+            let port = url.port().unwrap_or(5432); // default PG port
+
+            Ok(format!(
+                "host={host} \
+                 port={port} \
+                 options='-c timeline_id={timeline_id} tenant_id={tenant_id}'"
+            ))
+        })
+        .with_context(|| format!("Failed to parse pageserver connection URL '{sk_connstr}'"))
 }

 #[cfg(test)]
@@ -892,7 +897,7 @@ mod tests {
                        peer_horizon_lsn: None,
                        local_start_lsn: None,

-                        safekeeper_connstr: Some("no commit_lsn".to_string()),
+                        safekeeper_connstr: Some("no_commit_lsn".to_string()),
                    },
                    etcd_version: 0,
                    latest_update: now,
@@ -909,7 +914,7 @@ mod tests {
                        remote_consistent_lsn: None,
                        peer_horizon_lsn: None,
                        local_start_lsn: None,
-                        safekeeper_connstr: Some("no commit_lsn".to_string()),
+                        safekeeper_connstr: Some("no_commit_lsn".to_string()),
                    },
                    etcd_version: 0,
                    latest_update: now,
@@ -1005,7 +1010,7 @@ mod tests {
                        peer_horizon_lsn: None,
                        local_start_lsn: None,

-                        safekeeper_connstr: Some("not advanced Lsn".to_string()),
+                        safekeeper_connstr: Some("not_advanced_lsn".to_string()),
                    },
                    etcd_version: 0,
                    latest_update: now,
@@ -1023,7 +1028,7 @@ mod tests {
                        peer_horizon_lsn: None,
                        local_start_lsn: None,

-                        safekeeper_connstr: Some("not enough advanced Lsn".to_string()),
+                        safekeeper_connstr: Some("not_enough_advanced_lsn".to_string()),
                    },
                    etcd_version: 0,
                    latest_update: now,
@@ -1093,7 +1098,7 @@ mod tests {
                        peer_horizon_lsn: None,
                        local_start_lsn: None,

-                        safekeeper_connstr: Some("smaller commit_lsn".to_string()),
+                        safekeeper_connstr: Some("smaller_commit_lsn".to_string()),
                    },
                    etcd_version: 0,
                    latest_update: now,
@@ -1283,7 +1288,7 @@ mod tests {
                        peer_horizon_lsn: None,
                        local_start_lsn: None,

-                        safekeeper_connstr: Some("advanced by Lsn safekeeper".to_string()),
+                        safekeeper_connstr: Some("advanced_by_lsn_safekeeper".to_string()),
                    },
                    etcd_version: 0,
                    latest_update: now,
@@ -1307,7 +1312,7 @@ mod tests {
        );
        assert!(over_threshcurrent_candidate
            .wal_source_connstr
-            .contains("advanced by Lsn safekeeper"));
+            .contains("advanced_by_lsn_safekeeper"));

        Ok(())
    }
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -10,7 +10,7 @@
 //! process. Then we get the page image back. Communication with the
 //! postgres process happens via stdin/stdout
 //!
-//! See src/backend/tcop/zenith_wal_redo.c for the other side of
+//! See pgxn/neon_walredo/walredoproc.c for the other side of
 //! this communication.
 //!
 //! The Postgres process is assumed to be secure against malicious WAL
@@ -644,14 +644,12 @@ impl PostgresRedoProcess {
                ),
            ));
        } else {
-            // Limit shared cache for wal-redo-postres
+            // Limit shared cache for wal-redo-postgres
            let mut config = OpenOptions::new()
                .append(true)
                .open(PathBuf::from(&datadir).join("postgresql.conf"))?;
            config.write_all(b"shared_buffers=128kB\n")?;
            config.write_all(b"fsync=off\n")?;
-            config.write_all(b"shared_preload_libraries=neon\n")?;
-            config.write_all(b"neon.wal_redo=on\n")?;
        }

        // Start postgres itself
@@ -664,18 +662,15 @@ impl PostgresRedoProcess {
            .env("LD_LIBRARY_PATH", &pg_lib_dir_path)
            .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
            .env("PGDATA", &datadir)
-            // The redo process is not trusted, so it runs in seccomp mode
-            // (see seccomp in zenith_wal_redo.c). We have to make sure it doesn't
-            // inherit any file descriptors from the pageserver that would allow
-            // an attacker to do bad things.
+            // The redo process is not trusted, and runs in seccomp mode that
+            // doesn't allow it to open any files. We have to also make sure it
+            // doesn't inherit any file descriptors from the pageserver, that
+            // would allow an attacker to read any files that happen to be open
+            // in the pageserver.
            //
            // The Rust standard library makes sure to mark any file descriptors with
            // as close-on-exec by default, but that's not enough, since we use
            // libraries that directly call libc open without setting that flag.
-            //
-            // One example is the pidfile of the daemonize library, which doesn't
-            // currently mark file descriptors as close-on-exec. Either way, we
-            // want to be on the safe side and prevent accidental regression.
            .close_fds()
            .spawn()
            .map_err(|e| {
@@ -844,7 +839,7 @@ impl PostgresRedoProcess {
 }

 // Functions for constructing messages to send to the postgres WAL redo
-// process. See vendor/postgres/src/backend/tcop/zenith_wal_redo.c for
+// process. See pgxn/neon_walredo/walredoproc.c for
 // explanation of the protocol.

 fn build_begin_redo_for_block_msg(tag: BufferTag, buf: &mut Vec<u8>) {
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -4,7 +4,6 @@
 MODULE_big = neon
 OBJS = \
 	$(WIN32RES) \
-	inmem_smgr.o \
 	libpagestore.o \
 	libpqwalproposer.o \
 	pagestore_smgr.o \
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -419,15 +419,6 @@ pg_init_libpagestore(void)
 							   0,	/* no flags required */
 							   check_neon_id, NULL, NULL);

-	DefineCustomBoolVariable("neon.wal_redo",
-							 "start in wal-redo mode",
-							 NULL,
-							 &wal_redo,
-							 false,
-							 PGC_POSTMASTER,
-							 0,
-							 NULL, NULL, NULL);
-
 	DefineCustomIntVariable("neon.max_cluster_size",
 							"cluster size limit",
 							NULL,
@@ -452,13 +443,7 @@ pg_init_libpagestore(void)
 	neon_timeline_walproposer = neon_timeline;
 	neon_tenant_walproposer = neon_tenant;

-	if (wal_redo)
-	{
-		neon_log(PageStoreTrace, "set inmem_smgr hook");
-		smgr_hook = smgr_inmem;
-		smgr_init_hook = smgr_init_inmem;
-	}
-	else if (page_server_connstring && page_server_connstring[0])
+	if (page_server_connstring && page_server_connstring[0])
 	{
 		neon_log(PageStoreTrace, "set neon_smgr hook");
 		smgr_hook = smgr_neon;
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -32,7 +32,6 @@ typedef enum
 	T_NeonNblocksRequest,
 	T_NeonGetPageRequest,
 	T_NeonDbSizeRequest,
-	T_NeonFcntlRequest,

 	/* pagestore -> pagestore_client */
 	T_NeonExistsResponse = 100,
@@ -92,14 +91,6 @@ typedef struct
 	BlockNumber blkno;
 }			NeonGetPageRequest;

-typedef struct
-{
-	NeonRequest req;
-	int cmd;
-	int size;
-	char data[1];
-}			NeonFcntlRequest;
-
 /* supertype of all the Neon*Response structs below */
 typedef struct
 {
@@ -164,10 +155,6 @@ extern int32 max_cluster_size;
 extern const f_smgr *smgr_neon(BackendId backend, RelFileNode rnode);
 extern void smgr_init_neon(void);

-extern const f_smgr *smgr_inmem(BackendId backend, RelFileNode rnode);
-extern void smgr_init_inmem(void);
-extern void smgr_shutdown_inmem(void);
-
 /* Neon storage manager functionality */

 extern void neon_init(void);
@@ -180,7 +167,7 @@ extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
 						BlockNumber blocknum, char *buffer, bool skipFsync);
 extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
 						  BlockNumber blocknum);
-extern bool neon_prefetch_in_progress(SMgrRelation reln);
+extern void neon_reset_prefetch(SMgrRelation reln);
 extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 					  char *buffer);

@@ -197,29 +184,6 @@ extern void neon_truncate(SMgrRelation reln, ForkNumber forknum,
 						  BlockNumber nblocks);
 extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);

-/* neon wal-redo storage manager functionality */
-
-extern void inmem_init(void);
-extern void inmem_open(SMgrRelation reln);
-extern void inmem_close(SMgrRelation reln, ForkNumber forknum);
-extern void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
-extern bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
-extern void inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
-extern void inmem_extend(SMgrRelation reln, ForkNumber forknum,
-						 BlockNumber blocknum, char *buffer, bool skipFsync);
-extern bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
-						   BlockNumber blocknum);
-extern void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-					   char *buffer);
-extern void inmem_write(SMgrRelation reln, ForkNumber forknum,
-						BlockNumber blocknum, char *buffer, bool skipFsync);
-extern void inmem_writeback(SMgrRelation reln, ForkNumber forknum,
-							BlockNumber blocknum, BlockNumber nblocks);
-extern BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
-extern void inmem_truncate(SMgrRelation reln, ForkNumber forknum,
-						   BlockNumber nblocks);
-extern void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);
-
 /* utils for neon relsize cache */
 extern void relsize_hash_init(void);
 extern bool get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size);
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -99,7 +99,6 @@ char	   *page_server_connstring;
 /*with substituted password*/
 char	   *neon_timeline;
 char	   *neon_tenant;
-bool		wal_redo = false;
 int32		max_cluster_size;

 /* unlogged relation build states */
@@ -127,7 +126,7 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 * all prefetch responses has to be consumed.
 */

-#define MAX_PREFETCH_REQUESTS 1024
+#define MAX_PREFETCH_REQUESTS 128

 BufferTag	prefetch_requests[MAX_PREFETCH_REQUESTS];
 BufferTag	prefetch_responses[MAX_PREFETCH_REQUESTS];
@@ -220,15 +219,6 @@ nm_pack_request(NeonRequest * msg)

 				break;
 			}
-		case T_NeonFcntlRequest:
-			{
-				NeonFcntlRequest *msg_req = (NeonFcntlRequest *) msg;
-				pq_sendint32(&s, msg_req->cmd);
-				pq_sendint32(&s, msg_req->size);
-				pq_sendbytes(&s, msg_req->data, msg_req->size);
-
-				break;
-			}

 			/* pagestore -> pagestore_client. We never need to create these. */
 		case T_NeonExistsResponse:
@@ -1015,25 +1005,12 @@ neon_close(SMgrRelation reln, ForkNumber forknum)


 /*
- *	neon_prefetch_in_progress() -- Check if there are active prefetch requests.
+ *	neon_reset_prefetch() -- remove all previously registered prefetch requests
 */
-bool
-neon_prefetch_in_progress(SMgrRelation reln)
-{
-	return n_prefetch_requests + n_prefetch_responses != 0;
-}
-
-
 void
-neon_fcntl(SMgrRelation reln, int cmd, void const* data, size_t size)
+neon_reset_prefetch(SMgrRelation reln)
 {
-	NeonFcntlRequest* req = (NeonFcntlRequest *)palloc(sizeof(NeonFcntlRequest) + size);
-	req->req.tag = T_NeonFcntlRequest;
-	req->cmd = cmd;
-	req->size = (int)size;
-	memcpy(req->data, data, size);
-	page_server->send((NeonRequest*) req);
-	page_server->flush();
+	n_prefetch_requests = 0;
 }

 /*
@@ -1838,7 +1815,7 @@ static const struct f_smgr neon_smgr =
 	.smgr_unlink = neon_unlink,
 	.smgr_extend = neon_extend,
 	.smgr_prefetch = neon_prefetch,
-	.smgr_prefetch_in_progress = neon_prefetch_in_progress,
+	.smgr_reset_prefetch = neon_reset_prefetch,
 	.smgr_read = neon_read,
 	.smgr_write = neon_write,
 	.smgr_writeback = neon_writeback,
@@ -1849,7 +1826,6 @@ static const struct f_smgr neon_smgr =
 	.smgr_start_unlogged_build = neon_start_unlogged_build,
 	.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,
 	.smgr_end_unlogged_build = neon_end_unlogged_build,
-	.smgr_fcntl = neon_fcntl
 };

 const f_smgr *
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -43,6 +43,7 @@
 #if PG_VERSION_NUM >= 150000
 #include "access/xlogrecovery.h"
 #endif
+#include "storage/fd.h"
 #include "storage/latch.h"
 #include "miscadmin.h"
 #include "pgstat.h"
@@ -69,7 +70,8 @@
 #include "neon.h"
 #include "walproposer.h"
 #include "walproposer_utils.h"
-#include "replication/walpropshim.h"
+
+static bool syncSafekeepers = false;

 char	   *wal_acceptors_list;
 int			wal_acceptor_reconnect_timeout;
@@ -117,8 +119,8 @@ static TimestampTz last_reconnect_attempt;
 static WalproposerShmemState * walprop_shared;

 /* Prototypes for private functions */
-static void WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId);
-static void WalProposerStartImpl(void);
+static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
+static void WalProposerStart(void);
 static void WalProposerLoop(void);
 static void InitEventSet(void);
 static void UpdateEventSet(Safekeeper *sk, uint32 events);
@@ -186,9 +188,56 @@ pg_init_walproposer(void)
 	ProcessInterruptsCallback = backpressure_throttling_impl;

 	WalProposerRegister();
+}

-	WalProposerInit = &WalProposerInitImpl;
-	WalProposerStart = &WalProposerStartImpl;
+/*
+ * Entry point for `postgres --sync-safekeepers`.
+ */
+void
+WalProposerSync(int argc, char *argv[])
+{
+	struct stat stat_buf;
+
+	syncSafekeepers = true;
+#if PG_VERSION_NUM < 150000
+	ThisTimeLineID = 1;
+#endif
+
+	/*
+	 * Initialize postmaster_alive_fds as WaitEventSet checks them.
+	 *
+	 * Copied from InitPostmasterDeathWatchHandle()
+	 */
+	if (pipe(postmaster_alive_fds) < 0)
+		ereport(FATAL,
+				(errcode_for_file_access(),
+					errmsg_internal("could not create pipe to monitor postmaster death: %m")));
+	if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
+		ereport(FATAL,
+				(errcode_for_socket_access(),
+					errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
+
+	ChangeToDataDir();
+
+	/* Create pg_wal directory, if it doesn't exist */
+	if (stat(XLOGDIR, &stat_buf) != 0)
+	{
+		ereport(LOG, (errmsg("creating missing WAL directory \"%s\"", XLOGDIR)));
+		if (MakePGDirectory(XLOGDIR) < 0)
+		{
+			ereport(ERROR,
+					(errcode_for_file_access(),
+						errmsg("could not create directory \"%s\": %m",
+							   XLOGDIR)));
+			exit(1);
+		}
+	}
+
+	WalProposerInit(0, 0);
+
+	BackgroundWorkerUnblockSignals();
+
+	WalProposerStart();
 }

 static void
@@ -429,7 +478,7 @@ WalProposerRegister(void)
 }

 static void
-WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId)
+WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId)
 {
 	char	   *host;
 	char	   *sep;
@@ -508,7 +557,7 @@ WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId)
 }

 static void
-WalProposerStartImpl(void)
+WalProposerStart(void)
 {

 	/* Initiate connections to all safekeeper nodes */
--- a/pgxn/neon_walredo/Makefile
+++ b/pgxn/neon_walredo/Makefile
@@ -0,0 +1,22 @@
+# pgxn/neon_walredo/Makefile
+
+MODULE_big = neon_walredo
+OBJS = \
+	$(WIN32RES) \
+	inmem_smgr.o \
+	walredoproc.o \
+
+# This really should be guarded by $(with_libseccomp), but I couldn't
+# make that work with pgxs. So we always compile it, but its contents
+# are wrapped in #ifdef HAVE_LIBSECCOMP instead.
+OBJS += seccomp.o
+
+PGFILEDESC = "neon_walredo - helper process that runs in Neon pageserver"
+
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+
+ifeq ($(with_libseccomp),yes)
+SHLIB_LINK += -lseccomp
+endif
--- a/pgxn/neon_walredo/inmem_smgr.c
+++ b/pgxn/neon_walredo/inmem_smgr.c
@@ -3,9 +3,8 @@
 * inmem_smgr.c
 *
 * This is an implementation of the SMGR interface, used in the WAL redo
- * process (see src/backend/tcop/zenith_wal_redo.c). It has no persistent
- * storage, the pages that are written out are kept in a small number of
- * in-memory buffers.
+ * process. It has no persistent storage, the pages that are written out
+ * are kept in a small number of in-memory buffers.
 *
 * Normally, replaying a WAL record only needs to access a handful of
 * buffers, which fit in the normal buffer cache, so this is just for
@@ -15,15 +14,11 @@
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * IDENTIFICATION
- *	  contrib/neon/inmem_smgr.c
- *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"

 #include "access/xlog.h"
-#include "pagestore_client.h"
 #include "storage/block.h"
 #include "storage/buf_internals.h"
 #include "storage/relfilenode.h"
@@ -33,6 +28,8 @@
 #include "access/xlogutils.h"
 #endif

+#include "inmem_smgr.h"
+
 /* Size of the in-memory smgr */
 #define MAX_PAGES 64

@@ -59,10 +56,34 @@ locate_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno)
 	return -1;
 }

+
+/* neon wal-redo storage manager functionality */
+static void inmem_init(void);
+static void inmem_open(SMgrRelation reln);
+static void inmem_close(SMgrRelation reln, ForkNumber forknum);
+static void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
+static bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
+static void inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
+static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
+						 BlockNumber blocknum, char *buffer, bool skipFsync);
+static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
+						   BlockNumber blocknum);
+static void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+					   char *buffer);
+static void inmem_write(SMgrRelation reln, ForkNumber forknum,
+						BlockNumber blocknum, char *buffer, bool skipFsync);
+static void inmem_writeback(SMgrRelation reln, ForkNumber forknum,
+							BlockNumber blocknum, BlockNumber nblocks);
+static BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
+static void inmem_truncate(SMgrRelation reln, ForkNumber forknum,
+						   BlockNumber nblocks);
+static void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);
+
+
 /*
 *	inmem_init() -- Initialize private state
 */
-void
+static void
 inmem_init(void)
 {
 	used_pages = 0;
@@ -71,7 +92,7 @@ inmem_init(void)
 /*
 *	inmem_exists() -- Does the physical file exist?
 */
-bool
+static bool
 inmem_exists(SMgrRelation reln, ForkNumber forknum)
 {
 	for (int i = 0; i < used_pages; i++)
@@ -90,7 +111,7 @@ inmem_exists(SMgrRelation reln, ForkNumber forknum)
 *
 * If isRedo is true, it's okay for the relation to exist already.
 */
-void
+static void
 inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 {
 }
@@ -98,7 +119,7 @@ inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 /*
 *	inmem_unlink() -- Unlink a relation.
 */
-void
+static void
 inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo)
 {
 }
@@ -112,7 +133,7 @@ inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo)
 *		EOF).  Note that we assume writing a block beyond current EOF
 *		causes intervening file space to become filled with zeroes.
 */
-void
+static void
 inmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			 char *buffer, bool skipFsync)
 {
@@ -123,7 +144,7 @@ inmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 /*
 *  inmem_open() -- Initialize newly-opened relation.
 */
-void
+static void
 inmem_open(SMgrRelation reln)
 {
 }
@@ -131,7 +152,7 @@ inmem_open(SMgrRelation reln)
 /*
 *	inmem_close() -- Close the specified relation, if it isn't closed already.
 */
-void
+static void
 inmem_close(SMgrRelation reln, ForkNumber forknum)
 {
 }
@@ -139,7 +160,7 @@ inmem_close(SMgrRelation reln, ForkNumber forknum)
 /*
 *	inmem_prefetch() -- Initiate asynchronous read of the specified block of a relation
 */
-bool
+static bool
 inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
 	return true;
@@ -148,7 +169,7 @@ inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 /*
 * inmem_writeback() -- Tell the kernel to write pages back to storage.
 */
-void
+static void
 inmem_writeback(SMgrRelation reln, ForkNumber forknum,
 				BlockNumber blocknum, BlockNumber nblocks)
 {
@@ -157,7 +178,7 @@ inmem_writeback(SMgrRelation reln, ForkNumber forknum,
 /*
 *	inmem_read() -- Read the specified block from a relation.
 */
-void
+static void
 inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 		   char *buffer)
 {
@@ -177,7 +198,7 @@ inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 *		relation (ie, those before the current EOF).  To extend a relation,
 *		use mdextend().
 */
-void
+static void
 inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			char *buffer, bool skipFsync)
 {
@@ -224,7 +245,7 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 /*
 *	inmem_nblocks() -- Get the number of blocks stored in a relation.
 */
-BlockNumber
+static BlockNumber
 inmem_nblocks(SMgrRelation reln, ForkNumber forknum)
 {
 	/*
@@ -243,7 +264,7 @@ inmem_nblocks(SMgrRelation reln, ForkNumber forknum)
 /*
 *	inmem_truncate() -- Truncate relation to specified number of blocks.
 */
-void
+static void
 inmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 {
 }
@@ -251,7 +272,7 @@ inmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 /*
 *	inmem_immedsync() -- Immediately sync a relation to stable storage.
 */
-void
+static void
 inmem_immedsync(SMgrRelation reln, ForkNumber forknum)
 {
 }
--- a/pgxn/neon_walredo/inmem_smgr.h
+++ b/pgxn/neon_walredo/inmem_smgr.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * inmem_smgr.h
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef INMEM_SMGR_H
+#define INMEM_SMGR_H
+
+extern const f_smgr *smgr_inmem(BackendId backend, RelFileNode rnode);
+extern void smgr_init_inmem(void);
+
+#endif /* INMEM_SMGR_H */
--- a/pgxn/neon_walredo/neon_seccomp.h
+++ b/pgxn/neon_walredo/neon_seccomp.h
@@ -0,0 +1,22 @@
+#ifndef NEON_SECCOMP_H
+#define NEON_SECCOMP_H
+
+#include <seccomp.h>
+
+typedef struct {
+    int    psr_syscall; /* syscall number */
+    uint32 psr_action;  /* libseccomp action, e.g. SCMP_ACT_ALLOW */
+} PgSeccompRule;
+
+#define PG_SCMP(syscall, action)                \
+    (PgSeccompRule) {                           \
+        .psr_syscall = SCMP_SYS(syscall),       \
+        .psr_action = (action),                 \
+    }
+
+#define PG_SCMP_ALLOW(syscall) \
+    PG_SCMP(syscall, SCMP_ACT_ALLOW)
+
+extern void seccomp_load_rules(PgSeccompRule *syscalls, int count);
+
+#endif /* NEON_SECCOMP_H */
--- a/pgxn/neon_walredo/seccomp.c
+++ b/pgxn/neon_walredo/seccomp.c
@@ -0,0 +1,257 @@
+/*-------------------------------------------------------------------------
+ *
+ * seccomp.c
+ *	  Secure Computing BPF API wrapper.
+ *
+ * Pageserver delegates complex WAL decoding duties to postgres,
+ * which means that the latter might fall victim to carefully designed
+ * malicious WAL records and start doing harmful things to the system.
+ * To prevent this, it has been decided to limit possible interactions
+ * with the outside world using the Secure Computing BPF mode.
+ *
+ * We use this mode to disable all syscalls not in the allowlist. This
+ * approach has its pros & cons:
+ *
+ *  - We have to carefully handpick and maintain the set of syscalls
+ *    required for the WAL redo process. Core dumps help with that.
+ *    The method of trial and error seems to work reasonably well,
+ *    but it would be nice to find a proper way to "prove" that
+ *    the set in question is both necessary and sufficient.
+ *
+ *  - Once we enter the seccomp bpf mode, it's impossible to lift those
+ *    restrictions (otherwise, what kind of "protection" would that be?).
+ *    Thus, we have to either enable extra syscalls for the clean shutdown,
+ *    or exit the process immediately via _exit() instead of proc_exit().
+ *
+ *  - Should we simply use SCMP_ACT_KILL_PROCESS, or implement a custom
+ *    facility to deal with the forbidden syscalls? If we'd like to embed
+ *    a startup security test, we should go with the latter; In that
+ *    case, which one of the following options is preferable?
+ *
+ *      * Catch the denied syscalls with a signal handler using SCMP_ACT_TRAP.
+ *        Provide a common signal handler with a static switch to override
+ *        its behavior for the test case. This would undermine the whole
+ *        purpose of such protection, so we'd have to go further and remap
+ *        the memory backing the switch as readonly, then ban mprotect().
+ *        Ugly and fragile, to say the least.
+ *
+ *      * Yet again, catch the denied syscalls using SCMP_ACT_TRAP.
+ *        Provide 2 different signal handlers: one for a test case,
+ *        another for the main processing loop. Install the first one,
+ *        enable seccomp, perform the test, switch to the second one,
+ *        finally ban sigaction(), presto!
+ *
+ *      * Spoof the result of a syscall using SECCOMP_RET_ERRNO for the
+ *        test, then ban it altogether with another filter. The downside
+ *        of this solution is that we don't actually check that
+ *        SCMP_ACT_KILL_PROCESS/SCMP_ACT_TRAP works.
+ *
+ *    Either approach seems to require two eBPF filter programs,
+ *    which is unfortunate: the man page tells this is uncommon.
+ *    Maybe I (@funbringer) am missing something, though; I encourage
+ *    any reader to get familiar with it and scrutinize my conclusions.
+ *
+ * TODOs and ideas in no particular order:
+ *
+ *  - Do something about mmap() in musl's malloc().
+ *    Definitely not a priority if we don't care about musl.
+ *
+ *  - See if we can untangle PG's shutdown sequence (involving unlink()):
+ *
+ *      * Simplify (or rather get rid of) shmem setup in PG's WAL redo mode.
+ *      * Investigate chroot() or mount namespaces for better FS isolation.
+ *      * (Per Heikki) Simply call _exit(), no big deal.
+ *      * Come up with a better idea?
+ *
+ *  - Make use of seccomp's argument inspection (for what?).
+ *    Unfortunately, it views all syscall arguments as scalars,
+ *    so it won't work for e.g. string comparison in unlink().
+ *
+ *  - Benchmark with bpf jit on/off, try seccomp_syscall_priority().
+ *
+ *  - Test against various linux distros & glibc versions.
+ *    I suspect that certain libc functions might involve slightly
+ *    different syscalls, e.g. select/pselect6/pselect6_time64/whatever.
+ *
+ *  - Test on any arch other than amd64 to see if it works there.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+/*
+ * I couldn't find a good way to do a conditional OBJS += seccomp.o in
+ * the Makefile, so this file is compiled even when seccomp is disabled,
+ * it's just empty in that case.
+ */
+#ifdef HAVE_LIBSECCOMP
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "miscadmin.h"
+
+#include "neon_seccomp.h"
+
+static void die(int code, const char *str);
+
+static volatile sig_atomic_t seccomp_test_sighandler_done = false;	/* set from the SIGSYS test handler */
+static void seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt);
+static void seccomp_deny_sighandler(int signum, siginfo_t *info, void *cxt);
+
+static int do_seccomp_load_rules(PgSeccompRule *rules, int count, uint32 def_action);
+
+void
+seccomp_load_rules(PgSeccompRule *rules, int count)
+{
+	struct sigaction action = { .sa_flags = SA_SIGINFO };
+	PgSeccompRule rule;
+	long fd;
+
+	/*
+	 * Install a test signal handler.
+	 * XXX: pqsignal() is too restrictive for our purposes,
+	 * since we'd like to examine the contents of siginfo_t.
+	 */
+	action.sa_sigaction = seccomp_test_sighandler;
+	if (sigaction(SIGSYS, &action, NULL) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_SYSTEM_ERROR),
+				 errmsg("seccomp: could not install test SIGSYS handler")));
+
+	/*
+	 * First, check that open of a well-known file works.
+	 * XXX: We use raw syscall() to call the very open().
+	 */
+	fd = syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0);
+	if (seccomp_test_sighandler_done)
+		ereport(FATAL,
+				(errcode(ERRCODE_SYSTEM_ERROR),
+				 errmsg("seccomp: signal handler test flag was set unexpectedly")));
+	if (fd < 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_SYSTEM_ERROR),
+				 errmsg("seccomp: could not open /dev/null for seccomp testing: %m")));
+	close((int) fd);
+
+	/* Set a trap on open() to test seccomp bpf */
+	rule = PG_SCMP(open, SCMP_ACT_TRAP);
+	if (do_seccomp_load_rules(&rule, 1, SCMP_ACT_ALLOW) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_SYSTEM_ERROR),
+				 errmsg("seccomp: could not load test trap")));
+
+	/* Finally, check that open() now raises SIGSYS */
+	(void) syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0);
+	if (!seccomp_test_sighandler_done)
+		ereport(FATAL,
+				(errcode(ERRCODE_SYSTEM_ERROR),
+				 errmsg("seccomp: SIGSYS handler doesn't seem to work")));
+
+	/* Now that everything seems to work, install a proper handler */
+	action.sa_sigaction = seccomp_deny_sighandler;
+	if (sigaction(SIGSYS, &action, NULL) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_SYSTEM_ERROR),
+				 errmsg("seccomp: could not install SIGSYS handler")));
+
+	/* If this succeeds, any syscall not in the list will crash the process */
+	if (do_seccomp_load_rules(rules, count, SCMP_ACT_TRAP) != 0)
+		ereport(FATAL,
+				(errcode(ERRCODE_SYSTEM_ERROR),
+				 errmsg("seccomp: could not enter seccomp mode")));
+}
+
+/*
+ * Enter seccomp mode with a BPF filter that will only allow
+ * certain syscalls to proceed.
+ */
+static int
+do_seccomp_load_rules(PgSeccompRule *rules, int count, uint32 def_action)
+{
+	scmp_filter_ctx ctx;
+	int rc = -1;
+
+	/* Create a context with a default action for syscalls not in the list */
+	if ((ctx = seccomp_init(def_action)) == NULL)
+		goto cleanup;
+
+	for (int i = 0; i < count; i++)
+	{
+		PgSeccompRule *rule = &rules[i];
+		if ((rc = seccomp_rule_add(ctx, rule->psr_action, rule->psr_syscall, 0)) != 0)
+			goto cleanup;
+	}
+
+	/* Try building & loading the program into the kernel */
+	if ((rc = seccomp_load(ctx)) != 0)
+		goto cleanup;
+
+cleanup:
+	/*
+	 * We don't need the context anymore regardless of the result,
+	 * since either we failed or the eBPF program has already been
+	 * loaded into the linux kernel.
+	 */
+	seccomp_release(ctx);
+	return rc;
+}
+
+static void
+die(int code, const char *str)
+{
+	/* work around gcc ignoring that it shouldn't warn on (void) result being unused */
+	ssize_t _unused pg_attribute_unused();
+	/* Best effort write to stderr */
+	_unused = write(fileno(stderr), str, strlen(str));
+
+	/* XXX: we don't want to run any atexit callbacks */
+	_exit(code);
+}
+
+static void
+seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unused())
+{
+#define DIE_PREFIX "seccomp test signal handler: "
+
+	/* Check that this signal handler is used only for a single test case */
+	if (seccomp_test_sighandler_done)
+		die(1, DIE_PREFIX "test handler should only be used for 1 test\n");
+	seccomp_test_sighandler_done = true;
+
+	if (signum != SIGSYS)
+		die(1, DIE_PREFIX "bad signal number\n");
+
+	/* TODO: maybe somehow extract the hardcoded syscall number */
+	if (info->si_syscall != SCMP_SYS(open))
+		die(1, DIE_PREFIX "bad syscall number\n");
+
+#undef DIE_PREFIX
+}
+
+static void
+seccomp_deny_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unused())
+{
+	/*
+	 * Unfortunately, we can't use seccomp_syscall_resolve_num_arch()
+	 * to resolve the syscall's name, since it calls strdup()
+	 * under the hood (wtf!).
+	 */
+	char buffer[128];
+	(void)snprintf(buffer, lengthof(buffer),
+			"---------------------------------------\n"
+			"seccomp: bad syscall %d\n"
+			"---------------------------------------\n",
+			info->si_syscall);
+
+	/*
+	 * Instead of silently crashing the process with
+	 * a fake SIGSYS caused by SCMP_ACT_KILL_PROCESS,
+	 * we'd like to receive a real SIGSYS to print the
+	 * message and *then* immediately exit.
+	 */
+	die(1, buffer);
+}
+
+#endif		/* HAVE_LIBSECCOMP */
--- a/pgxn/neon_walredo/walredoproc.c
+++ b/pgxn/neon_walredo/walredoproc.c
@@ -0,0 +1,847 @@
+/*-------------------------------------------------------------------------
+ *
+ * walredoproc.c
+ *	  Entry point for WAL redo helper
+ *
+ *
+ * This file contains an alternative main() function for the 'postgres'
+ * binary. When it is used, we go into a special mode that's similar
+ * to the single user mode. We don't launch postmaster or any auxiliary
+ * processes. Instead, we wait for commands from 'stdin', and respond to
+ * 'stdout'.
+ *
+ * The protocol through stdin/stdout is loosely based on the libpq protocol.
+ * The process accepts messages through stdin, and each message has the format:
+ *
+ * char   msgtype;
+ * int32  length; // length of message including 'length' but excluding
+ *                // 'msgtype', in network byte order
+ * <payload>
+ *
+ * There are four message types:
+ *
+ * BeginRedoForBlock ('B'): Prepare for WAL replay for given block
+ * PushPage ('P'): Copy a page image (in the payload) to buffer cache
+ * ApplyRecord ('A'): Apply a WAL record (in the payload)
+ * GetPage ('G'): Return a page image from buffer cache.
+ *
+ * Currently, you only get a response to GetPage requests; the response is
+ * simply an 8k page, without any headers. Errors are logged to stderr.
+ *
+ * FIXME:
+ * - this currently requires a valid PGDATA, and creates a lock file there
+ *   like a normal postmaster. There's no fundamental reason for that, though.
+ * - should have EndRedoForBlock, and flush page cache, to allow using this
+ *   mechanism for more than one block without restarting the process.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/time.h>
+#include <sys/resource.h>
+#endif
+
+#if defined(HAVE_LIBSECCOMP) && defined(__GLIBC__)
+#define MALLOC_NO_MMAP
+#include <malloc.h>
+#endif
+
+#ifndef HAVE_GETRUSAGE
+#include "rusagestub.h"
+#endif
+
+#include "access/xlog.h"
+#include "access/xlog_internal.h"
+#if PG_VERSION_NUM >= 150000
+#include "access/xlogrecovery.h"
+#endif
+#include "access/xlogutils.h"
+#include "catalog/pg_class.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "postmaster/postmaster.h"
+#include "storage/buf_internals.h"
+#include "storage/bufmgr.h"
+#include "storage/ipc.h"
+#include "storage/proc.h"
+#include "storage/smgr.h"
+#include "tcop/tcopprot.h"
+#include "utils/memutils.h"
+#include "utils/ps_status.h"
+
+#include "inmem_smgr.h"
+
+#ifdef HAVE_LIBSECCOMP
+#include "neon_seccomp.h"
+#endif
+
+PG_MODULE_MAGIC;
+
+static int	ReadRedoCommand(StringInfo inBuf);
+static void BeginRedoForBlock(StringInfo input_message);
+static void PushPage(StringInfo input_message);
+static void ApplyRecord(StringInfo input_message);
+static void apply_error_callback(void *arg);
+static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
+static void GetPage(StringInfo input_message);
+static ssize_t buffered_read(void *buf, size_t count);
+
+static BufferTag target_redo_tag;
+
+static XLogReaderState *reader_state;
+
+#define TRACE DEBUG5
+
+#ifdef HAVE_LIBSECCOMP
+static void
+enter_seccomp_mode(void)
+{
+	PgSeccompRule syscalls[] =
+	{
+		/* Hard requirements */
+		PG_SCMP_ALLOW(exit_group),
+		PG_SCMP_ALLOW(pselect6),
+		PG_SCMP_ALLOW(read),
+		PG_SCMP_ALLOW(select),
+		PG_SCMP_ALLOW(write),
+
+		/* Memory allocation */
+		PG_SCMP_ALLOW(brk),
+#ifndef MALLOC_NO_MMAP
+		/* TODO: musl doesn't have mallopt */
+		PG_SCMP_ALLOW(mmap),
+		PG_SCMP_ALLOW(munmap),
+#endif
+		/*
+		 * getpid() is called on assertion failure, in ExceptionalCondition.
+		 * It's not really needed, but seems pointless to hide it either. The
+		 * system call is unlikely to expose a kernel vulnerability, and the PID
+		 * is stored in MyProcPid anyway.
+		 */
+		PG_SCMP_ALLOW(getpid),
+
+		/* Enable those for a proper shutdown.
+		PG_SCMP_ALLOW(munmap),
+		PG_SCMP_ALLOW(shmctl),
+		PG_SCMP_ALLOW(shmdt),
+		PG_SCMP_ALLOW(unlink), // shm_unlink
+		*/
+	};
+
+#ifdef MALLOC_NO_MMAP
+	/* Ask glibc not to use mmap() */
+	mallopt(M_MMAP_MAX, 0);
+#endif
+
+	seccomp_load_rules(syscalls, lengthof(syscalls));
+}
+#endif /* HAVE_LIBSECCOMP */
+
+/*
+ * Entry point for the WAL redo process.
+ *
+ * Performs similar initialization as PostgresMain does for normal
+ * backend processes. Some initialization was done in CallExtMain
+ * already.
+ *
+ * argc/argv are only scanned for the "--disable-seccomp" switch, and only
+ * when built with HAVE_LIBSECCOMP. After initialization, the function loops
+ * reading commands from ReadRedoCommand() and dispatching them; the loop has
+ * no normal exit and is left only through _exit(), proc_exit() or an error
+ * report.
+ */
+void
+WalRedoMain(int argc, char *argv[])
+{
+	int			firstchar;
+	StringInfoData input_message;
+#ifdef HAVE_LIBSECCOMP
+	bool		enable_seccomp;
+#endif
+
+	am_wal_redo_postgres = true;
+
+	/*
+	 * WAL redo does not need a large number of buffers. And speed of
+	 * DropRelFileNodeAllLocalBuffers() is proportional to the number of
+	 * buffers. So let's keep it small (default value is 1024)
+	 */
+	num_temp_buffers = 4;
+
+	/*
+	 * install the simple in-memory smgr
+	 */
+	smgr_hook = smgr_inmem;
+	smgr_init_hook = smgr_init_inmem;
+
+	/*
+	 * Validate we have been given a reasonable-looking DataDir and change into it.
+	 */
+	checkDataDir();
+	ChangeToDataDir();
+
+	/*
+	 * Create lockfile for data directory.
+	 */
+	CreateDataDirLockFile(false);
+
+	/* read control file (error checking and contains config ) */
+	LocalProcessControlFile(false);
+
+	/*
+	 * process any libraries that should be preloaded at postmaster start
+	 */
+	process_shared_preload_libraries();
+
+	/* Initialize MaxBackends (if under postmaster, was done already) */
+	InitializeMaxBackends();
+
+#if PG_VERSION_NUM >= 150000
+	/*
+	 * Give preloaded libraries a chance to request additional shared memory.
+	 */
+	process_shmem_requests();
+
+	/*
+	 * Now that loadable modules have had their chance to request additional
+	 * shared memory, determine the value of any runtime-computed GUCs that
+	 * depend on the amount of shared memory required.
+	 */
+	InitializeShmemGUCs();
+
+	/*
+	 * Now that modules have been loaded, we can process any custom resource
+	 * managers specified in the wal_consistency_checking GUC.
+	 */
+	InitializeWalConsistencyChecking();
+#endif
+
+	CreateSharedMemoryAndSemaphores();
+
+	/*
+	 * Remember stand-alone backend startup time, roughly at the same point
+	 * during startup that postmaster does so.
+	 */
+	PgStartTime = GetCurrentTimestamp();
+
+	/*
+	 * Create a per-backend PGPROC struct in shared memory. We must do
+	 * this before we can use LWLocks.
+	 */
+	InitAuxiliaryProcess();
+
+	SetProcessingMode(NormalProcessing);
+
+	/* Redo routines won't work if we're not "in recovery" */
+	InRecovery = true;
+
+	/*
+	 * Create the memory context we will use in the main loop.
+	 *
+	 * MessageContext is reset once per iteration of the main loop, ie, upon
+	 * completion of processing of each command message from the client.
+	 *
+	 * NOTE(review): no MemoryContextReset() call is visible in the loop
+	 * below; only the input StringInfo is reset. Confirm whether the
+	 * statement above still holds.
+	 */
+	MessageContext = AllocSetContextCreate(TopMemoryContext,
+										   "MessageContext",
+										   ALLOCSET_DEFAULT_SIZES);
+
+	/* we need a ResourceOwner to hold buffer pins */
+	Assert(CurrentResourceOwner == NULL);
+	CurrentResourceOwner = ResourceOwnerCreate(NULL, "wal redo");
+
+	/* Initialize resource managers */
+	for (int rmid = 0; rmid <= RM_MAX_ID; rmid++)
+	{
+		if (RmgrTable[rmid].rm_startup != NULL)
+			RmgrTable[rmid].rm_startup();
+	}
+	/* One reader is allocated up front and reused for every ApplyRecord */
+	reader_state = XLogReaderAllocate(wal_segment_size, NULL, XL_ROUTINE(), NULL);
+
+#ifdef HAVE_LIBSECCOMP
+	/* We prefer opt-out to opt-in for greater security */
+	enable_seccomp = true;
+	for (int i = 1; i < argc; i++)
+		if (strcmp(argv[i], "--disable-seccomp") == 0)
+			enable_seccomp = false;
+
+	/*
+	 * We deliberately delay the transition to the seccomp mode
+	 * until it's time to enter the main processing loop;
+	 * else we'd have to add a lot more syscalls to the allowlist.
+	 */
+	if (enable_seccomp)
+		enter_seccomp_mode();
+#endif /* HAVE_LIBSECCOMP */
+
+	/*
+	 * Main processing loop
+	 */
+	MemoryContextSwitchTo(MessageContext);
+	initStringInfo(&input_message);
+
+	for (;;)
+	{
+		/* Release memory left over from prior query cycle. */
+		resetStringInfo(&input_message);
+
+		set_ps_display("idle");
+
+		/*
+		 * Read a command (loop blocks here) and dispatch on its type byte.
+		 */
+		firstchar = ReadRedoCommand(&input_message);
+		switch (firstchar)
+		{
+			case 'B':			/* BeginRedoForBlock */
+				BeginRedoForBlock(&input_message);
+				break;
+
+			case 'P':			/* PushPage */
+				PushPage(&input_message);
+				break;
+
+			case 'A':			/* ApplyRecord */
+				ApplyRecord(&input_message);
+				break;
+
+			case 'G':			/* GetPage */
+				GetPage(&input_message);
+				break;
+
+				/*
+				 * EOF means we're done. Perform normal shutdown.
+				 */
+			case EOF:
+				ereport(LOG,
+						(errmsg("received EOF on stdin, shutting down")));
+
+#ifdef HAVE_LIBSECCOMP
+				/*
+				 * Skip the shutdown sequence, leaving some garbage behind.
+				 * Hopefully, postgres will clean it up in the next run.
+				 * This way we don't have to enable extra syscalls, which is nice.
+				 * See enter_seccomp_mode() above.
+				 */
+				if (enable_seccomp)
+					_exit(0);
+#endif /* HAVE_LIBSECCOMP */
+				/*
+				 * NOTE: if you are tempted to add more code here, DON'T!
+				 * Whatever you had in mind to do should be set up as an
+				 * on_proc_exit or on_shmem_exit callback, instead. Otherwise
+				 * it will fail to be called during other backend-shutdown
+				 * scenarios.
+				 */
+				proc_exit(0);
+
+				/* any other type byte is reported as a protocol violation */
+			default:
+				ereport(FATAL,
+						(errcode(ERRCODE_PROTOCOL_VIOLATION),
+						 errmsg("invalid frontend message type %d",
+								firstchar)));
+		}
+	}							/* end of input-reading loop */
+}
+
+
+/*
+ * Version compatibility wrapper for ReadBufferWithoutRelcache, which takes
+ * one more argument from PostgreSQL 15 on.
+ */
+static inline Buffer
+NeonRedoReadBuffer(RelFileNode rnode,
+		   ForkNumber forkNum, BlockNumber blockNum,
+		   ReadBufferMode mode)
+{
+	Buffer		result;
+
+#if PG_VERSION_NUM >= 150000
+	/* WAL redo is only performed on permanent rels, hence the final 'true' */
+	result = ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
+									   NULL,	/* no strategy */
+									   true);
+#else
+	result = ReadBufferWithoutRelcache(rnode, forkNum, blockNum, mode,
+									   NULL);	/* no strategy */
+#endif
+
+	return result;
+}
+
+
+/*
+ * Debugging aid: format 'len' bytes starting at 'data' as a hex dump, with a
+ * line break after every 32 bytes.
+ *
+ * Returns the data buffer of the StringInfo built here; nothing in this
+ * function frees it.
+ */
+pg_attribute_unused()
+static char *
+pprint_buffer(char *data, int len)
+{
+	StringInfoData out;
+	int			pos;
+
+	initStringInfo(&out);
+	appendStringInfo(&out, "\n");
+
+	for (pos = 0; pos < len; pos++)
+	{
+		/* mask to the low 8 bits so a signed char never prints sign-extended */
+		appendStringInfo(&out, "%02x ", (data[pos] & 0xff) );
+
+		/* the 32nd byte of each group ends the line */
+		if (pos % 32 == 31)
+			appendStringInfo(&out, "\n");
+	}
+
+	appendStringInfo(&out, "\n");
+
+	return out.data;
+}
+
+/* ----------------------------------------------------------------
+ *		routines to obtain user input
+ * ----------------------------------------------------------------
+ */
+
+/*
+ * Read the next command from the client.
+ *
+ *	A message consists of a one-byte type code, a 4-byte length in network
+ *	byte order (the length counts itself but not the type byte), and the
+ *	payload. The payload is stored in inBuf, NUL-terminated, and inBuf->len
+ *	is set to the payload length.
+ *
+ *	Returns the message type code as a value in the range 0..255.
+ *
+ *	EOF is returned if end-of-file is seen before the first byte of a
+ *	message; time to shut down. A failed read, a message truncated by EOF,
+ *	or a length smaller than 4 raises an ERROR.
+ * ----------------
+ */
+static int
+ReadRedoCommand(StringInfo inBuf)
+{
+	ssize_t		ret;
+	char		hdr[1 + sizeof(int32)];
+	int			qtype;
+	int32		len;
+
+	/* Read message type and message length */
+	ret = buffered_read(hdr, sizeof(hdr));
+	if (ret != sizeof(hdr))
+	{
+		if (ret == 0)
+			return EOF;
+		else if (ret < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONNECTION_FAILURE),
+					 errmsg("could not read message header: %m")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_PROTOCOL_VIOLATION),
+					 errmsg("unexpected EOF")));
+	}
+
+	/*
+	 * Convert the type byte via unsigned char, the way getc() does. Where
+	 * plain char is signed, a 0xFF type byte would otherwise become -1 and
+	 * be indistinguishable from the EOF return value, making the caller shut
+	 * down instead of reporting an invalid message type.
+	 */
+	qtype = (unsigned char) hdr[0];
+	memcpy(&len, &hdr[1], sizeof(int32));
+	len = pg_ntoh32(len);
+
+	if (len < 4)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROTOCOL_VIOLATION),
+				 errmsg("invalid message length")));
+
+	len -= 4;					/* discount length itself */
+
+	/* Read the message payload */
+	enlargeStringInfo(inBuf, len);
+	ret = buffered_read(inBuf->data, len);
+	if (ret != len)
+	{
+		if (ret < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONNECTION_FAILURE),
+					 errmsg("could not read message: %m")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_PROTOCOL_VIOLATION),
+					 errmsg("unexpected EOF")));
+	}
+	inBuf->len = len;
+	inBuf->data[len] = '\0';
+
+	return qtype;
+}
+
+/*
+ * Prepare for WAL replay on the given block.
+ *
+ * Records the block as the current redo target, forgets any previously
+ * pushed page image, and makes sure the cached relation size covers the
+ * target block.
+ *
+ * Message format, in the order the fields are read:
+ *
+ *	ForkNumber	(1 byte)
+ *	spcNode		(4 bytes)
+ *	dbNode		(4 bytes)
+ *	relNode		(4 bytes)
+ *	BlockNumber	(4 bytes)
+ */
+static void
+BeginRedoForBlock(StringInfo input_message)
+{
+	RelFileNode rel;
+	ForkNumber	fork;
+	BlockNumber blkno;
+	BlockNumber min_nblocks;
+	SMgrRelation smgr;
+
+	fork = pq_getmsgbyte(input_message);
+	rel.spcNode = pq_getmsgint(input_message, 4);
+	rel.dbNode = pq_getmsgint(input_message, 4);
+	rel.relNode = pq_getmsgint(input_message, 4);
+	blkno = pq_getmsgint(input_message, 4);
+
+	/* no page image has been supplied for this block yet */
+	wal_redo_buffer = InvalidBuffer;
+
+	INIT_BUFFERTAG(target_redo_tag, rel, fork, blkno);
+
+	elog(TRACE, "BeginRedoForBlock %u/%u/%u.%d blk %u",
+		 target_redo_tag.rnode.spcNode,
+		 target_redo_tag.rnode.dbNode,
+		 target_redo_tag.rnode.relNode,
+		 target_redo_tag.forkNum,
+		 target_redo_tag.blockNum);
+
+	/* raise the cached fork size if it is unknown or too small */
+	smgr = smgropen(rel, InvalidBackendId, RELPERSISTENCE_PERMANENT);
+	min_nblocks = blkno + 1;
+	if (smgr->smgr_cached_nblocks[fork] == InvalidBlockNumber ||
+		smgr->smgr_cached_nblocks[fork] < min_nblocks)
+		smgr->smgr_cached_nblocks[fork] = min_nblocks;
+}
+
+/*
+ * Receive a page image from the client and install it in the buffer cache.
+ *
+ * The buffer holding the page is remembered in wal_redo_buffer.
+ *
+ * Message format, in the order the fields are read:
+ *
+ *	ForkNumber	(1 byte)
+ *	spcNode		(4 bytes)
+ *	dbNode		(4 bytes)
+ *	relNode		(4 bytes)
+ *	BlockNumber	(4 bytes)
+ *	page content	(BLCKSZ bytes)
+ */
+static void
+PushPage(StringInfo input_message)
+{
+	RelFileNode rel;
+	ForkNumber	fork;
+	BlockNumber blkno;
+	const char *image;
+	Buffer		buffer;
+
+	fork = pq_getmsgbyte(input_message);
+	rel.spcNode = pq_getmsgint(input_message, 4);
+	rel.dbNode = pq_getmsgint(input_message, 4);
+	rel.relNode = pq_getmsgint(input_message, 4);
+	blkno = pq_getmsgint(input_message, 4);
+	image = pq_getmsgbytes(input_message, BLCKSZ);
+
+	/* get a zeroed, locked buffer for the block and overwrite its contents */
+	buffer = NeonRedoReadBuffer(rel, fork, blkno, RBM_ZERO_AND_LOCK);
+	wal_redo_buffer = buffer;
+	memcpy(BufferGetPage(buffer), image, BLCKSZ);
+	MarkBufferDirty(buffer);	/* pro forma */
+	UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * Receive a WAL record, and apply it.
+ *
+ * All the pages should be loaded into the buffer cache by PushPage calls already.
+ *
+ * The record is decoded into the file-level reader_state and handed to the
+ * rm_redo routine of its resource manager. While that routine runs,
+ * redo_block_filter is installed as redo_read_buffer_filter, and
+ * apply_error_callback is pushed on the error context stack.
+ *
+ * Raises an ERROR if the payload size does not match the record's
+ * xl_tot_len, or if the record cannot be decoded.
+ */
+static void
+ApplyRecord(StringInfo input_message)
+{
+	char	   *errormsg;
+	XLogRecPtr	lsn;
+	XLogRecord *record;
+	int			nleft;
+	ErrorContextCallback errcallback;
+#if PG_VERSION_NUM >= 150000
+	DecodedXLogRecord *decoded;
+#endif
+
+	/*
+	 * message format:
+	 *
+	 * LSN (the *end* of the record)
+	 * record
+	 */
+	lsn = pq_getmsgint64(input_message);
+
+	smgrinit();					/* reset inmem smgr state */
+
+	/* note: the input must be aligned here */
+	record = (XLogRecord *) pq_getmsgbytes(input_message, sizeof(XLogRecord));
+
+	/* what follows the fixed header must be exactly the rest of the record */
+	nleft = input_message->len - input_message->cursor;
+	if (record->xl_tot_len != sizeof(XLogRecord) + nleft)
+		elog(ERROR, "mismatch between record (%d) and message size (%d)",
+			 record->xl_tot_len, (int) sizeof(XLogRecord) + nleft);
+
+	/* Setup error traceback support for ereport() */
+	errcallback.callback = apply_error_callback;
+	errcallback.arg = (void *) reader_state;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
+	XLogBeginRead(reader_state, lsn);
+
+#if PG_VERSION_NUM >= 150000
+	/*
+	 * Decode into space obtained from the reader, then do the queue
+	 * bookkeeping by hand.
+	 *
+	 * NOTE(review): the bookkeeping below looks like a copy of what the
+	 * xlogreader does internally after decoding a record -- confirm against
+	 * the PostgreSQL version in use.
+	 */
+	decoded = (DecodedXLogRecord *) XLogReadRecordAlloc(reader_state, record->xl_tot_len, true);
+
+	if (!DecodeXLogRecord(reader_state, decoded, record, lsn, &errormsg))
+		elog(ERROR, "failed to decode WAL record: %s", errormsg);
+	else
+	{
+		/* Record the location of the next record. */
+		decoded->next_lsn = reader_state->NextRecPtr;
+
+		/*
+		 * If it's in the decode buffer, mark the decode buffer space as
+		 * occupied.
+		 */
+		if (!decoded->oversized)
+		{
+			/* The new decode buffer head must be MAXALIGNed. */
+			Assert(decoded->size == MAXALIGN(decoded->size));
+			if ((char *) decoded == reader_state->decode_buffer)
+				reader_state->decode_buffer_tail = reader_state->decode_buffer + decoded->size;
+			else
+				reader_state->decode_buffer_tail += decoded->size;
+		}
+
+		/* Insert it into the queue of decoded records. */
+		Assert(reader_state->decode_queue_tail != decoded);
+		if (reader_state->decode_queue_tail)
+			reader_state->decode_queue_tail->next = decoded;
+		reader_state->decode_queue_tail = decoded;
+		if (!reader_state->decode_queue_head)
+			reader_state->decode_queue_head = decoded;
+
+		/*
+		 * Update the pointers to the beginning and one-past-the-end of this
+		 * record, again for the benefit of historical code that expected the
+		 * decoder to track this rather than accessing these fields of the record
+		 * itself.
+		 */
+		reader_state->record = reader_state->decode_queue_head;
+		reader_state->ReadRecPtr = reader_state->record->lsn;
+		reader_state->EndRecPtr = reader_state->record->next_lsn;
+	}
+#else
+	/*
+	 * In lieu of calling XLogReadRecord, point the reader's 'decoded_record'
+	 * at the record directly.
+	 */
+	reader_state->ReadRecPtr = lsn;
+	reader_state->decoded_record = record;
+	if (!DecodeXLogRecord(reader_state, record, &errormsg))
+		elog(ERROR, "failed to decode WAL record: %s", errormsg);
+#endif
+
+	/* Ignore any other blocks than the ones the caller is interested in */
+	redo_read_buffer_filter = redo_block_filter;
+
+	RmgrTable[record->xl_rmid].rm_redo(reader_state);
+
+	/*
+	 * If no base image of the page was provided by PushPage, initialize
+	 * wal_redo_buffer here. The first WAL record must initialize the page
+	 * in that case.
+	 */
+	if (BufferIsInvalid(wal_redo_buffer))
+	{
+		wal_redo_buffer = NeonRedoReadBuffer(target_redo_tag.rnode,
+											 target_redo_tag.forkNum,
+											 target_redo_tag.blockNum,
+											 RBM_NORMAL);
+		Assert(!BufferIsInvalid(wal_redo_buffer));
+		ReleaseBuffer(wal_redo_buffer);
+	}
+
+	redo_read_buffer_filter = NULL;
+
+	/* Pop the error context stack */
+	error_context_stack = errcallback.previous;
+
+	elog(TRACE, "applied WAL record with LSN %X/%X",
+		 (uint32) (lsn >> 32), (uint32) lsn);
+#if PG_VERSION_NUM >= 150000
+	/*
+	 * Oversized records are not part of the decode buffer, so free them
+	 * explicitly.
+	 *
+	 * NOTE(review): at this point the record is still linked in the reader's
+	 * decode queue (see above) -- confirm that nothing else, e.g. the next
+	 * XLogBeginRead(), frees it a second time.
+	 */
+	if (decoded && decoded->oversized)
+		pfree(decoded);
+#endif
+}
+
+/*
+ * Error context callback for errors occurring during ApplyRecord.
+ *
+ * 'arg' is the XLogReaderState holding the record being replayed; its
+ * description and read position are attached to the error as context.
+ */
+static void
+apply_error_callback(void *arg)
+{
+	XLogReaderState *reader = arg;
+	StringInfoData desc;
+
+	/* build a textual description of the record */
+	initStringInfo(&desc);
+	xlog_outdesc(&desc, reader);
+
+	/* translator: %s is a WAL record description */
+	errcontext("WAL redo at %X/%X for %s",
+			   LSN_FORMAT_ARGS(reader->ReadRecPtr),
+			   desc.data);
+
+	pfree(desc.data);
+}
+
+
+
+/*
+ * Filter installed as redo_read_buffer_filter while a record is applied.
+ *
+ * Looks up the block that 'block_id' refers to in 'record'. Returns true
+ * (meaning the block gets ignored) unless it is exactly the block named by
+ * target_redo_tag. A WARNING is logged when the block belongs to a different
+ * relation than the target.
+ */
+static bool
+redo_block_filter(XLogReaderState *record, uint8 block_id)
+{
+	BufferTag	blk;
+	bool		is_target;
+
+#if PG_VERSION_NUM >= 150000
+	XLogRecGetBlockTag(record, block_id,
+					   &blk.rnode, &blk.forkNum, &blk.blockNum);
+#else
+	if (!XLogRecGetBlockTag(record, block_id,
+							&blk.rnode, &blk.forkNum, &blk.blockNum))
+	{
+		/* Caller specified a bogus block_id */
+		elog(PANIC, "failed to locate backup block with ID %d", block_id);
+	}
+#endif
+
+	/*
+	 * A redo function is not expected to touch a relation other than the one
+	 * it modifies, so complain if that happens.
+	 */
+	if (!RelFileNodeEquals(blk.rnode, target_redo_tag.rnode))
+		elog(WARNING, "REDO accessing unexpected page: %u/%u/%u.%u blk %u",
+			 blk.rnode.spcNode, blk.rnode.dbNode, blk.rnode.relNode, blk.forkNum, blk.blockNum);
+
+	/* Only the block currently being restored passes the filter */
+	is_target = BUFFERTAGS_EQUAL(blk, target_redo_tag);
+
+	return !is_target;
+}
+
+/*
+ * Send a page image from the buffer cache back to the client, after some
+ * records have been applied to it.
+ *
+ * The BLCKSZ bytes of the page are written to stdout. Afterwards the
+ * relation's local buffers are dropped and wal_redo_buffer is invalidated.
+ *
+ * Message format, in the order the fields are read:
+ *
+ *	ForkNumber	(1 byte)
+ *	spcNode		(4 bytes)
+ *	dbNode		(4 bytes)
+ *	relNode		(4 bytes)
+ *	BlockNumber	(4 bytes)
+ */
+static void
+GetPage(StringInfo input_message)
+{
+	RelFileNode rnode;
+	ForkNumber	forknum;
+	BlockNumber blknum;
+	Buffer		buf;
+	Page		page;
+	int			nsent;
+
+	forknum = pq_getmsgbyte(input_message);
+	rnode.spcNode = pq_getmsgint(input_message, 4);
+	rnode.dbNode = pq_getmsgint(input_message, 4);
+	rnode.relNode = pq_getmsgint(input_message, 4);
+	blknum = pq_getmsgint(input_message, 4);
+
+	/* FIXME: check that we got a BeginRedoForBlock message for this page earlier */
+
+	buf = NeonRedoReadBuffer(rnode, forknum, blknum, RBM_NORMAL);
+	Assert(buf == wal_redo_buffer);
+
+	/* single thread, so don't bother locking the page */
+	page = BufferGetPage(buf);
+
+	/* Response: Page content. write() may be partial, so loop until done. */
+	nsent = 0;
+	while (nsent < BLCKSZ)
+	{
+		ssize_t		rc = write(STDOUT_FILENO, page + nsent, BLCKSZ - nsent);
+
+		if (rc >= 0)
+		{
+			nsent += rc;
+			continue;
+		}
+
+		/* If interrupted by signal, just retry */
+		if (errno == EINTR)
+			continue;
+
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to stdout: %m")));
+	}
+
+	ReleaseBuffer(buf);
+	DropRelFileNodeAllLocalBuffers(rnode);
+	wal_redo_buffer = InvalidBuffer;
+
+	elog(TRACE, "Page sent back for block %u", blknum);
+}
+
+
+/* Buffer used by buffered_read() */
+static char stdin_buf[16 * 1024];
+static size_t stdin_len = 0;	/* # of bytes in buffer */
+static size_t stdin_ptr = 0;	/* # of bytes already consumed */
+
+/*
+ * Like read() on stdin, but buffered.
+ *
+ * libc's buffered fread() is not usable here, because it issues syscalls
+ * that are disabled by seccomp(). Depending on the platform, it can call
+ * 'fstat' or 'newfstatat'. 'fstat' is probably harmless, but 'newfstatat'
+ * seems problematic because it allows interrogating files by path name.
+ *
+ * Input is pulled from stdin into stdin_buf in large chunks and handed out
+ * from there.
+ *
+ * Returns the number of bytes stored in 'buf'. On error, -1 is returned and
+ * errno is set appropriately. Unlike read(), the caller's buffer is filled
+ * completely unless an error happens or EOF is reached.
+ */
+static ssize_t
+buffered_read(void *buf, size_t count)
+{
+	char	   *out = (char *) buf;
+	size_t		copied = 0;
+
+	while (copied < count)
+	{
+		size_t		avail;
+		size_t		chunk;
+
+		/* refill once everything buffered so far has been consumed */
+		if (stdin_ptr == stdin_len)
+		{
+			ssize_t		nread = read(STDIN_FILENO, stdin_buf, sizeof(stdin_buf));
+
+			/* on error return at once; don't do anything that could set 'errno' */
+			if (nread < 0)
+				return nread;
+
+			/* EOF: hand back whatever was copied so far */
+			if (nread == 0)
+				break;
+
+			stdin_len = (size_t) nread;
+			stdin_ptr = 0;
+		}
+
+		/* copy as much as is both buffered and still wanted */
+		avail = stdin_len - stdin_ptr;
+		chunk = Min(avail, count - copied);
+		memcpy(out + copied, &stdin_buf[stdin_ptr], chunk);
+
+		stdin_ptr += chunk;
+		copied += chunk;
+	}
+
+	return (ssize_t) copied;
+}
--- a/poetry.lock
+++ b/poetry.lock
@@ -1568,7 +1568,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.9"
-content-hash = "17cdbfe90f1b06dffaf24c3e076384ec08dd4a2dce5a05e50565f7364932eb2d"
+content-hash = "9352a89d49d34807f6a58f6c3f898acbd8cf3570e0f45ede973673644bde4d0e"

 [metadata.files]
 aiopg = [
@@ -1978,6 +1978,7 @@ prometheus-client = [
 psycopg2-binary = [
    {file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"},
+    {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f2534ab7dc7e776a263b463a16e189eb30e85ec9bbe1bff9e78dae802608932"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"},
    {file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"},
@@ -2011,6 +2012,7 @@ psycopg2-binary = [
    {file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"},
    {file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"},
+    {file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6aa71ae45f952a2205377773e76f4e3f27951df38e69a4c95440c779e013560"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"},
@@ -2022,6 +2024,7 @@ psycopg2-binary = [
    {file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"},
    {file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"},
+    {file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b3a24a1982ae56461cc24f6680604fffa2c1b818e9dc55680da038792e004d18"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"},
    {file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"},
@@ -2038,18 +2041,7 @@ py = [
    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
 ]
 pyasn1 = [
-    {file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"},
-    {file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"},
-    {file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"},
-    {file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"},
    {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"},
-    {file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"},
-    {file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"},
-    {file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"},
-    {file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"},
-    {file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"},
-    {file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"},
-    {file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"},
    {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"},
 ]
 pycodestyle = [
@@ -2159,6 +2151,13 @@ pyyaml = [
    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
    {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
    {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
+    {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
+    {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
+    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
+    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
+    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
+    {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
+    {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
    {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -4,7 +4,7 @@
 # version, we can consider updating.
 # See https://tracker.debian.org/pkg/rustc for more details on Debian rustc package,
 # we use "unstable" version number as the highest version used in the project by default.
-channel = "1.61" # do update GitHub CI cache values for rust builds, when changing this value
+channel = "1.62.1" # do update GitHub CI cache values for rust builds, when changing this value
 profile = "default"
 # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
 # https://rust-lang.github.io/rustup/concepts/profiles.html
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -12,7 +12,7 @@ fs2 = "0.4.3"
 serde_json = "1"
 tracing = "0.1.27"
 clap = "4.0"
-daemonize = "0.4.1"
+nix = "0.25"
 tokio = { version = "1.17", features = ["macros", "fs"] }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -4,8 +4,7 @@
 use anyhow::{bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, Command};
 use const_format::formatcp;
-use daemonize::Daemonize;
-use fs2::FileExt;
+use nix::unistd::Pid;
 use remote_storage::RemoteStorageConfig;
 use std::fs::{self, File};
 use std::io::{ErrorKind, Write};
@@ -16,6 +15,7 @@ use tokio::sync::mpsc;
 use toml_edit::Document;
 use tracing::*;
 use url::{ParseError, Url};
+use utils::lock_file;

 use metrics::set_build_info_metric;
 use safekeeper::broker;
@@ -35,12 +35,10 @@ use utils::{
    http::endpoint,
    id::NodeId,
    logging::{self, LogFormat},
-    project_git_version,
-    shutdown::exit_now,
-    signals, tcp_listener,
+    project_git_version, signals, tcp_listener,
 };

-const LOCK_FILE_NAME: &str = "safekeeper.lock";
+const PID_FILE_NAME: &str = "safekeeper.pid";
 const ID_FILE_NAME: &str = "safekeeper.id";
 project_git_version!(GIT_VERSION);

@@ -65,10 +63,6 @@ fn main() -> anyhow::Result<()> {
        conf.no_sync = true;
    }

-    if arg_matches.get_flag("daemonize") {
-        conf.daemonize = true;
-    }
-
    if let Some(addr) = arg_matches.get_one::<String>("listen-pg") {
        conf.listen_pg_addr = addr.to_string();
    }
@@ -143,19 +137,33 @@ fn main() -> anyhow::Result<()> {
 }

 fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bool) -> Result<()> {
-    let log_file = logging::init("safekeeper.log", conf.daemonize, conf.log_format)?;
-
+    logging::init(conf.log_format)?;
    info!("version: {GIT_VERSION}");

    // Prevent running multiple safekeepers on the same directory
-    let lock_file_path = conf.workdir.join(LOCK_FILE_NAME);
-    let lock_file = File::create(&lock_file_path).context("failed to open lockfile")?;
-    lock_file.try_lock_exclusive().with_context(|| {
-        format!(
-            "control file {} is locked by some other process",
-            lock_file_path.display()
-        )
-    })?;
+    let lock_file_path = conf.workdir.join(PID_FILE_NAME);
+    let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
+        lock_file::LockCreationResult::Created {
+            new_lock_contents,
+            file,
+        } => {
+            info!("Created lock file at {lock_file_path:?} with contents {new_lock_contents}");
+            file
+        }
+        lock_file::LockCreationResult::AlreadyLocked {
+            existing_lock_contents,
+        } => anyhow::bail!(
+            "Could not lock pid file; safekeeper is already running in {:?} with PID {}",
+            conf.workdir,
+            existing_lock_contents
+        ),
+        lock_file::LockCreationResult::CreationFailed(e) => {
+            return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
+        }
+    };
+    // ensure that the lock file is held even if the main thread of the process panics;
+    // we need to release the lock file only when the current process is gone
+    let _ = Box::leak(Box::new(lock_file));

    // Set or read our ID.
    set_id(&mut conf, given_id)?;
@@ -187,31 +195,6 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
        }
    };

-    // XXX: Don't spawn any threads before daemonizing!
-    if conf.daemonize {
-        info!("daemonizing...");
-
-        // There should'n be any logging to stdin/stdout. Redirect it to the main log so
-        // that we will see any accidental manual fprintf's or backtraces.
-        let stdout = log_file.try_clone().unwrap();
-        let stderr = log_file;
-
-        let daemonize = Daemonize::new()
-            .pid_file("safekeeper.pid")
-            .working_directory(Path::new("."))
-            .stdout(stdout)
-            .stderr(stderr);
-
-        // XXX: The parent process should exit abruptly right after
-        // it has spawned a child to prevent coverage machinery from
-        // dumping stats into a `profraw` file now owned by the child.
-        // Otherwise, the coverage data will be damaged.
-        match daemonize.exit_action(|| exit_now(0)).start() {
-            Ok(_) => info!("Success, daemonized"),
-            Err(err) => bail!("Error: {err}. could not daemonize. bailing."),
-        }
-    }
-
    // Register metrics collector for active timelines. It's important to do this
    // after daemonizing, otherwise process collector will be upset.
    let timeline_collector = safekeeper::metrics::TimelineCollector::new();
@@ -384,13 +367,6 @@ fn cli() -> Command {
                .short('p')
                .long("pageserver"),
        )
-        .arg(
-            Arg::new("daemonize")
-                .short('d')
-                .long("daemonize")
-                .action(ArgAction::SetTrue)
-                .help("Run in the background"),
-        )
        .arg(
            Arg::new("no-sync")
                .short('n')
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -54,7 +54,6 @@ pub struct SafeKeeperConf {
    // data directories to avoid clashing with each other.
    pub workdir: PathBuf,

-    pub daemonize: bool,
    pub no_sync: bool,
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
@@ -88,7 +87,6 @@ impl Default for SafeKeeperConf {
            // command line, so that when the server is running, all paths are relative
            // to that.
            workdir: PathBuf::from("./"),
-            daemonize: false,
            no_sync: false,
            listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
            listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
--- a/scripts/export_import_between_pageservers.py
+++ b/scripts/export_import_between_pageservers.py
@@ -4,18 +4,12 @@
 # Outline of steps:
 # 1. Get `(last_lsn, prev_lsn)` from old pageserver
 # 2. Get `fullbackup` from old pageserver, which creates a basebackup tar file
-# 3. This tar file might be missing relation files for empty relations, if the pageserver
-#    is old enough (we didn't always store those). So to recreate them, we start a local
-#    vanilla postgres on this basebackup and ask it what relations should exist, then touch
-#    any missing files and re-pack the tar.
-#    TODO This functionality is no longer needed, so we can delete it later if we don't
-#         end up using the same utils for the pg 15 upgrade. Not sure.
-# 4. We import the patched basebackup into a new pageserver
-# 5. We export again via fullbackup, now from the new pageserver and compare the returned
+# 3. We import the basebackup into a new pageserver
+# 4. We export again via fullbackup, now from the new pageserver and compare the returned
 #    tar file with the one we imported. This confirms that we imported everything that was
 #    exported, but doesn't guarantee correctness (what if we didn't **export** everything
 #    initially?)
-# 6. We wait for the new pageserver's remote_consistent_lsn to catch up
+# 5. We wait for the new pageserver's remote_consistent_lsn to catch up
 #
 # For more context on how to use this, see:
 # https://github.com/neondatabase/cloud/wiki/Storage-format-migration
@@ -24,17 +18,13 @@ import argparse
 import os
 import shutil
 import subprocess
-import tempfile
 import time
 import uuid
-from contextlib import closing
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, cast
+from typing import Any, Dict, List, Optional

 import psycopg2
 import requests
-from psycopg2.extensions import connection as PgConnection
-from psycopg2.extensions import parse_dsn

 ###############################################
 ### client-side utils copied from test fixtures
@@ -135,105 +125,6 @@ class PgBin:
        )


-class PgProtocol:
-    """Reusable connection logic"""
-
-    def __init__(self, **kwargs):
-        self.default_options = kwargs
-
-    def conn_options(self, **kwargs):
-        conn_options = self.default_options.copy()
-        if "dsn" in kwargs:
-            conn_options.update(parse_dsn(kwargs["dsn"]))
-        conn_options.update(kwargs)
-
-        # Individual statement timeout in seconds. 2 minutes should be
-        # enough for our tests, but if you need a longer, you can
-        # change it by calling "SET statement_timeout" after
-        # connecting.
-        conn_options["options"] = f"-cstatement_timeout=120s {conn_options.get('options', '')}"
-
-        return conn_options
-
-    # autocommit=True here by default because that's what we need most of the time
-    def connect(self, autocommit=True, **kwargs) -> PgConnection:
-        """
-        Connect to the node.
-        Returns psycopg2's connection object.
-        This method passes all extra params to connstr.
-        """
-        conn = psycopg2.connect(**self.conn_options(**kwargs))
-
-        # WARNING: this setting affects *all* tests!
-        conn.autocommit = autocommit
-        return conn
-
-    def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]:
-        """
-        Execute query against the node and return all rows.
-        This method passes all extra params to connstr.
-        """
-        return self.safe_psql_many([query], **kwargs)[0]
-
-    def safe_psql_many(self, queries: List[str], **kwargs: Any) -> List[List[Tuple[Any, ...]]]:
-        """
-        Execute queries against the node and return all rows.
-        This method passes all extra params to connstr.
-        """
-        result: List[List[Any]] = []
-        with closing(self.connect(**kwargs)) as conn:
-            with conn.cursor() as cur:
-                for query in queries:
-                    print(f"Executing query: {query}")
-                    cur.execute(query)
-
-                    if cur.description is None:
-                        result.append([])  # query didn't return data
-                    else:
-                        result.append(cast(List[Any], cur.fetchall()))
-        return result
-
-
-class VanillaPostgres(PgProtocol):
-    def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
-        super().__init__(host="localhost", port=port, dbname="postgres")
-        self.pgdatadir = pgdatadir
-        self.pg_bin = pg_bin
-        self.running = False
-        if init:
-            self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)])
-        self.configure([f"port = {port}\n"])
-
-    def configure(self, options: List[str]):
-        """Append lines into postgresql.conf file."""
-        assert not self.running
-        with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file:
-            conf_file.write("\n".join(options))
-
-    def start(self, log_path: Optional[str] = None):
-        assert not self.running
-        self.running = True
-
-        if log_path is None:
-            log_path = os.path.join(self.pgdatadir, "pg.log")
-
-        self.pg_bin.run_capture(
-            ["pg_ctl", "-w", "-D", str(self.pgdatadir), "-l", log_path, "start"]
-        )
-
-    def stop(self):
-        assert self.running
-        self.running = False
-        self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"])
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc, tb):
-        if self.running:
-            self.stop()
-
-
 class NeonPageserverApiException(Exception):
    pass

@@ -370,84 +261,6 @@ def pack_base(log_dir, restored_dir, output_tar):
    shutil.move(tmp_tar_path, output_tar)


-def reconstruct_paths(log_dir, pg_bin, base_tar):
-    """Reconstruct what relation files should exist in the datadir by querying postgres."""
-    with tempfile.TemporaryDirectory() as restored_dir:
-        # Unpack the base tar
-        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])
-
-        # Start a vanilla postgres from the given datadir and query it to find
-        # what relfiles should exist, but possibly don't.
-        port = "55439"  # Probably free
-        with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
-            vanilla_pg.configure([f"port={port}"])
-            vanilla_pg.start(log_path=os.path.join(log_dir, "tmp_pg.log"))
-
-            # Create database based on template0 because we can't connect to template0
-            query = "create database template0copy template template0"
-            vanilla_pg.safe_psql(query, user="cloud_admin")
-            vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")
-
-            # Get all databases
-            query = "select oid, datname from pg_database"
-            oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
-            template0_oid = [
-                oid for (oid, database) in oid_dbname_pairs if database == "template0"
-            ][0]
-
-            # Get rel paths for each database
-            for oid, database in oid_dbname_pairs:
-                if database == "template0":
-                    # We can't connect to template0
-                    continue
-
-                query = "select relname, pg_relation_filepath(oid) from pg_class"
-                result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
-                for relname, filepath in result:
-                    if filepath is not None:
-
-                        if database == "template0copy":
-                            # Add all template0copy paths to template0
-                            prefix = f"base/{oid}/"
-                            if filepath.startswith(prefix):
-                                suffix = filepath[len(prefix) :]
-                                yield f"base/{template0_oid}/{suffix}"
-                            elif filepath.startswith("global"):
-                                print(f"skipping {database} global file {filepath}")
-                            else:
-                                raise AssertionError
-                        else:
-                            yield filepath
-
-
-def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
-    """Add the appropriate empty files to a basebadkup tar."""
-    with tempfile.TemporaryDirectory() as restored_dir:
-        # Unpack the base tar
-        subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])
-
-        # Touch files that don't exist
-        for path in paths:
-            absolute_path = os.path.join(restored_dir, path)
-            exists = os.path.exists(absolute_path)
-            if not exists:
-                print(f"File {absolute_path} didn't exist. Creating..")
-                Path(absolute_path).touch()
-
-        # Repackage
-        pack_base(log_dir, restored_dir, output_tar)
-
-
-# HACK This is a workaround for exporting from old pageservers that
-#      can't export empty relations. In this case we need to start
-#      a vanilla postgres from the exported datadir, and query it
-#      to see what empty relations are missing, and then create
-#      those empty files before importing.
-def add_missing_rels(base_tar, output_tar, log_dir, pg_bin):
-    reconstructed_paths = set(reconstruct_paths(log_dir, pg_bin, base_tar))
-    touch_missing_rels(log_dir, base_tar, output_tar, reconstructed_paths)
-
-
 def get_rlsn(pageserver_connstr, tenant_id, timeline_id):
    conn = psycopg2.connect(pageserver_connstr)
    conn.autocommit = True
@@ -516,7 +329,6 @@ def export_timeline(
    pg_version,
 ):
    # Choose filenames
-    incomplete_filename = tar_filename + ".incomplete"
    stderr_filename = os.path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr")

    # Construct export command
@@ -525,18 +337,14 @@ def export_timeline(

    # Run export command
    print(f"Running: {cmd}")
-    with open(incomplete_filename, "w") as stdout_f:
+    with open(tar_filename, "w") as stdout_f:
        with open(stderr_filename, "w") as stderr_f:
-            print(f"(capturing output to {incomplete_filename})")
+            print(f"(capturing output to {tar_filename})")
            pg_bin = PgBin(args.work_dir, args.pg_distrib_dir, pg_version)
            subprocess.run(
                cmd, stdout=stdout_f, stderr=stderr_f, env=pg_bin._build_env(None), check=True
            )

-    # Add missing rels
-    pg_bin = PgBin(args.work_dir, args.pg_distrib_dir, pg_version)
-    add_missing_rels(incomplete_filename, tar_filename, args.work_dir, pg_bin)
-
    # Log more info
    file_size = os.path.getsize(tar_filename)
    print(f"Done export: {tar_filename}, size {file_size}")
@@ -633,6 +441,13 @@ def main(args: argparse.Namespace):
                raise AssertionError(f"Sizes don't match old: {old_size} new: {new_size}")


+def non_zero_tcp_port(arg: Any):
+    """argparse type callback: int(arg) if it lies in 1..65535, else ArgumentTypeError."""
+    if (port := int(arg)) not in range(1, 65536):
+        raise argparse.ArgumentTypeError(f"invalid tcp port: {arg}")
+    return port
+
+
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
@@ -728,5 +543,13 @@ if __name__ == "__main__":
        default=False,
        help="directory where temporary tar files are stored",
    )
+    parser.add_argument(
+        "--tmp-pg-port",
+        dest="tmp_pg_port",
+        required=False,
+        default=55439,
+        type=non_zero_tcp_port,
+        help="localhost port to use for temporary postgres instance",
+    )
    args = parser.parse_args()
    main(args)
--- a/scripts/reformat
+++ b/scripts/reformat
@@ -6,6 +6,6 @@ set -euox pipefail
 echo 'Reformatting Rust code'
 cargo fmt
 echo 'Reformatting Python code'
-poetry run isort test_runner
-poetry run flake8 test_runner
-poetry run black test_runner
+poetry run isort test_runner scripts
+poetry run flake8 test_runner scripts
+poetry run black test_runner scripts
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -19,7 +19,7 @@ from dataclasses import dataclass, field
 from enum import Flag, auto
 from functools import cached_property
 from pathlib import Path
-from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast

 import asyncpg
 import backoff  # type: ignore
@@ -36,7 +36,7 @@ from psycopg2.extensions import connection as PgConnection
 from psycopg2.extensions import make_dsn, parse_dsn
 from typing_extensions import Literal

-from .utils import allure_attach_from_dir, etcd_path, get_self_dir, subprocess_capture
+from .utils import Fn, allure_attach_from_dir, etcd_path, get_self_dir, subprocess_capture

 """
 This file contains pytest fixtures. A fixture is a test resource that can be
@@ -56,7 +56,6 @@ put directly-importable functions into utils.py or another separate file.
 """

 Env = Dict[str, str]
-Fn = TypeVar("Fn", bound=Callable[..., Any])

 DEFAULT_OUTPUT_DIR = "test_output"
 DEFAULT_BRANCH_NAME = "main"
@@ -965,11 +964,11 @@ def neon_env_builder(
        yield builder


-class NeonPageserverApiException(Exception):
+class PageserverApiException(Exception):
    pass


-class NeonPageserverHttpClient(requests.Session):
+class PageserverHttpClient(requests.Session):
    def __init__(self, port: int, is_testing_enabled_or_skip: Fn, auth_token: Optional[str] = None):
        super().__init__()
        self.port = port
@@ -987,7 +986,7 @@ class NeonPageserverHttpClient(requests.Session):
                msg = res.json()["msg"]
            except:  # noqa: E722
                msg = ""
-            raise NeonPageserverApiException(msg) from e
+            raise PageserverApiException(msg) from e

    def check_status(self):
        self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
@@ -1624,8 +1623,6 @@ class ComputeCtl(AbstractNeonCli):
 class NeonPageserver(PgProtocol):
    """
    An object representing a running pageserver.
-
-    Initializes the repository via `neon init`.
    """

    TEMP_FILE_SUFFIX = "___temp"
@@ -1674,8 +1671,8 @@ class NeonPageserver(PgProtocol):
        if '"profiling"' not in self.version:
            pytest.skip("pageserver was built without 'profiling' feature")

-    def http_client(self, auth_token: Optional[str] = None) -> NeonPageserverHttpClient:
-        return NeonPageserverHttpClient(
+    def http_client(self, auth_token: Optional[str] = None) -> PageserverHttpClient:
+        return PageserverHttpClient(
            port=self.service_port.http,
            auth_token=auth_token,
            is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
@@ -2260,11 +2257,6 @@ class PostgresFactory:
        return self


-def read_pid(path: Path) -> int:
-    """Read content of file into number"""
-    return int(path.read_text())
-
-
@dataclass
 class SafekeeperPort:
    pg: int
@@ -2688,26 +2680,8 @@ def check_restored_datadir_content(
    assert (mismatch, error) == ([], [])


-def wait_until(number_of_iterations: int, interval: float, func):
-    """
-    Wait until 'func' returns successfully, without exception. Returns the
-    last return value from the function.
-    """
-    last_exception = None
-    for i in range(number_of_iterations):
-        try:
-            res = func()
-        except Exception as e:
-            log.info("waiting for %s iteration %s failed", func, i + 1)
-            last_exception = e
-            time.sleep(interval)
-            continue
-        return res
-    raise Exception("timed out while waiting for %s" % func) from last_exception
-
-
 def assert_no_in_progress_downloads_for_tenant(
-    pageserver_http_client: NeonPageserverHttpClient,
+    pageserver_http_client: PageserverHttpClient,
    tenant: TenantId,
 ):
    tenant_status = pageserver_http_client.tenant_status(tenant)
@@ -2715,7 +2689,7 @@ def assert_no_in_progress_downloads_for_tenant(


 def remote_consistent_lsn(
-    pageserver_http_client: NeonPageserverHttpClient, tenant: TenantId, timeline: TimelineId
+    pageserver_http_client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
 ) -> Lsn:
    detail = pageserver_http_client.timeline_detail(tenant, timeline)

@@ -2730,7 +2704,7 @@ def remote_consistent_lsn(


 def wait_for_upload(
-    pageserver_http_client: NeonPageserverHttpClient,
+    pageserver_http_client: PageserverHttpClient,
    tenant: TenantId,
    timeline: TimelineId,
    lsn: Lsn,
@@ -2754,7 +2728,7 @@ def wait_for_upload(


 def last_record_lsn(
-    pageserver_http_client: NeonPageserverHttpClient, tenant: TenantId, timeline: TimelineId
+    pageserver_http_client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
 ) -> Lsn:
    detail = pageserver_http_client.timeline_detail(tenant, timeline)

@@ -2764,7 +2738,7 @@ def last_record_lsn(


 def wait_for_last_record_lsn(
-    pageserver_http_client: NeonPageserverHttpClient,
+    pageserver_http_client: PageserverHttpClient,
    tenant: TenantId,
    timeline: TimelineId,
    lsn: Lsn,
--- a/test_runner/fixtures/utils.py
+++ b/test_runner/fixtures/utils.py
@@ -4,13 +4,16 @@ import re
 import shutil
 import subprocess
 import tarfile
+import time
 from pathlib import Path
-from typing import Any, List, Tuple
+from typing import Any, Callable, List, Tuple, TypeVar

 import allure  # type: ignore
 from fixtures.log_helper import log
 from psycopg2.extensions import cursor

+Fn = TypeVar("Fn", bound=Callable[..., Any])
+

 def get_self_dir() -> str:
    """Get the path to the directory where this script lives."""
@@ -188,3 +191,57 @@ def allure_attach_from_dir(dir: Path):
                extension = attachment.suffix.removeprefix(".")

            allure.attach.file(source, name, attachment_type, extension)
+
+
+def start_in_background(
+    command: list[str], cwd: Path, log_file_name: str, is_started: Fn
+) -> subprocess.Popen[bytes]:
+    """
+    Spawn `command` in `cwd` with stdout/stderr redirected to `cwd / log_file_name`, then
+    poll `is_started` until it stops raising. Kills the process and raises on failure.
+    """
+
+    log.info(f'Running command "{" ".join(command)}"')
+
+    with open(cwd / log_file_name, "wb") as log_file:
+        spawned_process = subprocess.Popen(command, stdout=log_file, stderr=log_file, cwd=cwd)
+        error = None
+        try:
+            return_code = spawned_process.poll()
+            if return_code is not None:
+                error = f"expected subprocess to run but it exited with code {return_code}"
+            else:
+                attempts = 10
+                try:
+                    wait_until(number_of_iterations=attempts, interval=1, func=is_started)
+                except Exception:
+                    error = f"Failed to get correct status from subprocess in {attempts} attempts"
+        except Exception as e:
+            error = f"expected subprocess to start but it failed with exception: {e}"
+
+        if error is not None:
+            log.error(error)
+            spawned_process.kill()
+            spawned_process.wait()  # reap the killed child so it does not linger as a zombie
+            raise Exception(f"Failed to run subprocess as {command}, reason: {error}")
+
+        log.info("subprocess spawned")
+        return spawned_process
+
+
+def wait_until(number_of_iterations: int, interval: float, func: Fn):
+    """
+    Call 'func' up to 'number_of_iterations' times, sleeping 'interval' seconds
+    after each failed call. Returns the value of the first call that does not
+    raise; raises Exception (chained to the last failure) if every call raises.
+    """
+    failure = None
+    for attempt in range(1, number_of_iterations + 1):
+        try:
+            return func()
+        except Exception as exc:
+            failure = exc
+            log.info("waiting for %s iteration %s failed", func, attempt)
+            time.sleep(interval)
+    # every attempt raised: report the timeout, chained to the most recent failure
+    raise Exception("timed out while waiting for %s" % func) from failure
--- a/test_runner/regress/test_auth.py
+++ b/test_runner/regress/test_auth.py
@@ -1,7 +1,7 @@
 from contextlib import closing

 import pytest
-from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
+from fixtures.neon_fixtures import NeonEnvBuilder, PageserverApiException
 from fixtures.types import TenantId


@@ -39,7 +39,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):

    # fail to create branch using token with different tenant_id
    with pytest.raises(
-        NeonPageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied"
+        PageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied"
    ):
        invalid_tenant_http_client.timeline_create(
            tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id
@@ -50,7 +50,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):

    # fail to create tenant using tenant token
    with pytest.raises(
-        NeonPageserverApiException,
+        PageserverApiException,
        match="Forbidden: Attempt to access management api with tenant scope. Permission denied",
    ):
        tenant_http_client.tenant_create()
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -10,7 +10,7 @@ import toml
 from fixtures.neon_fixtures import (
    NeonCli,
    NeonEnvBuilder,
-    NeonPageserverHttpClient,
+    PageserverHttpClient,
    PgBin,
    PortDistributor,
    wait_for_last_record_lsn,
@@ -177,7 +177,7 @@ def test_backward_compatibility(
        cli.raw_cli(["start"])
        request.addfinalizer(lambda: cli.raw_cli(["stop"]))

-        result = cli.pg_start("main")
+        result = cli.pg_start("main", port=port_distributor.get_port())
        request.addfinalizer(lambda: cli.pg_stop("main"))
    except Exception:
        breaking_changes_allowed = (
@@ -208,7 +208,7 @@ def test_backward_compatibility(
    timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
    pageserver_port = snapshot_config["pageserver"]["listen_http_addr"].split(":")[-1]
    auth_token = snapshot_config["pageserver"]["auth_token"]
-    pageserver_http = NeonPageserverHttpClient(
+    pageserver_http = PageserverHttpClient(
        port=pageserver_port,
        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
        auth_token=auth_token,
--- a/test_runner/regress/test_neon_cli.py
+++ b/test_runner/regress/test_neon_cli.py
@@ -5,13 +5,13 @@ from fixtures.neon_fixtures import (
    DEFAULT_BRANCH_NAME,
    NeonEnv,
    NeonEnvBuilder,
-    NeonPageserverHttpClient,
+    PageserverHttpClient,
 )
 from fixtures.types import TenantId, TimelineId


 def helper_compare_timeline_list(
-    pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv, initial_tenant: TenantId
+    pageserver_http_client: PageserverHttpClient, env: NeonEnv, initial_tenant: TenantId
 ):
    """
    Compare timelines list returned by CLI and directly via API.
@@ -56,7 +56,7 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
    assert nested_timeline_id in timelines_cli


-def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv):
+def helper_compare_tenant_list(pageserver_http_client: PageserverHttpClient, env: NeonEnv):
    tenants = pageserver_http_client.tenant_list()
    tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants))

--- a/test_runner/regress/test_normal_work.py
+++ b/test_runner/regress/test_normal_work.py
@@ -1,9 +1,9 @@
 import pytest
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
+from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PageserverHttpClient


-def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
+def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
    tenant_id, timeline_id = env.neon_cli.create_tenant()
    pg = env.postgres.create_start("main", tenant_id=tenant_id)
    # we rely upon autocommit after each statement
--- a/test_runner/regress/test_pageserver_api.py
+++ b/test_runner/regress/test_pageserver_api.py
@@ -6,12 +6,12 @@ from fixtures.neon_fixtures import (
    DEFAULT_BRANCH_NAME,
    NeonEnv,
    NeonEnvBuilder,
-    NeonPageserverHttpClient,
+    PageserverHttpClient,
    neon_binpath,
    pg_distrib_dir,
-    wait_until,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
+from fixtures.utils import wait_until


 # test that we cannot override node id after init
@@ -29,8 +29,9 @@ def test_pageserver_init_node_id(neon_simple_env: NeonEnv):
            stderr=subprocess.PIPE,
        )

-    # remove initial config
+    # remove initial config and stop existing pageserver
    pageserver_config.unlink()
+    neon_simple_env.pageserver.stop()

    bad_init = run_pageserver(["--init", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'])
    assert (
@@ -60,7 +61,7 @@ def test_pageserver_init_node_id(neon_simple_env: NeonEnv):
    assert "has node id already, it cannot be overridden" in bad_update.stderr


-def check_client(client: NeonPageserverHttpClient, initial_tenant: TenantId):
+def check_client(client: PageserverHttpClient, initial_tenant: TenantId):
    client.check_status()

    # check initial tenant is there
@@ -116,7 +117,7 @@ def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):


 def expect_updated_msg_lsn(
-    client: NeonPageserverHttpClient,
+    client: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    prev_msg_lsn: Optional[Lsn],
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -15,10 +15,9 @@ from fixtures.neon_fixtures import (
    available_remote_storages,
    wait_for_last_record_lsn,
    wait_for_upload,
-    wait_until,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
-from fixtures.utils import query_scalar
+from fixtures.utils import query_scalar, wait_until


 #
--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -2,16 +2,12 @@ from threading import Thread

 import pytest
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import (
-    NeonEnvBuilder,
-    NeonPageserverApiException,
-    NeonPageserverHttpClient,
-)
+from fixtures.neon_fixtures import NeonEnvBuilder, PageserverApiException, PageserverHttpClient
 from fixtures.types import TenantId, TimelineId


 def do_gc_target(
-    pageserver_http: NeonPageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
+    pageserver_http: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
 ):
    """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
    try:
@@ -27,7 +23,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    # first check for non existing tenant
    tenant_id = TenantId.generate()
    with pytest.raises(
-        expected_exception=NeonPageserverApiException,
+        expected_exception=PageserverApiException,
        match=f"Tenant not found for id {tenant_id}",
    ):
        pageserver_http.tenant_detach(tenant_id)
@@ -49,7 +45,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):

    # gc should not try to even start
    with pytest.raises(
-        expected_exception=NeonPageserverApiException, match="gc target timeline does not exist"
+        expected_exception=PageserverApiException, match="gc target timeline does not exist"
    ):
        bogus_timeline_id = TimelineId.generate()
        pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
@@ -78,6 +74,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()

    with pytest.raises(
-        expected_exception=NeonPageserverApiException, match=f"Tenant {tenant_id} not found"
+        expected_exception=PageserverApiException, match=f"Tenant {tenant_id} not found"
    ):
        pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
--- a/test_runner/regress/test_tenant_relocation.py
+++ b/test_runner/regress/test_tenant_relocation.py
@@ -1,7 +1,5 @@
 import os
 import pathlib
-import signal
-import subprocess
 import threading
 from contextlib import closing, contextmanager
 from typing import Any, Dict, Optional, Tuple
@@ -12,7 +10,7 @@ from fixtures.neon_fixtures import (
    Etcd,
    NeonEnv,
    NeonEnvBuilder,
-    NeonPageserverHttpClient,
+    PageserverHttpClient,
    PortDistributor,
    Postgres,
    assert_no_in_progress_downloads_for_tenant,
@@ -21,10 +19,9 @@ from fixtures.neon_fixtures import (
    pg_distrib_dir,
    wait_for_last_record_lsn,
    wait_for_upload,
-    wait_until,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
-from fixtures.utils import query_scalar, subprocess_capture
+from fixtures.utils import query_scalar, start_in_background, subprocess_capture, wait_until


 def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):
@@ -32,7 +29,7 @@ def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):


@contextmanager
-def new_pageserver_helper(
+def new_pageserver_service(
    new_pageserver_dir: pathlib.Path,
    pageserver_bin: pathlib.Path,
    remote_storage_mock_path: pathlib.Path,
@@ -49,7 +46,6 @@ def new_pageserver_helper(
        str(pageserver_bin),
        "--workdir",
        str(new_pageserver_dir),
-        "--daemonize",
        "--update-config",
        f"-c listen_pg_addr='localhost:{pg_port}'",
        f"-c listen_http_addr='localhost:{http_port}'",
@@ -61,16 +57,26 @@ def new_pageserver_helper(
        cmd.append(
            f"-c broker_endpoints=['{broker.client_url()}']",
        )
-
-    log.info("starting new pageserver %s", cmd)
-    out = subprocess.check_output(cmd, text=True)
-    log.info("started new pageserver %s", out)
+    pageserver_client = PageserverHttpClient(
+        port=http_port,
+        auth_token=None,
+        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
+    )
    try:
-        yield
+        pageserver_process = start_in_background(
+            cmd, new_pageserver_dir, "pageserver.log", pageserver_client.check_status
+        )
+    except Exception as e:
+        log.error(e)
+        # pageserver_process is unbound here; start_in_background kills any child it spawned
+        raise Exception(f"Failed to start pageserver as {cmd}, reason: {e}") from e
+
+    log.info("new pageserver started")
+    try:
+        yield pageserver_process
    finally:
        log.info("stopping new pageserver")
-        pid = int((new_pageserver_dir / "pageserver.pid").read_text())
-        os.kill(pid, signal.SIGQUIT)
+        pageserver_process.kill()


@contextmanager
@@ -113,7 +119,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
 def populate_branch(
    pg: Postgres,
    tenant_id: TenantId,
-    ps_http: NeonPageserverHttpClient,
+    ps_http: PageserverHttpClient,
    create_table: bool,
    expected_sum: Optional[int],
 ) -> Tuple[TimelineId, Lsn]:
@@ -146,7 +152,7 @@ def populate_branch(


 def ensure_checkpoint(
-    pageserver_http: NeonPageserverHttpClient,
+    pageserver_http: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    current_lsn: Lsn,
@@ -159,7 +165,7 @@ def ensure_checkpoint(


 def check_timeline_attached(
-    new_pageserver_http_client: NeonPageserverHttpClient,
+    new_pageserver_http_client: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    old_timeline_detail: Dict[str, Any],
@@ -346,13 +352,13 @@ def test_tenant_relocation(
    log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
    pageserver_bin = pathlib.Path(neon_binpath) / "pageserver"

-    new_pageserver_http = NeonPageserverHttpClient(
+    new_pageserver_http = PageserverHttpClient(
        port=new_pageserver_http_port,
        auth_token=None,
        is_testing_enabled_or_skip=env.pageserver.is_testing_enabled_or_skip,
    )

-    with new_pageserver_helper(
+    with new_pageserver_service(
        new_pageserver_dir,
        pageserver_bin,
        remote_storage_mock_path,
@@ -386,6 +392,8 @@ def test_tenant_relocation(
                pg_distrib_dir,
                "--work-dir",
                os.path.join(test_output_dir),
+                "--tmp-pg-port",
+                str(port_distributor.get_port()),
            ]
            subprocess_capture(test_output_dir, cmd, check=True)
        elif method == "minor":
--- a/test_runner/regress/test_tenant_tasks.py
+++ b/test_runner/regress/test_tenant_tasks.py
@@ -1,6 +1,7 @@
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnvBuilder, wait_until
+from fixtures.neon_fixtures import NeonEnvBuilder
 from fixtures.types import TenantId, TimelineId
+from fixtures.utils import wait_until


 def get_only_element(l):  # noqa: E741
--- a/test_runner/regress/test_tenants_with_remote_storage.py
+++ b/test_runner/regress/test_tenants_with_remote_storage.py
@@ -25,10 +25,9 @@ from fixtures.neon_fixtures import (
    available_remote_storages,
    wait_for_last_record_lsn,
    wait_for_upload,
-    wait_until,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
-from fixtures.utils import query_scalar
+from fixtures.utils import query_scalar, wait_until


 async def tenant_workload(env: NeonEnv, pg: Postgres):
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -1,6 +1,7 @@
 import pytest
-from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException, wait_until
+from fixtures.neon_fixtures import NeonEnv, PageserverApiException
 from fixtures.types import TenantId, TimelineId
+from fixtures.utils import wait_until


 def test_timeline_delete(neon_simple_env: NeonEnv):
@@ -11,13 +12,13 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
    # first try to delete non existing timeline
    # for existing tenant:
    invalid_timeline_id = TimelineId.generate()
-    with pytest.raises(NeonPageserverApiException, match="timeline not found"):
+    with pytest.raises(PageserverApiException, match="timeline not found"):
        ps_http.timeline_delete(tenant_id=env.initial_tenant, timeline_id=invalid_timeline_id)

    # for non existing tenant:
    invalid_tenant_id = TenantId.generate()
    with pytest.raises(
-        NeonPageserverApiException,
+        PageserverApiException,
        match=f"Tenant {invalid_tenant_id} not found in the local state",
    ):
        ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id)
@@ -32,7 +33,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv):

    ps_http = env.pageserver.http_client()
    with pytest.raises(
-        NeonPageserverApiException, match="Cannot delete timeline which has child timelines"
+        PageserverApiException, match="Cannot delete timeline which has child timelines"
    ):

        timeline_path = (
@@ -64,7 +65,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv):

    # check 404
    with pytest.raises(
-        NeonPageserverApiException,
+        PageserverApiException,
        match=f"Timeline {env.initial_tenant}/{leaf_timeline_id} was not found",
    ):
        ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -11,7 +11,7 @@ from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
    NeonEnv,
    NeonEnvBuilder,
-    NeonPageserverHttpClient,
+    PageserverHttpClient,
    PgBin,
    PortDistributor,
    Postgres,
@@ -462,7 +462,7 @@ def assert_physical_size(env: NeonEnv, tenant_id: TenantId, timeline_id: Timelin
 # Timeline logical size initialization is an asynchronous background task that runs once,
 # try a few times to ensure it's activated properly
 def wait_for_timeline_size_init(
-    client: NeonPageserverHttpClient, tenant: TenantId, timeline: TimelineId
+    client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
 ):
    for i in range(10):
        timeline_details = client.timeline_detail(
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -27,6 +27,7 @@ from fixtures.neon_fixtures import (
    RemoteStorageKind,
    RemoteStorageUsers,
    Safekeeper,
+    SafekeeperHttpClient,
    SafekeeperPort,
    available_remote_storages,
    neon_binpath,
@@ -34,7 +35,7 @@ from fixtures.neon_fixtures import (
    wait_for_upload,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
-from fixtures.utils import get_dir_size, query_scalar
+from fixtures.utils import get_dir_size, query_scalar, start_in_background


 def wait_lsn_force_checkpoint(
@@ -841,7 +842,7 @@ class SafekeeperEnv:
        safekeeper_dir = self.repo_dir / f"sk{i}"
        safekeeper_dir.mkdir(exist_ok=True)

-        args = [
+        cmd = [
            self.bin_safekeeper,
            "-l",
            f"127.0.0.1:{port.pg}",
@@ -853,11 +854,22 @@ class SafekeeperEnv:
            str(i),
            "--broker-endpoints",
            self.broker.client_url(),
-            "--daemonize",
        ]
+        log.info(f'Running command "{" ".join(cmd)}"')

-        log.info(f'Running command "{" ".join(args)}"')
-        return subprocess.run(args, check=True)
+        safekeeper_client = SafekeeperHttpClient(
+            port=port.http,
+            auth_token=None,
+        )
+        try:
+            safekeeper_process = start_in_background(
+                cmd, safekeeper_dir, "safekeeper.log", safekeeper_client.check_status
+            )
+            return safekeeper_process
+        except Exception as e:
+            log.error(e)
+            safekeeper_process.kill()
+            raise Exception(f"Failed to start safekepeer as {cmd}, reason: {e}")

    def get_safekeeper_connstrs(self):
        return ",".join([sk_proc.args[2] for sk_proc in self.safekeepers])
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
Author	SHA1	Message	Date
Joonas Koivunen	ca1ed3dc3b	drive by typo fix	2022-11-02 21:11:05 +02:00
Joonas Koivunen	dc2554dff6	chore: remove no longer needed empty rel fix this seems to have been fixed long enough ago.	2022-11-02 21:10:44 +02:00
Joonas Koivunen	5112142997	fix: use different port for temporary postgres (#2743 ) `test_tenant_relocation` ends up starting a temporary postgres instance with a fixed port. the change makes the port configurable at scripts/export_import_between_pageservers.py and uses that in test_tenant_relocation.	2022-11-02 18:37:48 +00:00
bojanserafimov	a0a74868a4	Fix clippy (#2742 )	2022-11-02 12:30:09 -04:00
Christian Schwarz	b154992510	timeline_list_handler: avoid spawn_blocking As per https://github.com/neondatabase/neon/issues/2731#issuecomment-1299335813 refs https://github.com/neondatabase/neon/issues/2731	2022-11-02 16:22:58 +01:00
Christian Schwarz	a86a38c96e	README: fix instructions on how to run tests The `make debug` target doesn't exist, and I can't find it in the Git history.	2022-11-02 16:22:58 +01:00
Christian Schwarz	590f894db8	tenant_status: remove unnecessary spawn_blocking The spawn_blocking is pointless in this case: get_tenant is not expected to block for any meaningful amount of time. There are get_tenant calls in most other functions in the file too, and they don't bother with spawn_blocking. Let's remove the spawn_blocking from tenant_status, too, to be consistent. fixes https://github.com/neondatabase/neon/issues/2731	2022-11-02 16:22:58 +01:00
Alexander Bayandin	0a0595b98d	test_backward_compatibility: assign random port to compute (#2738 )	2022-11-02 15:22:38 +00:00
Dmitry Rodionov	e56d11c8e1	fix style if possible (cannot really split long lines in mermaid)	2022-11-02 17:15:49 +02:00
Dmitry Rodionov	ccdc3188ed	update according to discussion and comments	2022-11-02 17:15:49 +02:00
Dmitry Rodionov	67401cbdb8	pageserver s3 coordination	2022-11-02 17:15:49 +02:00
Kirill Bulatov	d42700280f	Remove daemonize from storage components (#2677 ) Move daemonization logic into `control_plane`. Storage binaries now only create a lockfile to avoid concurrent services running in the same directory.	2022-11-02 02:26:37 +02:00
Kirill Bulatov	6df4d5c911	Bump rustc to 1.62.1 (#2728 ) Changelog: https://github.com/rust-lang/rust/blob/master/RELEASES.md#version-1621-2022-07-19	2022-11-02 01:21:33 +02:00
Dmitry Rodionov	32d14403bd	remove wrong is_active filter for timelines in compaction/gc Gc needs to know about all branch points, not only ones for timelines that are active at the moment of gc. If timeline is inactive then we won't know about branch point. In this case gc can delete data that is needed by child timeline. For compaction it is less severe. Delaying compaction can cause an effect on performance. So it is still better to run it. There is a logic to exit it quickly if there is nothing to compact	2022-11-01 18:07:08 +02:00
Dmitry Ivanov	0df3467146	Refactoring: replace `utils::connstring` with `Url`-based APIs	2022-11-01 18:17:36 +03:00
Dmitry Rodionov	c64a121aa8	do not nest wal_connection_manager span inside parent one	2022-11-01 15:08:23 +02:00
Heikki Linnakangas	22cc8760b9	Move walredo process code under pgxn in the main 'neon' repository. - Refactor the way the WalProposerMain function is called when started with --sync-safekeepers. The postgres binary now explicitly loads the 'neon.so' library and calls the WalProposerMain in it. This is simpler than the global function callback "hook" we previously used. - Move the WAL redo process code to a new library, neon_walredo.so, and use the same mechanism as for --sync-safekeepers to call the WalRedoMain function, when launched with --walredo argument. - Also move the seccomp code to neon_walredo.so library. I kept the configure check in the postgres side for now, though.	2022-10-31 01:11:50 +01:00