Report startup metrics and failure reason from compute_ctl (#1581)

+ neondatabase/cloud#1103 This adds a couple of control endpoints to simplify compute state discovery for the control plane. For example, we can now figure out within seconds that Postgres wasn't able to start or that basebackup failed, instead of blindly polling the compute readiness for a minute or two. We also now expose startup metrics (the time taken by each step: basebackup, sync safekeepers, config, total). The console grabs them after each successful start and reports them as a histogram to Prometheus and Grafana. An OpenAPI spec is added and up to date, but it is not used in the console yet.
2026-01-06 04:52:55 +00:00 · 2022-05-09 19:45:28 +03:00
parent b9f84f4a83
commit 772c2fb4ff
18 changed files with 787 additions and 429 deletions
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -1,9 +1,9 @@
 # Compute node tools

-Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
-`ExecStart` option. It will handle all the `zenith` specifics during compute node
+Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
+`ExecStart` option. It will handle all the `Neon` specifics during compute node
 initialization:
- `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
+- `compute_ctl` accepts cluster (compute node) specification as a JSON file.
 - Every start is a fresh start, so the data directory is removed and
  initialized again on each run.
 - Next it will put configuration files into the `PGDATA` directory.
@@ -13,18 +13,18 @@ initialization:
 - Check and alter/drop/create roles and databases.
 - Hang waiting on the `postmaster` process to exit.

-Also `zenith_ctl` spawns two separate service threads:
+Also `compute_ctl` spawns two separate service threads:
 - `compute-monitor` checks the last Postgres activity timestamp and saves it
-  into the shared `ComputeState`;
+  into the shared `ComputeNode`;
 - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
  last activity requests.

 Usage example:
 ```sh
-zenith_ctl -D /var/db/postgres/compute \
-           -C 'postgresql://zenith_admin@localhost/postgres' \
-           -S /var/db/postgres/specs/current.json \
-           -b /usr/local/bin/postgres
+compute_ctl -D /var/db/postgres/compute \
+            -C 'postgresql://zenith_admin@localhost/postgres' \
+            -S /var/db/postgres/specs/current.json \
+            -b /usr/local/bin/postgres
 ```

 ## Tests
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -0,0 +1,174 @@
+//!
+//! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
+//! `ExecStart` option. It will handle all the `Neon` specifics during compute node
+//! initialization:
+//! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.
+//! - Every start is a fresh start, so the data directory is removed and
+//!   initialized again on each run.
+//! - Next it will put configuration files into the `PGDATA` directory.
+//! - Sync safekeepers and get commit LSN.
+//! - Get `basebackup` from pageserver using the LSN returned by the previous step.
+//! - Try to start `postgres` and wait until it is ready to accept connections.
+//! - Check and alter/drop/create roles and databases.
+//! - Hang waiting on the `postmaster` process to exit.
+//!
+//! Also `compute_ctl` spawns two separate service threads:
+//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
+//!   into the shared `ComputeNode`;
+//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
+//!   last activity requests.
+//!
+//! Usage example:
+//! ```sh
+//! compute_ctl -D /var/db/postgres/compute \
+//!             -C 'postgresql://zenith_admin@localhost/postgres' \
+//!             -S /var/db/postgres/specs/current.json \
+//!             -b /usr/local/bin/postgres
+//! ```
+//!
+use std::fs::File;
+use std::panic;
+use std::path::Path;
+use std::process::exit;
+use std::sync::{Arc, RwLock};
+use std::{thread, time::Duration};
+
+use anyhow::Result;
+use chrono::Utc;
+use clap::Arg;
+use log::{error, info};
+
+use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
+use compute_tools::http::api::launch_http_server;
+use compute_tools::logger::*;
+use compute_tools::monitor::launch_monitor;
+use compute_tools::params::*;
+use compute_tools::pg_helpers::*;
+use compute_tools::spec::*;
+
+/// Entry point of `compute_ctl`.
+///
+/// Parses the command line, loads the compute spec (from `--spec` or
+/// `--spec-path`), builds the shared `ComputeNode`, launches the HTTP and
+/// monitor service threads and then prepares and runs Postgres.
+///
+/// If Postgres exits, the process exits with the same code (or 1 if no code is
+/// available). If startup fails, the error is published via the shared state,
+/// the process keeps serving HTTP for 30 seconds and then returns the error.
+fn main() -> Result<()> {
+    // TODO: re-use `utils::logging` later
+    init_logger(DEFAULT_LOG_LEVEL)?;
+
+    // Env variable is set by `cargo`
+    let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
+    let matches = clap::App::new("compute_ctl")
+        .version(version.unwrap_or("unknown"))
+        .arg(
+            Arg::new("connstr")
+                .short('C')
+                .long("connstr")
+                .value_name("DATABASE_URL")
+                .required(true),
+        )
+        .arg(
+            Arg::new("pgdata")
+                .short('D')
+                .long("pgdata")
+                .value_name("DATADIR")
+                .required(true),
+        )
+        .arg(
+            Arg::new("pgbin")
+                .short('b')
+                .long("pgbin")
+                .value_name("POSTGRES_PATH"),
+        )
+        .arg(
+            Arg::new("spec")
+                .short('s')
+                .long("spec")
+                .value_name("SPEC_JSON"),
+        )
+        .arg(
+            Arg::new("spec-path")
+                .short('S')
+                .long("spec-path")
+                .value_name("SPEC_PATH"),
+        )
+        .get_matches();
+
+    let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
+    let connstr = matches
+        .value_of("connstr")
+        .expect("Postgres connection string is required");
+    let spec = matches.value_of("spec");
+    let spec_path = matches.value_of("spec-path");
+
+    // Try to use just 'postgres' if no path is provided
+    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
+
+    let spec: ComputeSpec = match spec {
+        // First, try to get cluster spec from the cli argument
+        Some(json) => serde_json::from_str(json)?,
+        None => {
+            // Second, try to read it from the file if path is provided
+            if let Some(sp) = spec_path {
+                let path = Path::new(sp);
+                let file = File::open(path)?;
+                serde_json::from_reader(file)?
+            } else {
+                panic!("cluster spec should be provided via --spec or --spec-path argument");
+            }
+        }
+    };
+
+    let pageserver_connstr = spec
+        .cluster
+        .settings
+        .find("zenith.page_server_connstring")
+        .expect("pageserver connstr should be provided");
+    let tenant = spec
+        .cluster
+        .settings
+        .find("zenith.zenith_tenant")
+        .expect("tenant id should be provided");
+    // Use a distinct message here, so a missing timeline is not reported as a
+    // missing tenant.
+    let timeline = spec
+        .cluster
+        .settings
+        .find("zenith.zenith_timeline")
+        .expect("timeline id should be provided");
+
+    let compute_state = ComputeNode {
+        start_time: Utc::now(),
+        connstr: connstr.to_string(),
+        pgdata: pgdata.to_string(),
+        pgbin: pgbin.to_string(),
+        spec,
+        tenant,
+        timeline,
+        pageserver_connstr,
+        metrics: ComputeMetrics::new(),
+        state: RwLock::new(ComputeState::new()),
+    };
+    let compute = Arc::new(compute_state);
+
+    // Launch service threads first, so that we are able to serve availability
+    // requests while configuration is still in progress.
+    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
+    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
+
+    // Run compute (Postgres) and hang waiting on it.
+    match compute.prepare_and_run() {
+        Ok(ec) => {
+            // No exit code is available (e.g. terminated by a signal): report 1.
+            let code = ec.code().unwrap_or(1);
+            info!("Postgres exited with code {}, shutting down", code);
+            exit(code)
+        }
+        Err(error) => {
+            error!("could not start the compute node: {}", error);
+
+            // Publish the failure; the write lock is released explicitly
+            // before sleeping so HTTP handlers can read the state.
+            let mut state = compute.state.write().unwrap();
+            state.error = Some(format!("{:?}", error));
+            state.status = ComputeStatus::Failed;
+            drop(state);
+
+            // Keep serving HTTP requests, so that the cloud control plane can
+            // get the actual error.
+            info!("giving control plane 30s to collect the error before shutdown");
+            thread::sleep(Duration::from_secs(30));
+            info!("shutting down");
+            Err(error)
+        }
+    }
+}
--- a/compute_tools/src/bin/zenith_ctl.rs
+++ b/compute_tools/src/bin/zenith_ctl.rs
@@ -1,252 +0,0 @@
-//!
-//! Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
-//! `ExecStart` option. It will handle all the `zenith` specifics during compute node
-//! initialization:
-//! - `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
-//! - Every start is a fresh start, so the data directory is removed and
-//!   initialized again on each run.
-//! - Next it will put configuration files into the `PGDATA` directory.
-//! - Sync safekeepers and get commit LSN.
-//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
-//! - Try to start `postgres` and wait until it is ready to accept connections.
-//! - Check and alter/drop/create roles and databases.
-//! - Hang waiting on the `postmaster` process to exit.
-//!
-//! Also `zenith_ctl` spawns two separate service threads:
-//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
-//!   into the shared `ComputeState`;
-//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
-//!   last activity requests.
-//!
-//! Usage example:
-//! ```sh
-//! zenith_ctl -D /var/db/postgres/compute \
-//!            -C 'postgresql://zenith_admin@localhost/postgres' \
-//!            -S /var/db/postgres/specs/current.json \
-//!            -b /usr/local/bin/postgres
-//! ```
-//!
-use std::fs::File;
-use std::panic;
-use std::path::Path;
-use std::process::{exit, Command, ExitStatus};
-use std::sync::{Arc, RwLock};
-
-use anyhow::{Context, Result};
-use chrono::Utc;
-use clap::Arg;
-use log::info;
-use postgres::{Client, NoTls};
-
-use compute_tools::checker::create_writablity_check_data;
-use compute_tools::config;
-use compute_tools::http_api::launch_http_server;
-use compute_tools::logger::*;
-use compute_tools::monitor::launch_monitor;
-use compute_tools::params::*;
-use compute_tools::pg_helpers::*;
-use compute_tools::spec::*;
-use compute_tools::zenith::*;
-
-/// Do all the preparations like PGDATA directory creation, configuration,
-/// safekeepers sync, basebackup, etc.
-fn prepare_pgdata(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
-    let state = state.read().unwrap();
-    let spec = &state.spec;
-    let pgdata_path = Path::new(&state.pgdata);
-    let pageserver_connstr = spec
-        .cluster
-        .settings
-        .find("zenith.page_server_connstring")
-        .expect("pageserver connstr should be provided");
-    let tenant = spec
-        .cluster
-        .settings
-        .find("zenith.zenith_tenant")
-        .expect("tenant id should be provided");
-    let timeline = spec
-        .cluster
-        .settings
-        .find("zenith.zenith_timeline")
-        .expect("tenant id should be provided");
-
-    info!(
-        "starting cluster #{}, operation #{}",
-        spec.cluster.cluster_id,
-        spec.operation_uuid.as_ref().unwrap()
-    );
-
-    // Remove/create an empty pgdata directory and put configuration there.
-    create_pgdata(&state.pgdata)?;
-    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
-
-    info!("starting safekeepers syncing");
-    let lsn = sync_safekeepers(&state.pgdata, &state.pgbin)
-        .with_context(|| "failed to sync safekeepers")?;
-    info!("safekeepers synced at LSN {}", lsn);
-
-    info!(
-        "getting basebackup@{} from pageserver {}",
-        lsn, pageserver_connstr
-    );
-    get_basebackup(&state.pgdata, &pageserver_connstr, &tenant, &timeline, &lsn).with_context(
-        || {
-            format!(
-                "failed to get basebackup@{} from pageserver {}",
-                lsn, pageserver_connstr
-            )
-        },
-    )?;
-
-    // Update pg_hba.conf received with basebackup.
-    update_pg_hba(pgdata_path)?;
-
-    Ok(())
-}
-
-/// Start Postgres as a child process and manage DBs/roles.
-/// After that this will hang waiting on the postmaster process to exit.
-fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
-    let read_state = state.read().unwrap();
-    let pgdata_path = Path::new(&read_state.pgdata);
-
-    // Run postgres as a child process.
-    let mut pg = Command::new(&read_state.pgbin)
-        .args(&["-D", &read_state.pgdata])
-        .spawn()
-        .expect("cannot start postgres process");
-
-    // Try default Postgres port if it is not provided
-    let port = read_state
-        .spec
-        .cluster
-        .settings
-        .find("port")
-        .unwrap_or_else(|| "5432".to_string());
-    wait_for_postgres(&port, pgdata_path)?;
-
-    let mut client = Client::connect(&read_state.connstr, NoTls)?;
-
-    handle_roles(&read_state.spec, &mut client)?;
-    handle_databases(&read_state.spec, &mut client)?;
-    handle_grants(&read_state.spec, &mut client)?;
-    create_writablity_check_data(&mut client)?;
-
-    // 'Close' connection
-    drop(client);
-
-    info!(
-        "finished configuration of cluster #{}",
-        read_state.spec.cluster.cluster_id
-    );
-
-    // Release the read lock.
-    drop(read_state);
-
-    // Get the write lock, update state and release the lock, so HTTP API
-    // was able to serve requests, while we are blocked waiting on
-    // Postgres.
-    let mut state = state.write().unwrap();
-    state.ready = true;
-    drop(state);
-
-    // Wait for child postgres process basically forever. In this state Ctrl+C
-    // will be propagated to postgres and it will be shut down as well.
-    let ecode = pg.wait().expect("failed to wait on postgres");
-
-    Ok(ecode)
-}
-
-fn main() -> Result<()> {
-    // TODO: re-use `utils::logging` later
-    init_logger(DEFAULT_LOG_LEVEL)?;
-
-    // Env variable is set by `cargo`
-    let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
-    let matches = clap::App::new("zenith_ctl")
-        .version(version.unwrap_or("unknown"))
-        .arg(
-            Arg::new("connstr")
-                .short('C')
-                .long("connstr")
-                .value_name("DATABASE_URL")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgdata")
-                .short('D')
-                .long("pgdata")
-                .value_name("DATADIR")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgbin")
-                .short('b')
-                .long("pgbin")
-                .value_name("POSTGRES_PATH"),
-        )
-        .arg(
-            Arg::new("spec")
-                .short('s')
-                .long("spec")
-                .value_name("SPEC_JSON"),
-        )
-        .arg(
-            Arg::new("spec-path")
-                .short('S')
-                .long("spec-path")
-                .value_name("SPEC_PATH"),
-        )
-        .get_matches();
-
-    let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
-    let connstr = matches
-        .value_of("connstr")
-        .expect("Postgres connection string is required");
-    let spec = matches.value_of("spec");
-    let spec_path = matches.value_of("spec-path");
-
-    // Try to use just 'postgres' if no path is provided
-    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
-
-    let spec: ClusterSpec = match spec {
-        // First, try to get cluster spec from the cli argument
-        Some(json) => serde_json::from_str(json)?,
-        None => {
-            // Second, try to read it from the file if path is provided
-            if let Some(sp) = spec_path {
-                let path = Path::new(sp);
-                let file = File::open(path)?;
-                serde_json::from_reader(file)?
-            } else {
-                panic!("cluster spec should be provided via --spec or --spec-path argument");
-            }
-        }
-    };
-
-    let compute_state = ComputeState {
-        connstr: connstr.to_string(),
-        pgdata: pgdata.to_string(),
-        pgbin: pgbin.to_string(),
-        spec,
-        ready: false,
-        last_active: Utc::now(),
-    };
-    let compute_state = Arc::new(RwLock::new(compute_state));
-
-    // Launch service threads first, so we were able to serve availability
-    // requests, while configuration is still in progress.
-    let mut _threads = vec![
-        launch_http_server(&compute_state).expect("cannot launch compute monitor thread"),
-        launch_monitor(&compute_state).expect("cannot launch http endpoint thread"),
-    ];
-
-    prepare_pgdata(&compute_state)?;
-
-    // Run compute (Postgres) and hang waiting on it. Panic if any error happens,
-    // it will help us to trigger unwind and kill postmaster as well.
-    match run_compute(&compute_state) {
-        Ok(ec) => exit(ec.success() as i32),
-        Err(error) => panic!("cannot start compute node, error: {}", error),
-    }
-}
--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -1,11 +1,11 @@
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;

 use anyhow::{anyhow, Result};
 use log::error;
 use postgres::Client;
 use tokio_postgres::NoTls;

-use crate::zenith::ComputeState;
+use crate::compute::ComputeNode;

 pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
    let query = "
@@ -23,9 +23,9 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
    Ok(())
 }

-pub async fn check_writability(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
-    let connstr = state.read().unwrap().connstr.clone();
-    let (client, connection) = tokio_postgres::connect(&connstr, NoTls).await?;
+pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
+    let connstr = &compute.connstr;
+    let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
    if client.is_closed() {
        return Err(anyhow!("connection to postgres closed"));
    }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -0,0 +1,315 @@
+//
+// XXX: This starts to be scarily similar to the `PostgresNode` from `control_plane`,
+// but there are several things that make `PostgresNode` usage inconvenient in the
+// cloud:
+// - it inherits from `LocalEnv`, which contains **all-all** the information about
+//   a complete service running
+// - it uses `PageServerNode` with information about http endpoint, which we do not
+//   need in the cloud again
+// - many tiny pieces like, for example, we do not use `pg_ctl` in the cloud
+//
+// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required
+// attributes (not required for the cloud). Yet, it is still tempting to unify these
+// `PostgresNode` and `ComputeNode` and use one in both places.
+//
+// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`.
+//
+use std::fs;
+use std::os::unix::fs::PermissionsExt;
+use std::path::Path;
+use std::process::{Command, ExitStatus, Stdio};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::RwLock;
+
+use anyhow::{Context, Result};
+use chrono::{DateTime, Utc};
+use log::info;
+use postgres::{Client, NoTls};
+use serde::{Serialize, Serializer};
+
+use crate::checker::create_writablity_check_data;
+use crate::config;
+use crate::pg_helpers::*;
+use crate::spec::*;
+
+/// Compute node info shared across several `compute_ctl` threads.
+pub struct ComputeNode {
+    /// Wall-clock time taken when the node description was built; the base
+    /// for the `total_startup_ms` metric.
+    pub start_time: DateTime<Utc>,
+    /// Connection string used to connect to the managed Postgres.
+    pub connstr: String,
+    /// Path of the Postgres data directory (removed and re-created on start).
+    pub pgdata: String,
+    /// Postgres binary to execute.
+    pub pgbin: String,
+    /// Compute specification this node was started with.
+    pub spec: ComputeSpec,
+    /// Tenant id, sent to the pageserver in the basebackup command.
+    pub tenant: String,
+    /// Timeline id, sent to the pageserver in the basebackup command.
+    pub timeline: String,
+    /// Connection string of the pageserver the basebackup is fetched from.
+    pub pageserver_connstr: String,
+    /// Startup timing metrics, updated as the startup steps complete.
+    pub metrics: ComputeMetrics,
+    /// Volatile part of the `ComputeNode` so should be used under `RwLock`
+    /// to allow HTTP API server to serve status requests, while configuration
+    /// is in progress.
+    pub state: RwLock<ComputeState>,
+}
+
+/// Serde helper: writes a UTC timestamp as its RFC 3339 string form.
+fn rfc3339_serialize<S: Serializer>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error> {
+    let formatted = x.to_rfc3339();
+    s.serialize_str(&formatted)
+}
+
+/// Volatile compute state, serialized to JSON for status reporting.
+#[derive(Serialize)]
+#[serde(rename_all = "snake_case")]
+pub struct ComputeState {
+    /// Current lifecycle status of the compute.
+    pub status: ComputeStatus,
+    /// Timestamp of the last Postgres activity
+    #[serde(serialize_with = "rfc3339_serialize")]
+    pub last_active: DateTime<Utc>,
+    /// Description of the startup failure, if one happened.
+    pub error: Option<String>,
+}
+
+impl ComputeState {
+    /// Builds the initial state: `Init` status, last activity set to the
+    /// current time and no error recorded.
+    pub fn new() -> Self {
+        let now = Utc::now();
+        ComputeState {
+            error: None,
+            last_active: now,
+            status: ComputeStatus::Init,
+        }
+    }
+}
+
+impl Default for ComputeState {
+    /// Same as [`ComputeState::new`].
+    fn default() -> Self {
+        ComputeState::new()
+    }
+}
+
+/// Lifecycle status of the compute node, serialized in `snake_case`.
+///
+/// `Debug` is derived so the status can be logged and used in assertions.
+#[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum ComputeStatus {
+    /// Initial status, kept while the compute is being prepared and configured.
+    Init,
+    /// Postgres is started and its configuration has finished.
+    Running,
+    /// Startup failed.
+    Failed,
+}
+
+/// Startup timing metrics in milliseconds.
+///
+/// Atomics are used so the values can be updated through a shared reference.
+/// `Default` is derived: every counter starts at zero.
+#[derive(Serialize, Default)]
+pub struct ComputeMetrics {
+    /// Duration of the `postgres --sync-safekeepers` run.
+    pub sync_safekeepers_ms: AtomicU64,
+    /// Time spent fetching and unpacking the basebackup.
+    pub basebackup_ms: AtomicU64,
+    /// Time from spawning Postgres until its configuration has been applied.
+    pub config_ms: AtomicU64,
+    /// Time from the node's `start_time` until configuration has been applied.
+    pub total_startup_ms: AtomicU64,
+}
+
+impl ComputeMetrics {
+    /// Creates metrics with every counter set to zero.
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+impl ComputeNode {
+    /// Sets the compute status under the state write lock.
+    pub fn set_status(&self, status: ComputeStatus) {
+        self.state.write().unwrap().status = status;
+    }
+
+    /// Returns a copy of the current compute status.
+    pub fn get_status(&self) -> ComputeStatus {
+        self.state.read().unwrap().status
+    }
+
+    /// Milliseconds elapsed from `since` until `until`, clamped to zero.
+    ///
+    /// Both timestamps are wall-clock readings, so the difference can be
+    /// negative if the clock is stepped backwards; a metric must never turn
+    /// that into a panic in the middle of startup.
+    fn elapsed_ms(since: DateTime<Utc>, until: DateTime<Utc>) -> u64 {
+        until.signed_duration_since(since).num_milliseconds().max(0) as u64
+    }
+
+    // Remove `pgdata` directory and create it again with right permissions.
+    fn create_pgdata(&self) -> Result<()> {
+        // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
+        // If it is something different then create_dir() will error out anyway.
+        let _ok = fs::remove_dir_all(&self.pgdata);
+        fs::create_dir(&self.pgdata)?;
+        fs::set_permissions(&self.pgdata, fs::Permissions::from_mode(0o700))?;
+
+        Ok(())
+    }
+
+    // Get basebackup from the libpq connection to pageserver using `connstr` and
+    // unarchive it to `pgdata` directory overriding all its previous content.
+    // The elapsed time is stored in the `basebackup_ms` metric on success.
+    fn get_basebackup(&self, lsn: &str) -> Result<()> {
+        let start_time = Utc::now();
+
+        let mut client = Client::connect(&self.pageserver_connstr, NoTls)?;
+        let basebackup_cmd = match lsn {
+            "0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
+            _ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
+        };
+        let copyreader = client.copy_out(basebackup_cmd.as_str())?;
+        let mut ar = tar::Archive::new(copyreader);
+
+        ar.unpack(&self.pgdata)?;
+
+        self.metrics.basebackup_ms.store(
+            Self::elapsed_ms(start_time, Utc::now()),
+            Ordering::Relaxed,
+        );
+
+        Ok(())
+    }
+
+    // Run `postgres` in a special mode with `--sync-safekeepers` argument
+    // and return the reported LSN back to the caller. Failures to start or
+    // wait for the process are returned as errors rather than panics, so the
+    // caller can report them.
+    fn sync_safekeepers(&self) -> Result<String> {
+        let start_time = Utc::now();
+
+        let sync_handle = Command::new(&self.pgbin)
+            .args(&["--sync-safekeepers"])
+            .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
+            .stdout(Stdio::piped())
+            .spawn()
+            .context("postgres --sync-safekeepers failed to start")?;
+
+        // `postgres --sync-safekeepers` will print all log output to stderr and
+        // final LSN to stdout. So we pipe only stdout, while stderr will be automatically
+        // redirected to the caller output.
+        let sync_output = sync_handle
+            .wait_with_output()
+            .context("postgres --sync-safekeepers failed")?;
+        if !sync_output.status.success() {
+            anyhow::bail!(
+                "postgres --sync-safekeepers exited with non-zero status: {}",
+                sync_output.status,
+            );
+        }
+
+        self.metrics.sync_safekeepers_ms.store(
+            Self::elapsed_ms(start_time, Utc::now()),
+            Ordering::Relaxed,
+        );
+
+        let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
+
+        Ok(lsn)
+    }
+
+    /// Do all the preparations like PGDATA directory creation, configuration,
+    /// safekeepers sync, basebackup, etc.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any of the preparation steps fails.
+    pub fn prepare_pgdata(&self) -> Result<()> {
+        let spec = &self.spec;
+        let pgdata_path = Path::new(&self.pgdata);
+
+        // Remove/create an empty pgdata directory and put configuration there.
+        self.create_pgdata()?;
+        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
+
+        info!("starting safekeepers syncing");
+        let lsn = self
+            .sync_safekeepers()
+            .with_context(|| "failed to sync safekeepers")?;
+        info!("safekeepers synced at LSN {}", lsn);
+
+        info!(
+            "getting basebackup@{} from pageserver {}",
+            lsn, &self.pageserver_connstr
+        );
+        self.get_basebackup(&lsn).with_context(|| {
+            format!(
+                "failed to get basebackup@{} from pageserver {}",
+                lsn, &self.pageserver_connstr
+            )
+        })?;
+
+        // Update pg_hba.conf received with basebackup.
+        update_pg_hba(pgdata_path)?;
+
+        Ok(())
+    }
+
+    /// Start Postgres as a child process and manage DBs/roles.
+    /// After that this will hang waiting on the postmaster process to exit.
+    ///
+    /// On success the `config_ms` and `total_startup_ms` metrics are stored
+    /// and the status is switched to `Running` before blocking on Postgres.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if Postgres cannot be spawned, does not become ready,
+    /// cannot be configured, or cannot be waited on.
+    pub fn run(&self) -> Result<ExitStatus> {
+        let start_time = Utc::now();
+
+        let pgdata_path = Path::new(&self.pgdata);
+
+        // Run postgres as a child process.
+        let mut pg = Command::new(&self.pgbin)
+            .args(&["-D", &self.pgdata])
+            .spawn()
+            .context("cannot start postgres process")?;
+
+        // Try default Postgres port if it is not provided
+        let port = self
+            .spec
+            .cluster
+            .settings
+            .find("port")
+            .unwrap_or_else(|| "5432".to_string());
+        wait_for_postgres(&mut pg, &port, pgdata_path)?;
+
+        let mut client = Client::connect(&self.connstr, NoTls)?;
+
+        handle_roles(&self.spec, &mut client)?;
+        handle_databases(&self.spec, &mut client)?;
+        handle_grants(&self.spec, &mut client)?;
+        create_writablity_check_data(&mut client)?;
+
+        // 'Close' connection
+        drop(client);
+        let startup_end_time = Utc::now();
+
+        self.metrics.config_ms.store(
+            Self::elapsed_ms(start_time, startup_end_time),
+            Ordering::Relaxed,
+        );
+        self.metrics.total_startup_ms.store(
+            Self::elapsed_ms(self.start_time, startup_end_time),
+            Ordering::Relaxed,
+        );
+
+        self.set_status(ComputeStatus::Running);
+
+        info!(
+            "finished configuration of compute for project {}",
+            self.spec.cluster.cluster_id
+        );
+
+        // Wait for child Postgres process basically forever. In this state Ctrl+C
+        // will propagate to Postgres and it will be shut down as well.
+        let ecode = pg
+            .wait()
+            .context("failed to start waiting on Postgres process")?;
+
+        Ok(ecode)
+    }
+
+    /// Prepare the data directory and then run Postgres until it exits.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from `prepare_pgdata` or `run`.
+    pub fn prepare_and_run(&self) -> Result<ExitStatus> {
+        // The operation id is optional; a missing one must not abort startup
+        // from inside a log statement.
+        let operation = self
+            .spec
+            .operation_uuid
+            .as_ref()
+            .map(|id| id.to_string())
+            .unwrap_or_else(|| "unknown".to_string());
+        info!(
+            "starting compute for project {}, operation {}, tenant {}, timeline {}",
+            self.spec.cluster.cluster_id, operation, self.tenant, self.timeline,
+        );
+
+        self.prepare_pgdata()?;
+        self.run()
+    }
+}
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -6,7 +6,7 @@ use std::path::Path;
 use anyhow::Result;

 use crate::pg_helpers::PgOptionsSerialize;
-use crate::zenith::ClusterSpec;
+use crate::spec::ComputeSpec;

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -32,20 +32,20 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 }

 /// Create or completely rewrite configuration file specified by `path`
-pub fn write_postgres_conf(path: &Path, spec: &ClusterSpec) -> Result<()> {
+pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
    // File::create() destroys the file content if it exists.
    let mut postgres_conf = File::create(path)?;

-    write_zenith_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
+    write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;

    Ok(())
 }

 // Write Postgres config block wrapped with generated comment section
-fn write_zenith_managed_block(file: &mut File, buf: &str) -> Result<()> {
-    writeln!(file, "# Managed by Zenith: begin")?;
+fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
+    writeln!(file, "# Managed by compute_ctl: begin")?;
    writeln!(file, "{}", buf)?;
-    writeln!(file, "# Managed by Zenith: end")?;
+    writeln!(file, "# Managed by compute_ctl: end")?;

    Ok(())
 }
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -1,37 +1,64 @@
 use std::convert::Infallible;
 use std::net::SocketAddr;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::thread;

 use anyhow::Result;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
 use log::{error, info};
+use serde_json;

-use crate::zenith::*;
+use crate::compute::{ComputeNode, ComputeStatus};

 // Service function to handle all available routes.
-async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
+async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
    match (req.method(), req.uri().path()) {
        // Timestamp of the last Postgres activity in the plain text.
+        // DEPRECATED in favour of /status
        (&Method::GET, "/last_activity") => {
            info!("serving /last_active GET request");
-            let state = state.read().unwrap();
+            let state = compute.state.read().unwrap();

            // Use RFC3339 format for consistency.
            Response::new(Body::from(state.last_active.to_rfc3339()))
        }

-        // Has compute setup process finished? -> true/false
+        // Has compute setup process finished? -> true/false.
+        // DEPRECATED in favour of /status
        (&Method::GET, "/ready") => {
            info!("serving /ready GET request");
-            let state = state.read().unwrap();
-            Response::new(Body::from(format!("{}", state.ready)))
+            let status = compute.get_status();
+            Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
        }

+        // Serialized compute state.
+        (&Method::GET, "/status") => {
+            info!("serving /status GET request");
+            let state = compute.state.read().unwrap();
+            Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
+        }
+
+        // Startup metrics in JSON format. Keep /metrics reserved for a possible
+        // future use for Prometheus metrics format.
+        (&Method::GET, "/metrics.json") => {
+            info!("serving /metrics.json GET request");
+            Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
+        }
+
+        // DEPRECATED, use POST instead
        (&Method::GET, "/check_writability") => {
            info!("serving /check_writability GET request");
-            let res = crate::checker::check_writability(&state).await;
+            let res = crate::checker::check_writability(&compute).await;
+            match res {
+                Ok(_) => Response::new(Body::from("true")),
+                Err(e) => Response::new(Body::from(e.to_string())),
+            }
+        }
+
+        (&Method::POST, "/check_writability") => {
+            info!("serving /check_writability POST request");
+            let res = crate::checker::check_writability(&compute).await;
            match res {
                Ok(_) => Response::new(Body::from("true")),
                Err(e) => Response::new(Body::from(e.to_string())),
@@ -49,7 +76,7 @@ async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Respons

 // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
 #[tokio::main]
-async fn serve(state: Arc<RwLock<ComputeState>>) {
+async fn serve(state: Arc<ComputeNode>) {
    let addr = SocketAddr::from(([0, 0, 0, 0], 3080));

    let make_service = make_service_fn(move |_conn| {
@@ -73,7 +100,7 @@ async fn serve(state: Arc<RwLock<ComputeState>>) {
 }

 /// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
-pub fn launch_http_server(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -0,0 +1 @@
+pub mod api;
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -0,0 +1,158 @@
+openapi: "3.0.2"
+info:
+  title: Compute node control API
+  version: "1.0"
+
+servers:
+  - url: "http://localhost:3080"
+
+paths:
+  /status:
+    get:
+      tags:
+      - "info"
+      summary: Get compute node internal status
+      description: ""
+      operationId: getComputeStatus
+      responses:
+        "200":
+          description: ComputeState
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ComputeState"
+
+  /metrics.json:
+    get:
+      tags:
+      - "info"
+      summary: Get compute node startup metrics in JSON format
+      description: ""
+      operationId: getComputeMetricsJSON
+      responses:
+        "200":
+          description: ComputeMetrics
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ComputeMetrics"
+
+  /ready:
+    get:
+      deprecated: true
+      tags:
+      - "info"
+      summary: Check whether compute startup process finished successfully
+      description: ""
+      operationId: computeIsReady
+      responses:
+        "200":
+          description: Compute is ready ('true') or not ('false')
+          content:
+            text/plain:
+              schema:
+                type: string
+                example: "true"
+
+  /last_activity:
+    get:
+      deprecated: true
+      tags:
+      - "info"
+      summary: Get timestamp of the last compute activity
+      description: ""
+      operationId: getLastComputeActivityTS
+      responses:
+        "200":
+          description: Timestamp of the last compute activity
+          content:
+            text/plain:
+              schema:
+                type: string
+                example: "2022-10-12T07:20:50.52Z"
+
+  /check_writability:
+    get:
+      deprecated: true
+      tags:
+      - "check"
+      summary: Check that we can write new data on this compute
+      description: ""
+      operationId: checkComputeWritabilityDeprecated
+      responses:
+        "200":
+          description: Check result
+          content:
+            text/plain:
+              schema:
+                type: string
+                description: Error text or 'true' if check passed
+                example: "true"
+
+    post:
+      tags:
+      - "check"
+      summary: Check that we can write new data on this compute
+      description: ""
+      operationId: checkComputeWritability
+      responses:
+        "200":
+          description: Check result
+          content:
+            text/plain:
+              schema:
+                type: string
+                description: Error text or 'true' if check passed
+                example: "true"
+
+components:
+  securitySchemes:
+    JWT:
+      type: http
+      scheme: bearer
+      bearerFormat: JWT
+
+  schemas:
+    ComputeMetrics:
+      type: object
+      description: Compute startup metrics
+      required:
+        - sync_safekeepers_ms
+        - basebackup_ms
+        - config_ms
+        - total_startup_ms
+      properties:
+        sync_safekeepers_ms:
+          type: integer
+        basebackup_ms:
+          type: integer
+        config_ms:
+          type: integer
+        total_startup_ms:
+          type: integer
+
+    ComputeState:
+      type: object
+      required:
+        - status
+        - last_active
+      properties:
+        status:
+          $ref: '#/components/schemas/ComputeStatus'
+        last_active:
+          type: string
+          description: The last detected compute activity timestamp in UTC and RFC3339 format
+          example: "2022-10-12T07:20:50.52Z"
+        error:
+          type: string
+          description: Text of the error during compute startup, if any
+
+    ComputeStatus:
+      type: string
+      enum:
+        - init
+        - failed
+        - running
+
+security:
+  - JWT: []
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -4,11 +4,11 @@
 //!
 pub mod checker;
 pub mod config;
-pub mod http_api;
+pub mod http;
 #[macro_use]
 pub mod logger;
+pub mod compute;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
 pub mod spec;
-pub mod zenith;
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -1,4 +1,4 @@
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::{thread, time};

 use anyhow::Result;
@@ -6,16 +6,16 @@ use chrono::{DateTime, Utc};
 use log::{debug, info};
 use postgres::{Client, NoTls};

-use crate::zenith::ComputeState;
+use crate::compute::ComputeNode;

 const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds

 // Spin in a loop and figure out the last activity time in the Postgres.
 // Then update it in the shared state. This function never errors out.
 // XXX: the only expected panic is at `RwLock` unwrap().
-fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
+fn watch_compute_activity(compute: &Arc<ComputeNode>) {
    // Suppose that `connstr` doesn't change
-    let connstr = state.read().unwrap().connstr.clone();
+    let connstr = compute.connstr.clone();
    // Define `client` outside of the loop to reuse existing connection if it's active.
    let mut client = Client::connect(&connstr, NoTls);
    let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
@@ -46,7 +46,7 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
                            AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
                        &[],
                    );
-                let mut last_active = state.read().unwrap().last_active;
+                let mut last_active = compute.state.read().unwrap().last_active;

                if let Ok(backs) = backends {
                    let mut idle_backs: Vec<DateTime<Utc>> = vec![];
@@ -83,14 +83,14 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
                }

                // Update the last activity in the shared state if we got a more recent one.
-                let mut state = state.write().unwrap();
+                let mut state = compute.state.write().unwrap();
                if last_active > state.last_active {
                    state.last_active = last_active;
                    debug!("set the last compute activity time to: {}", last_active);
                }
            }
            Err(e) => {
-                info!("cannot connect to postgres: {}, retrying", e);
+                debug!("cannot connect to postgres: {}, retrying", e);

                // Establish a new connection and try again.
                client = Client::connect(&connstr, NoTls);
@@ -100,7 +100,7 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
 }

 /// Launch a separate compute monitor thread and return its `JoinHandle`.
-pub fn launch_monitor(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_monitor(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -1,7 +1,9 @@
+use std::fs::File;
+use std::io::{BufRead, BufReader};
 use std::net::{SocketAddr, TcpStream};
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
-use std::process::Command;
+use std::process::Child;
 use std::str::FromStr;
 use std::{fs, thread, time};

@@ -220,12 +222,12 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
 /// Wait for Postgres to become ready to accept connections:
 /// - state should be `ready` in the `pgdata/postmaster.pid`
 /// - and we should be able to connect to 127.0.0.1:5432
-pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
+pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
    let pid_path = pgdata.join("postmaster.pid");
    let mut slept: u64 = 0; // ms
    let pause = time::Duration::from_millis(100);

-    let timeout = time::Duration::from_millis(200);
+    let timeout = time::Duration::from_millis(10);
    let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();

    loop {
@@ -236,14 +238,19 @@ pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
            bail!("timed out while waiting for Postgres to start");
        }

+        if let Ok(Some(status)) = pg.try_wait() {
+            // Postgres exited, that is not what we expected, bail out earlier.
+            // `status` is displayed as either the exit code or the terminating signal.
+            bail!("Postgres exited unexpectedly with {}", status);
+        }
+
        if pid_path.exists() {
-            // XXX: dumb and the simplest way to get the last line in a text file
-            // TODO: better use `.lines().last()` later
-            let stdout = Command::new("tail")
-                .args(&["-n1", pid_path.to_str().unwrap()])
-                .output()?
-                .stdout;
-            let status = String::from_utf8(stdout)?;
+            // The pid file may exist but still be empty or unreadable (e.g. when
+            // it is being written right now), so fall back to "unknown" and retry
+            // on the next iteration instead of panicking.
+            let file = BufReader::new(File::open(&pid_path)?);
+            let last_line = file.lines().last().and_then(|line| line.ok());
+            let status = last_line.unwrap_or_else(|| "unknown".to_string());
            let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();

            // Now Postgres is ready to accept connections
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -3,16 +3,53 @@ use std::path::Path;
 use anyhow::Result;
 use log::{info, log_enabled, warn, Level};
 use postgres::Client;
+use serde::Deserialize;

 use crate::config;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;
-use crate::zenith::ClusterSpec;
+
+/// Cluster spec or configuration represented as an optional number of
+/// delta operations + final cluster state description.
+#[derive(Clone, Deserialize)]
+pub struct ComputeSpec {
+    pub format_version: f32,
+    pub timestamp: String,
+    pub operation_uuid: Option<String>,
+    /// Expected cluster state at the end of transition process.
+    pub cluster: Cluster,
+    pub delta_operations: Option<Vec<DeltaOp>>,
+}
+
+/// Cluster state seen from the perspective of the external tools
+/// like Rails web console.
+#[derive(Clone, Deserialize)]
+pub struct Cluster {
+    pub cluster_id: String,
+    pub name: String,
+    pub state: Option<String>,
+    pub roles: Vec<Role>,
+    pub databases: Vec<Database>,
+    pub settings: GenericOptions,
+}
+
+/// Single cluster state changing operation that could not be represented as
+/// a static `Cluster` structure. For example:
+/// - DROP DATABASE
+/// - DROP ROLE
+/// - ALTER ROLE name RENAME TO new_name
+/// - ALTER DATABASE name RENAME TO new_name
+#[derive(Clone, Deserialize)]
+pub struct DeltaOp {
+    pub action: String,
+    pub name: PgIdent,
+    pub new_name: Option<PgIdent>,
+}

 /// It takes cluster specification and does the following:
 /// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
 /// - Update `pg_hba.conf` to allow external connections.
-pub fn handle_configuration(spec: &ClusterSpec, pgdata_path: &Path) -> Result<()> {
+pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
    // File `postgresql.conf` is no longer included into `basebackup`, so just
    // always write all config into it creating new file.
    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
@@ -39,7 +76,7 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {

 /// Given a cluster spec json and open transaction it handles roles creation,
 /// deletion and update.
-pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
+pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    let mut xact = client.transaction()?;
    let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

@@ -165,7 +202,7 @@ pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
 /// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
 /// atomicity should be enough here due to the order of operations and various checks,
 /// which together provide us idempotency.
-pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
+pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    let existing_dbs: Vec<Database> = get_existing_dbs(client)?;

    // Print a list of existing Postgres databases (only in debug mode)
@@ -254,7 +291,7 @@ pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {

 // Grant CREATE ON DATABASE to the database owner
 // to allow clients create trusted extensions.
-pub fn handle_grants(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
+pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    info!("cluster spec grants:");

    for db in &spec.cluster.databases {
--- a/compute_tools/src/zenith.rs
+++ b/compute_tools/src/zenith.rs
@@ -1,109 +0,0 @@
-use std::process::{Command, Stdio};
-
-use anyhow::Result;
-use chrono::{DateTime, Utc};
-use postgres::{Client, NoTls};
-use serde::Deserialize;
-
-use crate::pg_helpers::*;
-
-/// Compute node state shared across several `zenith_ctl` threads.
-/// Should be used under `RwLock` to allow HTTP API server to serve
-/// status requests, while configuration is in progress.
-pub struct ComputeState {
-    pub connstr: String,
-    pub pgdata: String,
-    pub pgbin: String,
-    pub spec: ClusterSpec,
-    /// Compute setup process has finished
-    pub ready: bool,
-    /// Timestamp of the last Postgres activity
-    pub last_active: DateTime<Utc>,
-}
-
-/// Cluster spec or configuration represented as an optional number of
-/// delta operations + final cluster state description.
-#[derive(Clone, Deserialize)]
-pub struct ClusterSpec {
-    pub format_version: f32,
-    pub timestamp: String,
-    pub operation_uuid: Option<String>,
-    /// Expected cluster state at the end of transition process.
-    pub cluster: Cluster,
-    pub delta_operations: Option<Vec<DeltaOp>>,
-}
-
-/// Cluster state seen from the perspective of the external tools
-/// like Rails web console.
-#[derive(Clone, Deserialize)]
-pub struct Cluster {
-    pub cluster_id: String,
-    pub name: String,
-    pub state: Option<String>,
-    pub roles: Vec<Role>,
-    pub databases: Vec<Database>,
-    pub settings: GenericOptions,
-}
-
-/// Single cluster state changing operation that could not be represented as
-/// a static `Cluster` structure. For example:
-/// - DROP DATABASE
-/// - DROP ROLE
-/// - ALTER ROLE name RENAME TO new_name
-/// - ALTER DATABASE name RENAME TO new_name
-#[derive(Clone, Deserialize)]
-pub struct DeltaOp {
-    pub action: String,
-    pub name: PgIdent,
-    pub new_name: Option<PgIdent>,
-}
-
-/// Get basebackup from the libpq connection to pageserver using `connstr` and
-/// unarchive it to `pgdata` directory overriding all its previous content.
-pub fn get_basebackup(
-    pgdata: &str,
-    connstr: &str,
-    tenant: &str,
-    timeline: &str,
-    lsn: &str,
-) -> Result<()> {
-    let mut client = Client::connect(connstr, NoTls)?;
-    let basebackup_cmd = match lsn {
-        "0/0" => format!("basebackup {} {}", tenant, timeline), // First start of the compute
-        _ => format!("basebackup {} {} {}", tenant, timeline, lsn),
-    };
-    let copyreader = client.copy_out(basebackup_cmd.as_str())?;
-    let mut ar = tar::Archive::new(copyreader);
-
-    ar.unpack(&pgdata)?;
-
-    Ok(())
-}
-
-/// Run `postgres` in a special mode with `--sync-safekeepers` argument
-/// and return the reported LSN back to the caller.
-pub fn sync_safekeepers(pgdata: &str, pgbin: &str) -> Result<String> {
-    let sync_handle = Command::new(&pgbin)
-        .args(&["--sync-safekeepers"])
-        .env("PGDATA", &pgdata) // we cannot use -D in this mode
-        .stdout(Stdio::piped())
-        .spawn()
-        .expect("postgres --sync-safekeepers failed to start");
-
-    // `postgres --sync-safekeepers` will print all log output to stderr and
-    // final LSN to stdout. So we pipe only stdout, while stderr will be automatically
-    // redirected to the caller output.
-    let sync_output = sync_handle
-        .wait_with_output()
-        .expect("postgres --sync-safekeepers failed");
-    if !sync_output.status.success() {
-        anyhow::bail!(
-            "postgres --sync-safekeepers exited with non-zero status: {}",
-            sync_output.status,
-        );
-    }
-
-    let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
-
-    Ok(lsn)
-}
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -4,12 +4,12 @@ mod pg_helpers_tests {
    use std::fs::File;

    use compute_tools::pg_helpers::*;
-    use compute_tools::zenith::ClusterSpec;
+    use compute_tools::spec::ComputeSpec;

    #[test]
    fn params_serialize() {
        let file = File::open("tests/cluster_spec.json").unwrap();
-        let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
+        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();

        assert_eq!(
            spec.cluster.databases.first().unwrap().to_pg_options(),
@@ -24,7 +24,7 @@ mod pg_helpers_tests {
    #[test]
    fn settings_serialize() {
        let file = File::open("tests/cluster_spec.json").unwrap();
-        let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
+        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();

        assert_eq!(
            spec.cluster.settings.as_pg_settings(),