diff --git a/Dockerfile.compute-tools b/Dockerfile.compute-tools
index bbe0f517ce..f0c9b9d56a 100644
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -15,4 +15,4 @@ RUN set -e \
 
 # Final image that only has one binary
 FROM debian:buster-slim
-COPY --from=rust-build /home/circleci/project/target/release/zenith_ctl /usr/local/bin/zenith_ctl
+COPY --from=rust-build /home/circleci/project/target/release/compute_ctl /usr/local/bin/compute_ctl
diff --git a/compute_tools/README.md b/compute_tools/README.md
index ccae3d2842..15876ed246 100644
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -1,9 +1,9 @@
 # Compute node tools
 
-Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
-`ExecStart` option. It will handle all the `zenith` specifics during compute node
+Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
+`ExecStart` option. It will handle all the `Neon` specifics during compute node
 initialization:
-- `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
+- `compute_ctl` accepts cluster (compute node) specification as a JSON file.
 - Every start is a fresh start, so the data directory is removed and
   initialized again on each run.
 - Next it will put configuration files into the `PGDATA` directory.
@@ -13,18 +13,18 @@ initialization:
 - Check and alter/drop/create roles and databases.
 - Hang waiting on the `postmaster` process to exit.
 
-Also `zenith_ctl` spawns two separate service threads:
+Also `compute_ctl` spawns two separate service threads:
 - `compute-monitor` checks the last Postgres activity timestamp and saves it
-  into the shared `ComputeState`;
+  into the shared `ComputeNode`;
 - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
   last activity requests.
 
 Usage example:
 ```sh
-zenith_ctl -D /var/db/postgres/compute \
-           -C 'postgresql://zenith_admin@localhost/postgres' \
-           -S /var/db/postgres/specs/current.json \
-           -b /usr/local/bin/postgres
+compute_ctl -D /var/db/postgres/compute \
+            -C 'postgresql://zenith_admin@localhost/postgres' \
+            -S /var/db/postgres/specs/current.json \
+            -b /usr/local/bin/postgres
 ```
 
 ## Tests
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
new file mode 100644
index 0000000000..5c951b7779
--- /dev/null
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -0,0 +1,174 @@
+//!
+//! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
+//! `ExecStart` option. It will handle all the `Neon` specifics during compute node
+//! initialization:
+//! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.
+//! - Every start is a fresh start, so the data directory is removed and
+//!   initialized again on each run.
+//! - Next it will put configuration files into the `PGDATA` directory.
+//! - Sync safekeepers and get commit LSN.
+//! - Get `basebackup` from pageserver using the LSN returned in the previous step.
+//! - Try to start `postgres` and wait until it is ready to accept connections.
+//! - Check and alter/drop/create roles and databases.
+//! - Hang waiting on the `postmaster` process to exit.
+//!
+//! Also `compute_ctl` spawns two separate service threads:
+//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
+//!   into the shared `ComputeNode`;
+//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
+//!   last activity requests.
+//!
+//! Usage example:
+//! ```sh
+//! compute_ctl -D /var/db/postgres/compute \
+//!             -C 'postgresql://zenith_admin@localhost/postgres' \
+//!             -S /var/db/postgres/specs/current.json \
+//!             -b /usr/local/bin/postgres
+//! ```
+//!
+use std::fs::File;
+use std::panic;
+use std::path::Path;
+use std::process::exit;
+use std::sync::{Arc, RwLock};
+use std::{thread, time::Duration};
+
+use anyhow::Result;
+use chrono::Utc;
+use clap::Arg;
+use log::{error, info};
+
+use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
+use compute_tools::http::api::launch_http_server;
+use compute_tools::logger::*;
+use compute_tools::monitor::launch_monitor;
+use compute_tools::params::*;
+use compute_tools::pg_helpers::*;
+use compute_tools::spec::*;
+
+fn main() -> Result<()> {
+    // TODO: re-use `utils::logging` later
+    init_logger(DEFAULT_LOG_LEVEL)?;
+
+    // Env variable is set by `cargo`
+    let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
+    let matches = clap::App::new("compute_ctl")
+        .version(version.unwrap_or("unknown"))
+        .arg(
+            Arg::new("connstr")
+                .short('C')
+                .long("connstr")
+                .value_name("DATABASE_URL")
+                .required(true),
+        )
+        .arg(
+            Arg::new("pgdata")
+                .short('D')
+                .long("pgdata")
+                .value_name("DATADIR")
+                .required(true),
+        )
+        .arg(
+            Arg::new("pgbin")
+                .short('b')
+                .long("pgbin")
+                .value_name("POSTGRES_PATH"),
+        )
+        .arg(
+            Arg::new("spec")
+                .short('s')
+                .long("spec")
+                .value_name("SPEC_JSON"),
+        )
+        .arg(
+            Arg::new("spec-path")
+                .short('S')
+                .long("spec-path")
+                .value_name("SPEC_PATH"),
+        )
+        .get_matches();
+
+    let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
+    let connstr = matches
+        .value_of("connstr")
+        .expect("Postgres connection string is required");
+    let spec = matches.value_of("spec");
+    let spec_path = matches.value_of("spec-path");
+
+    // Try to use just 'postgres' if no path is provided
+    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
+
+    let spec: ComputeSpec = match spec {
+        // First, try to get cluster spec from the cli argument
+        Some(json) => serde_json::from_str(json)?,
+        None => {
+            // Second, try to read it from the file if path is provided
+            if let Some(sp) = spec_path {
+                let path = Path::new(sp);
+                let file = File::open(path)?;
+                serde_json::from_reader(file)?
+            } else {
+                panic!("cluster spec should be provided via --spec or --spec-path argument");
+            }
+        }
+    };
+
+    let pageserver_connstr = spec
+        .cluster
+        .settings
+        .find("zenith.page_server_connstring")
+        .expect("pageserver connstr should be provided");
+    let tenant = spec
+        .cluster
+        .settings
+        .find("zenith.zenith_tenant")
+        .expect("tenant id should be provided");
+    let timeline = spec
+        .cluster
+        .settings
+        .find("zenith.zenith_timeline")
+        .expect("timeline id should be provided");
+
+    let compute_state = ComputeNode {
+        start_time: Utc::now(),
+        connstr: connstr.to_string(),
+        pgdata: pgdata.to_string(),
+        pgbin: pgbin.to_string(),
+        spec,
+        tenant,
+        timeline,
+        pageserver_connstr,
+        metrics: ComputeMetrics::new(),
+        state: RwLock::new(ComputeState::new()),
+    };
+    let compute = Arc::new(compute_state);
+
+    // Launch the service threads first, so that we can serve availability
+    // requests while configuration is still in progress.
+    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
+    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
+
+    // Run compute (Postgres) and hang waiting on it.
+    match compute.prepare_and_run() {
+        Ok(ec) => {
+            let code = ec.code().unwrap_or(1);
+            info!("Postgres exited with code {}, shutting down", code);
+            exit(code)
+        }
+        Err(error) => {
+            error!("could not start the compute node: {}", error);
+
+            let mut state = compute.state.write().unwrap();
+            state.error = Some(format!("{:?}", error));
+            state.status = ComputeStatus::Failed;
+            drop(state);
+
+            // Keep serving HTTP requests, so that the cloud control plane
+            // can collect the actual error.
+            info!("giving control plane 30s to collect the error before shutdown");
+            thread::sleep(Duration::from_secs(30));
+            info!("shutting down");
+            Err(error)
+        }
+    }
+}
diff --git a/compute_tools/src/bin/zenith_ctl.rs b/compute_tools/src/bin/zenith_ctl.rs
deleted file mode 100644
index 3685f8e8b4..0000000000
--- a/compute_tools/src/bin/zenith_ctl.rs
+++ /dev/null
@@ -1,252 +0,0 @@
-//!
-//! Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
-//! `ExecStart` option. It will handle all the `zenith` specifics during compute node
-//! initialization:
-//! - `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
-//! - Every start is a fresh start, so the data directory is removed and
-//!   initialized again on each run.
-//! - Next it will put configuration files into the `PGDATA` directory.
-//! - Sync safekeepers and get commit LSN.
-//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
-//! - Try to start `postgres` and wait until it is ready to accept connections.
-//! - Check and alter/drop/create roles and databases.
-//! - Hang waiting on the `postmaster` process to exit.
-//!
-//! Also `zenith_ctl` spawns two separate service threads:
-//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
-//!   into the shared `ComputeState`;
-//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
-//!   last activity requests.
-//!
-//! Usage example:
-//! ```sh
-//! zenith_ctl -D /var/db/postgres/compute \
-//!            -C 'postgresql://zenith_admin@localhost/postgres' \
-//!            -S /var/db/postgres/specs/current.json \
-//!            -b /usr/local/bin/postgres
-//! ```
-//!
-use std::fs::File;
-use std::panic;
-use std::path::Path;
-use std::process::{exit, Command, ExitStatus};
-use std::sync::{Arc, RwLock};
-
-use anyhow::{Context, Result};
-use chrono::Utc;
-use clap::Arg;
-use log::info;
-use postgres::{Client, NoTls};
-
-use compute_tools::checker::create_writablity_check_data;
-use compute_tools::config;
-use compute_tools::http_api::launch_http_server;
-use compute_tools::logger::*;
-use compute_tools::monitor::launch_monitor;
-use compute_tools::params::*;
-use compute_tools::pg_helpers::*;
-use compute_tools::spec::*;
-use compute_tools::zenith::*;
-
-/// Do all the preparations like PGDATA directory creation, configuration,
-/// safekeepers sync, basebackup, etc.
-fn prepare_pgdata(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
-    let state = state.read().unwrap();
-    let spec = &state.spec;
-    let pgdata_path = Path::new(&state.pgdata);
-    let pageserver_connstr = spec
-        .cluster
-        .settings
-        .find("zenith.page_server_connstring")
-        .expect("pageserver connstr should be provided");
-    let tenant = spec
-        .cluster
-        .settings
-        .find("zenith.zenith_tenant")
-        .expect("tenant id should be provided");
-    let timeline = spec
-        .cluster
-        .settings
-        .find("zenith.zenith_timeline")
-        .expect("tenant id should be provided");
-
-    info!(
-        "starting cluster #{}, operation #{}",
-        spec.cluster.cluster_id,
-        spec.operation_uuid.as_ref().unwrap()
-    );
-
-    // Remove/create an empty pgdata directory and put configuration there.
-    create_pgdata(&state.pgdata)?;
-    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
-
-    info!("starting safekeepers syncing");
-    let lsn = sync_safekeepers(&state.pgdata, &state.pgbin)
-        .with_context(|| "failed to sync safekeepers")?;
-    info!("safekeepers synced at LSN {}", lsn);
-
-    info!(
-        "getting basebackup@{} from pageserver {}",
-        lsn, pageserver_connstr
-    );
-    get_basebackup(&state.pgdata, &pageserver_connstr, &tenant, &timeline, &lsn).with_context(
-        || {
-            format!(
-                "failed to get basebackup@{} from pageserver {}",
-                lsn, pageserver_connstr
-            )
-        },
-    )?;
-
-    // Update pg_hba.conf received with basebackup.
-    update_pg_hba(pgdata_path)?;
-
-    Ok(())
-}
-
-/// Start Postgres as a child process and manage DBs/roles.
-/// After that this will hang waiting on the postmaster process to exit.
-fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
-    let read_state = state.read().unwrap();
-    let pgdata_path = Path::new(&read_state.pgdata);
-
-    // Run postgres as a child process.
-    let mut pg = Command::new(&read_state.pgbin)
-        .args(&["-D", &read_state.pgdata])
-        .spawn()
-        .expect("cannot start postgres process");
-
-    // Try default Postgres port if it is not provided
-    let port = read_state
-        .spec
-        .cluster
-        .settings
-        .find("port")
-        .unwrap_or_else(|| "5432".to_string());
-    wait_for_postgres(&port, pgdata_path)?;
-
-    let mut client = Client::connect(&read_state.connstr, NoTls)?;
-
-    handle_roles(&read_state.spec, &mut client)?;
-    handle_databases(&read_state.spec, &mut client)?;
-    handle_grants(&read_state.spec, &mut client)?;
-    create_writablity_check_data(&mut client)?;
-
-    // 'Close' connection
-    drop(client);
-
-    info!(
-        "finished configuration of cluster #{}",
-        read_state.spec.cluster.cluster_id
-    );
-
-    // Release the read lock.
-    drop(read_state);
-
-    // Get the write lock, update state and release the lock, so HTTP API
-    // was able to serve requests, while we are blocked waiting on
-    // Postgres.
-    let mut state = state.write().unwrap();
-    state.ready = true;
-    drop(state);
-
-    // Wait for child postgres process basically forever. In this state Ctrl+C
-    // will be propagated to postgres and it will be shut down as well.
-    let ecode = pg.wait().expect("failed to wait on postgres");
-
-    Ok(ecode)
-}
-
-fn main() -> Result<()> {
-    // TODO: re-use `utils::logging` later
-    init_logger(DEFAULT_LOG_LEVEL)?;
-
-    // Env variable is set by `cargo`
-    let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
-    let matches = clap::App::new("zenith_ctl")
-        .version(version.unwrap_or("unknown"))
-        .arg(
-            Arg::new("connstr")
-                .short('C')
-                .long("connstr")
-                .value_name("DATABASE_URL")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgdata")
-                .short('D')
-                .long("pgdata")
-                .value_name("DATADIR")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgbin")
-                .short('b')
-                .long("pgbin")
-                .value_name("POSTGRES_PATH"),
-        )
-        .arg(
-            Arg::new("spec")
-                .short('s')
-                .long("spec")
-                .value_name("SPEC_JSON"),
-        )
-        .arg(
-            Arg::new("spec-path")
-                .short('S')
-                .long("spec-path")
-                .value_name("SPEC_PATH"),
-        )
-        .get_matches();
-
-    let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
-    let connstr = matches
-        .value_of("connstr")
-        .expect("Postgres connection string is required");
-    let spec = matches.value_of("spec");
-    let spec_path = matches.value_of("spec-path");
-
-    // Try to use just 'postgres' if no path is provided
-    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
-
-    let spec: ClusterSpec = match spec {
-        // First, try to get cluster spec from the cli argument
-        Some(json) => serde_json::from_str(json)?,
-        None => {
-            // Second, try to read it from the file if path is provided
-            if let Some(sp) = spec_path {
-                let path = Path::new(sp);
-                let file = File::open(path)?;
-                serde_json::from_reader(file)?
-            } else {
-                panic!("cluster spec should be provided via --spec or --spec-path argument");
-            }
-        }
-    };
-
-    let compute_state = ComputeState {
-        connstr: connstr.to_string(),
-        pgdata: pgdata.to_string(),
-        pgbin: pgbin.to_string(),
-        spec,
-        ready: false,
-        last_active: Utc::now(),
-    };
-    let compute_state = Arc::new(RwLock::new(compute_state));
-
-    // Launch service threads first, so we were able to serve availability
-    // requests, while configuration is still in progress.
-    let mut _threads = vec![
-        launch_http_server(&compute_state).expect("cannot launch compute monitor thread"),
-        launch_monitor(&compute_state).expect("cannot launch http endpoint thread"),
-    ];
-
-    prepare_pgdata(&compute_state)?;
-
-    // Run compute (Postgres) and hang waiting on it. Panic if any error happens,
-    // it will help us to trigger unwind and kill postmaster as well.
-    match run_compute(&compute_state) {
-        Ok(ec) => exit(ec.success() as i32),
-        Err(error) => panic!("cannot start compute node, error: {}", error),
-    }
-}
diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs
index 63da6ea23e..dbb70a74cf 100644
--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -1,11 +1,11 @@
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 
 use anyhow::{anyhow, Result};
 use log::error;
 use postgres::Client;
 use tokio_postgres::NoTls;
 
-use crate::zenith::ComputeState;
+use crate::compute::ComputeNode;
 
 pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
     let query = "
@@ -23,9 +23,9 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
     Ok(())
 }
 
-pub async fn check_writability(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
-    let connstr = state.read().unwrap().connstr.clone();
-    let (client, connection) = tokio_postgres::connect(&connstr, NoTls).await?;
+pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
+    let connstr = &compute.connstr;
+    let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
     if client.is_closed() {
         return Err(anyhow!("connection to postgres closed"));
     }
diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs
new file mode 100644
index 0000000000..a8422fb2b2
--- /dev/null
+++ b/compute_tools/src/compute.rs
@@ -0,0 +1,315 @@
+//
+// XXX: This is becoming scarily similar to the `PostgresNode` from `control_plane`,
+// but there are several things that make `PostgresNode` usage inconvenient in the
+// cloud:
+// - it inherits from `LocalEnv`, which contains **all** the information about
+//   a complete running service
+// - it uses `PageServerNode` with information about the http endpoint, which we
+//   again do not need in the cloud
+// - many small differences, for example, we do not use `pg_ctl` in the cloud
+//
+// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required
+// attributes (not required for the cloud). Yet, it is still tempting to unify these
+// `PostgresNode` and `ComputeNode` and use one in both places.
+//
+// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`.
+//
+use std::fs;
+use std::os::unix::fs::PermissionsExt;
+use std::path::Path;
+use std::process::{Command, ExitStatus, Stdio};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::RwLock;
+
+use anyhow::{Context, Result};
+use chrono::{DateTime, Utc};
+use log::info;
+use postgres::{Client, NoTls};
+use serde::{Serialize, Serializer};
+
+use crate::checker::create_writablity_check_data;
+use crate::config;
+use crate::pg_helpers::*;
+use crate::spec::*;
+
+/// Compute node info shared across several `compute_ctl` threads.
+pub struct ComputeNode {
+    pub start_time: DateTime<Utc>,
+    pub connstr: String,
+    pub pgdata: String,
+    pub pgbin: String,
+    pub spec: ComputeSpec,
+    pub tenant: String,
+    pub timeline: String,
+    pub pageserver_connstr: String,
+    pub metrics: ComputeMetrics,
+    /// Volatile part of the `ComputeNode`, so it should be used under `RwLock`
+    /// to allow the HTTP API server to serve status requests, while configuration
+    /// is in progress.
+    pub state: RwLock<ComputeState>,
+}
+
+fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    x.to_rfc3339().serialize(s)
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "snake_case")]
+pub struct ComputeState {
+    pub status: ComputeStatus,
+    /// Timestamp of the last Postgres activity
+    #[serde(serialize_with = "rfc3339_serialize")]
+    pub last_active: DateTime<Utc>,
+    pub error: Option<String>,
+}
+
+impl ComputeState {
+    pub fn new() -> Self {
+        Self {
+            status: ComputeStatus::Init,
+            last_active: Utc::now(),
+            error: None,
+        }
+    }
+}
+
+impl Default for ComputeState {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[derive(Serialize, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum ComputeStatus {
+    Init,
+    Running,
+    Failed,
+}
+
+#[derive(Serialize)]
+pub struct ComputeMetrics {
+    pub sync_safekeepers_ms: AtomicU64,
+    pub basebackup_ms: AtomicU64,
+    pub config_ms: AtomicU64,
+    pub total_startup_ms: AtomicU64,
+}
+
+impl ComputeMetrics {
+    pub fn new() -> Self {
+        Self {
+            sync_safekeepers_ms: AtomicU64::new(0),
+            basebackup_ms: AtomicU64::new(0),
+            config_ms: AtomicU64::new(0),
+            total_startup_ms: AtomicU64::new(0),
+        }
+    }
+}
+
+impl Default for ComputeMetrics {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ComputeNode {
+    pub fn set_status(&self, status: ComputeStatus) {
+        self.state.write().unwrap().status = status;
+    }
+
+    pub fn get_status(&self) -> ComputeStatus {
+        self.state.read().unwrap().status
+    }
+
+    // Remove `pgdata` directory and create it again with right permissions.
+    fn create_pgdata(&self) -> Result<()> {
+        // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
+        // If it is something different then create_dir() will error out anyway.
+        let _ok = fs::remove_dir_all(&self.pgdata);
+        fs::create_dir(&self.pgdata)?;
+        fs::set_permissions(&self.pgdata, fs::Permissions::from_mode(0o700))?;
+
+        Ok(())
+    }
+
+    // Get basebackup from the libpq connection to pageserver using `connstr` and
+    // unarchive it to `pgdata` directory overriding all its previous content.
+    fn get_basebackup(&self, lsn: &str) -> Result<()> {
+        let start_time = Utc::now();
+
+        let mut client = Client::connect(&self.pageserver_connstr, NoTls)?;
+        let basebackup_cmd = match lsn {
+            "0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
+            _ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
+        };
+        let copyreader = client.copy_out(basebackup_cmd.as_str())?;
+        let mut ar = tar::Archive::new(copyreader);
+
+        ar.unpack(&self.pgdata)?;
+
+        self.metrics.basebackup_ms.store(
+            Utc::now()
+                .signed_duration_since(start_time)
+                .to_std()
+                .unwrap()
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );
+
+        Ok(())
+    }
+
+    // Run `postgres` in a special mode with `--sync-safekeepers` argument
+    // and return the reported LSN back to the caller.
+    fn sync_safekeepers(&self) -> Result<String> {
+        let start_time = Utc::now();
+
+        let sync_handle = Command::new(&self.pgbin)
+            .args(&["--sync-safekeepers"])
+            .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
+            .stdout(Stdio::piped())
+            .spawn()
+            .expect("postgres --sync-safekeepers failed to start");
+
+        // `postgres --sync-safekeepers` will print all log output to stderr and
+        // final LSN to stdout. So we pipe only stdout, while stderr will be automatically
+        // redirected to the caller output.
+        let sync_output = sync_handle
+            .wait_with_output()
+            .expect("postgres --sync-safekeepers failed");
+        if !sync_output.status.success() {
+            anyhow::bail!(
+                "postgres --sync-safekeepers exited with non-zero status: {}",
+                sync_output.status,
+            );
+        }
+
+        self.metrics.sync_safekeepers_ms.store(
+            Utc::now()
+                .signed_duration_since(start_time)
+                .to_std()
+                .unwrap()
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );
+
+        let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
+
+        Ok(lsn)
+    }
+
+    /// Do all the preparations like PGDATA directory creation, configuration,
+    /// safekeepers sync, basebackup, etc.
+    pub fn prepare_pgdata(&self) -> Result<()> {
+        let spec = &self.spec;
+        let pgdata_path = Path::new(&self.pgdata);
+
+        // Remove/create an empty pgdata directory and put configuration there.
+        self.create_pgdata()?;
+        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
+
+        info!("starting safekeepers syncing");
+        let lsn = self
+            .sync_safekeepers()
+            .with_context(|| "failed to sync safekeepers")?;
+        info!("safekeepers synced at LSN {}", lsn);
+
+        info!(
+            "getting basebackup@{} from pageserver {}",
+            lsn, &self.pageserver_connstr
+        );
+        self.get_basebackup(&lsn).with_context(|| {
+            format!(
+                "failed to get basebackup@{} from pageserver {}",
+                lsn, &self.pageserver_connstr
+            )
+        })?;
+
+        // Update pg_hba.conf received with basebackup.
+        update_pg_hba(pgdata_path)?;
+
+        Ok(())
+    }
+
+    /// Start Postgres as a child process and manage DBs/roles.
+    /// After that this will hang waiting on the postmaster process to exit.
+    pub fn run(&self) -> Result<ExitStatus> {
+        let start_time = Utc::now();
+
+        let pgdata_path = Path::new(&self.pgdata);
+
+        // Run postgres as a child process.
+        let mut pg = Command::new(&self.pgbin)
+            .args(&["-D", &self.pgdata])
+            .spawn()
+            .expect("cannot start postgres process");
+
+        // Try default Postgres port if it is not provided
+        let port = self
+            .spec
+            .cluster
+            .settings
+            .find("port")
+            .unwrap_or_else(|| "5432".to_string());
+        wait_for_postgres(&mut pg, &port, pgdata_path)?;
+
+        let mut client = Client::connect(&self.connstr, NoTls)?;
+
+        handle_roles(&self.spec, &mut client)?;
+        handle_databases(&self.spec, &mut client)?;
+        handle_grants(&self.spec, &mut client)?;
+        create_writablity_check_data(&mut client)?;
+
+        // 'Close' connection
+        drop(client);
+        let startup_end_time = Utc::now();
+
+        self.metrics.config_ms.store(
+            startup_end_time
+                .signed_duration_since(start_time)
+                .to_std()
+                .unwrap()
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );
+        self.metrics.total_startup_ms.store(
+            startup_end_time
+                .signed_duration_since(self.start_time)
+                .to_std()
+                .unwrap()
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );
+
+        self.set_status(ComputeStatus::Running);
+
+        info!(
+            "finished configuration of compute for project {}",
+            self.spec.cluster.cluster_id
+        );
+
+        // Wait for child Postgres process basically forever. In this state Ctrl+C
+        // will propagate to Postgres and it will be shut down as well.
+        let ecode = pg
+            .wait()
+            .expect("failed to wait on Postgres process");
+
+        Ok(ecode)
+    }
+
+    pub fn prepare_and_run(&self) -> Result<ExitStatus> {
+        info!(
+            "starting compute for project {}, operation {}, tenant {}, timeline {}",
+            self.spec.cluster.cluster_id,
+            self.spec.operation_uuid.as_ref().unwrap(),
+            self.tenant,
+            self.timeline,
+        );
+
+        self.prepare_pgdata()?;
+        self.run()
+    }
+}
diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs
index 22134db0f8..6cbd0e3d4c 100644
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -6,7 +6,7 @@ use std::path::Path;
 use anyhow::Result;
 
 use crate::pg_helpers::PgOptionsSerialize;
-use crate::zenith::ClusterSpec;
+use crate::spec::ComputeSpec;
 
 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -32,20 +32,20 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 }
 
 /// Create or completely rewrite configuration file specified by `path`
-pub fn write_postgres_conf(path: &Path, spec: &ClusterSpec) -> Result<()> {
+pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
     // File::create() destroys the file content if it exists.
     let mut postgres_conf = File::create(path)?;
 
-    write_zenith_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
+    write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
 
     Ok(())
 }
 
 // Write Postgres config block wrapped with generated comment section
-fn write_zenith_managed_block(file: &mut File, buf: &str) -> Result<()> {
-    writeln!(file, "# Managed by Zenith: begin")?;
+fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
+    writeln!(file, "# Managed by compute_ctl: begin")?;
     writeln!(file, "{}", buf)?;
-    writeln!(file, "# Managed by Zenith: end")?;
+    writeln!(file, "# Managed by compute_ctl: end")?;
 
     Ok(())
 }
diff --git a/compute_tools/src/http_api.rs b/compute_tools/src/http/api.rs
similarity index 56%
rename from compute_tools/src/http_api.rs
rename to compute_tools/src/http/api.rs
index 7e1a876044..4c8bbc608b 100644
--- a/compute_tools/src/http_api.rs
+++ b/compute_tools/src/http/api.rs
@@ -1,37 +1,64 @@
 use std::convert::Infallible;
 use std::net::SocketAddr;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::thread;
 
 use anyhow::Result;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
 use log::{error, info};
+use serde_json;
 
-use crate::zenith::*;
+use crate::compute::{ComputeNode, ComputeStatus};
 
 // Service function to handle all available routes.
-async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
+async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
     match (req.method(), req.uri().path()) {
         // Timestamp of the last Postgres activity in the plain text.
+        // DEPRECATED in favour of /status
        (&Method::GET, "/last_activity") => {
             info!("serving /last_active GET request");
-            let state = state.read().unwrap();
+            let state = compute.state.read().unwrap();
             // Use RFC3339 format for consistency.
             Response::new(Body::from(state.last_active.to_rfc3339()))
         }
 
-        // Has compute setup process finished? -> true/false
+        // Has compute setup process finished? -> true/false.
+        // DEPRECATED in favour of /status
         (&Method::GET, "/ready") => {
             info!("serving /ready GET request");
-            let state = state.read().unwrap();
-            Response::new(Body::from(format!("{}", state.ready)))
+            let status = compute.get_status();
+            Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
         }
 
+        // Serialized compute state.
+        (&Method::GET, "/status") => {
+            info!("serving /status GET request");
+            let state = compute.state.read().unwrap();
+            Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
+        }
+
+        // Startup metrics in JSON format. Keep /metrics reserved for a possible
+        // future use for Prometheus metrics format.
+        (&Method::GET, "/metrics.json") => {
+            info!("serving /metrics.json GET request");
+            Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
+        }
+
+        // DEPRECATED, use POST instead
         (&Method::GET, "/check_writability") => {
             info!("serving /check_writability GET request");
-            let res = crate::checker::check_writability(&state).await;
+            let res = crate::checker::check_writability(&compute).await;
+            match res {
+                Ok(_) => Response::new(Body::from("true")),
+                Err(e) => Response::new(Body::from(e.to_string())),
+            }
+        }
+
+        (&Method::POST, "/check_writability") => {
+            info!("serving /check_writability POST request");
+            let res = crate::checker::check_writability(&compute).await;
             match res {
                 Ok(_) => Response::new(Body::from("true")),
                 Err(e) => Response::new(Body::from(e.to_string())),
@@ -49,7 +76,7 @@ async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Respons
 
 // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
 #[tokio::main]
-async fn serve(state: Arc<RwLock<ComputeState>>) {
+async fn serve(state: Arc<ComputeNode>) {
     let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
 
     let make_service = make_service_fn(move |_conn| {
@@ -73,7 +100,7 @@ async fn serve(state: Arc<RwLock<ComputeState>>) {
 }
 
 /// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
-pub fn launch_http_server(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
     let state = Arc::clone(state);
 
     Ok(thread::Builder::new()
diff --git a/compute_tools/src/http/mod.rs b/compute_tools/src/http/mod.rs
new file mode 100644
index 0000000000..e5fdf85eed
--- /dev/null
+++ b/compute_tools/src/http/mod.rs
@@ -0,0 +1 @@
+pub mod api;
diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml
new file mode 100644
index 0000000000..9c0f8e3ccd
--- /dev/null
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -0,0 +1,158 @@
+openapi: "3.0.2"
+info:
+  title: Compute node control API
+  version: "1.0"
+
+servers:
+  - url: "http://localhost:3080"
+
+paths:
+  /status:
+    get:
+      tags:
+        - "info"
+      summary: Get compute node internal status
+      description: ""
+      operationId: getComputeStatus
+      responses:
+        "200":
+          description: ComputeState
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ComputeState"
+
+  /metrics.json:
+    get:
+      tags:
+        - "info"
+      summary: Get compute node startup metrics in JSON format
+      description: ""
+      operationId: getComputeMetricsJSON
+      responses:
+        "200":
+          description: ComputeMetrics
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ComputeMetrics"
+
+  /ready:
+    get:
+      deprecated: true
+      tags:
+        - "info"
+      summary: Check whether compute startup process finished successfully
+      description: ""
+      operationId: computeIsReady
+      responses:
+        "200":
+          description: Compute is ready ('true') or not ('false')
+          content:
+            text/plain:
+              schema:
+                type: string
+                example: "true"
+
+  /last_activity:
+    get:
+      deprecated: true
+      tags:
+        - "info"
+      summary: Get timestamp of the last compute activity
+      description: ""
+      operationId: getLastComputeActivityTS
+      responses:
+        "200":
+          description: Timestamp of the last compute activity
+          content:
+            text/plain:
+              schema:
+                type: string
+                example: "2022-10-12T07:20:50.52Z"
+
+  /check_writability:
+    get:
+      deprecated: true
+      tags:
+        - "check"
+      summary: Check that we can write new data on this compute
+      description: ""
+      operationId: checkComputeWritabilityDeprecated
+      responses:
+        "200":
+          description: Check result
+          content:
+            text/plain:
+              schema:
+                type: string
+                description: Error text or 'true' if check passed
+                example: "true"
+
+    post:
+      tags:
+        - "check"
+      summary: Check that we can write new data on this compute
+      description: ""
+      operationId: checkComputeWritability
+      responses:
+        "200":
+          description: Check result
+          content:
+            text/plain:
+              schema:
+                type: string
+                description: Error text or 'true' if check passed
+                example: "true"
+
+components:
+  securitySchemes:
+    JWT:
+      type: http
+      scheme: bearer
+      bearerFormat: JWT
+
+  schemas:
+    ComputeMetrics:
+      type: object
+      description: Compute startup metrics
+      required:
+        - sync_safekeepers_ms
+        - basebackup_ms
+        - config_ms
+        - total_startup_ms
+      properties:
+        sync_safekeepers_ms:
+          type: integer
+        basebackup_ms:
+          type: integer
+        config_ms:
+          type: integer
+        total_startup_ms:
+          type: integer
+
+    ComputeState:
+      type: object
+      required:
+        - status
+        - last_active
+      properties:
+        status:
+          $ref: '#/components/schemas/ComputeStatus'
+        last_active:
+          type: string
+          description: The last detected compute activity timestamp in UTC and RFC3339 format
+          example: "2022-10-12T07:20:50.52Z"
+        error:
+          type: string
+          description: Text of the error during compute startup, if any
+
+    ComputeStatus:
+      type: string
+      enum:
+        - init
+        - failed
+        - running
+
+security:
+  - JWT: []
diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs
index ffb9700a49..aee6b53e6a 100644
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -4,11 +4,11 @@
 //!
 pub mod checker;
 pub mod config;
-pub mod http_api;
+pub mod http;
 #[macro_use]
 pub mod logger;
+pub mod compute;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
 pub mod spec;
-pub mod zenith;
diff --git a/compute_tools/src/monitor.rs b/compute_tools/src/monitor.rs
index 596981b2d2..496a5aae3b 100644
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -1,4 +1,4 @@
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::{thread, time};
 
 use anyhow::Result;
@@ -6,16 +6,16 @@ use chrono::{DateTime, Utc};
 use log::{debug, info};
 use postgres::{Client, NoTls};
 
-use crate::zenith::ComputeState;
+use crate::compute::ComputeNode;
 
 const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
 
 // Spin in a loop and figure out the last activity time in the Postgres.
 // Then update it in the shared state. This function never errors out.
 // XXX: the only expected panic is at `RwLock` unwrap().
-fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
+fn watch_compute_activity(compute: &Arc<ComputeNode>) {
     // Suppose that `connstr` doesn't change
-    let connstr = state.read().unwrap().connstr.clone();
+    let connstr = compute.connstr.clone();
 
     // Define `client` outside of the loop to reuse existing connection if it's active.
     let mut client = Client::connect(&connstr, NoTls);
     let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
@@ -46,7 +46,7 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
             AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
                     &[],
                 );
-                let mut last_active = state.read().unwrap().last_active;
+                let mut last_active = compute.state.read().unwrap().last_active;
 
                 if let Ok(backs) = backends {
                     let mut idle_backs: Vec<DateTime<Utc>> = vec![];
@@ -83,14 +83,14 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
                 }
 
                 // Update the last activity in the shared state if we got a more recent one.
-                let mut state = state.write().unwrap();
+                let mut state = compute.state.write().unwrap();
                 if last_active > state.last_active {
                     state.last_active = last_active;
                     debug!("set the last compute activity time to: {}", last_active);
                 }
             }
             Err(e) => {
-                info!("cannot connect to postgres: {}, retrying", e);
+                debug!("cannot connect to postgres: {}, retrying", e);
 
                 // Establish a new connection and try again.
                 client = Client::connect(&connstr, NoTls);
@@ -100,7 +100,7 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
 }
 
 /// Launch a separate compute monitor thread and return its `JoinHandle`.
-pub fn launch_monitor(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_monitor(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
     let state = Arc::clone(state);
 
     Ok(thread::Builder::new()
diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs
index 1409a81b6b..74856eac63 100644
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -1,7 +1,9 @@
+use std::fs::File;
+use std::io::{BufRead, BufReader};
 use std::net::{SocketAddr, TcpStream};
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
-use std::process::Command;
+use std::process::Child;
 use std::str::FromStr;
 use std::{fs, thread, time};
 
@@ -220,12 +222,12 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
 /// Wait for Postgres to become ready to accept connections:
 /// - state should be `ready` in the `pgdata/postmaster.pid`
 /// - and we should be able to connect to 127.0.0.1:5432
-pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
+pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
     let pid_path = pgdata.join("postmaster.pid");
 
     let mut slept: u64 = 0; // ms
     let pause = time::Duration::from_millis(100);
-    let timeout = time::Duration::from_millis(200);
+    let timeout = time::Duration::from_millis(10);
     let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();
 
     loop {
@@ -236,14 +238,19 @@ pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
             bail!("timed out while waiting for Postgres to start");
         }
 
+        if let Ok(Some(status)) = pg.try_wait() {
+            // Postgres exited, that is not what we expected, bail out earlier.
+            let code = status.code().unwrap_or(-1);
+            bail!("Postgres exited unexpectedly with code {}", code);
+        }
+
         if pid_path.exists() {
-            // XXX: dumb and the simplest way to get the last line in a text file
-            // TODO: better use `.lines().last()` later
-            let stdout = Command::new("tail")
-                .args(&["-n1", pid_path.to_str().unwrap()])
-                .output()?
-                .stdout;
-            let status = String::from_utf8(stdout)?;
+            let file = BufReader::new(File::open(&pid_path)?);
+            let status = file
+                .lines()
+                .last()
+                .unwrap()
+                .unwrap_or_else(|_| "unknown".to_string());
             let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
 
             // Now Postgres is ready to accept connections
diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs
index 334e0a9e05..e88df56a65 100644
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -3,16 +3,53 @@ use std::path::Path;
 use anyhow::Result;
 use log::{info, log_enabled, warn, Level};
 use postgres::Client;
+use serde::Deserialize;
 
 use crate::config;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;
-use crate::zenith::ClusterSpec;
+
+/// Cluster spec or configuration represented as an optional number of
+/// delta operations + final cluster state description.
+#[derive(Clone, Deserialize)]
+pub struct ComputeSpec {
+    pub format_version: f32,
+    pub timestamp: String,
+    pub operation_uuid: Option<String>,
+    /// Expected cluster state at the end of transition process.
+    pub cluster: Cluster,
+    pub delta_operations: Option<Vec<DeltaOp>>,
+}
+
+/// Cluster state seen from the perspective of the external tools
+/// like Rails web console.
+#[derive(Clone, Deserialize)]
+pub struct Cluster {
+    pub cluster_id: String,
+    pub name: String,
+    pub state: Option<String>,
+    pub roles: Vec<Role>,
+    pub databases: Vec<Database>,
+    pub settings: GenericOptions,
+}
+
+/// Single cluster state changing operation that could not be represented as
+/// a static `Cluster` structure. For example:
+/// - DROP DATABASE
+/// - DROP ROLE
+/// - ALTER ROLE name RENAME TO new_name
+/// - ALTER DATABASE name RENAME TO new_name
+#[derive(Clone, Deserialize)]
+pub struct DeltaOp {
+    pub action: String,
+    pub name: PgIdent,
+    pub new_name: Option<PgIdent>,
+}
 
 /// It takes cluster specification and does the following:
 /// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
 /// - Update `pg_hba.conf` to allow external connections.
-pub fn handle_configuration(spec: &ClusterSpec, pgdata_path: &Path) -> Result<()> {
+pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
     // File `postgresql.conf` is no longer included into `basebackup`, so just
     // always write all config into it creating new file.
     config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
@@ -39,7 +76,7 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
 
 /// Given a cluster spec json and open transaction it handles roles creation,
 /// deletion and update.
-pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
+pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
     let mut xact = client.transaction()?;
     let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
 
@@ -165,7 +202,7 @@ pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
 /// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
 /// atomicity should be enough here due to the order of operations and various checks,
 /// which together provide us idempotency.
-pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
+pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
     let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
 
     // Print a list of existing Postgres databases (only in debug mode)
@@ -254,7 +291,7 @@ pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
 
 // Grant CREATE ON DATABASE to the database owner
 // to allow clients create trusted extensions.
-pub fn handle_grants(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
+pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
     info!("cluster spec grants:");
 
     for db in &spec.cluster.databases {
diff --git a/compute_tools/src/zenith.rs b/compute_tools/src/zenith.rs
deleted file mode 100644
index ba7dc20787..0000000000
--- a/compute_tools/src/zenith.rs
+++ /dev/null
@@ -1,109 +0,0 @@
-use std::process::{Command, Stdio};
-
-use anyhow::Result;
-use chrono::{DateTime, Utc};
-use postgres::{Client, NoTls};
-use serde::Deserialize;
-
-use crate::pg_helpers::*;
-
-/// Compute node state shared across several `zenith_ctl` threads.
-/// Should be used under `RwLock` to allow HTTP API server to serve
-/// status requests, while configuration is in progress.
-pub struct ComputeState {
-    pub connstr: String,
-    pub pgdata: String,
-    pub pgbin: String,
-    pub spec: ClusterSpec,
-    /// Compute setup process has finished
-    pub ready: bool,
-    /// Timestamp of the last Postgres activity
-    pub last_active: DateTime<Utc>,
-}
-
-/// Cluster spec or configuration represented as an optional number of
-/// delta operations + final cluster state description.
-#[derive(Clone, Deserialize)]
-pub struct ClusterSpec {
-    pub format_version: f32,
-    pub timestamp: String,
-    pub operation_uuid: Option<String>,
-    /// Expected cluster state at the end of transition process.
-    pub cluster: Cluster,
-    pub delta_operations: Option<Vec<DeltaOp>>,
-}
-
-/// Cluster state seen from the perspective of the external tools
-/// like Rails web console.
-#[derive(Clone, Deserialize)]
-pub struct Cluster {
-    pub cluster_id: String,
-    pub name: String,
-    pub state: Option<String>,
-    pub roles: Vec<Role>,
-    pub databases: Vec<Database>,
-    pub settings: GenericOptions,
-}
-
-/// Single cluster state changing operation that could not be represented as
-/// a static `Cluster` structure. For example:
-/// - DROP DATABASE
-/// - DROP ROLE
-/// - ALTER ROLE name RENAME TO new_name
-/// - ALTER DATABASE name RENAME TO new_name
-#[derive(Clone, Deserialize)]
-pub struct DeltaOp {
-    pub action: String,
-    pub name: PgIdent,
-    pub new_name: Option<PgIdent>,
-}
-
-/// Get basebackup from the libpq connection to pageserver using `connstr` and
-/// unarchive it to `pgdata` directory overriding all its previous content.
-pub fn get_basebackup(
-    pgdata: &str,
-    connstr: &str,
-    tenant: &str,
-    timeline: &str,
-    lsn: &str,
-) -> Result<()> {
-    let mut client = Client::connect(connstr, NoTls)?;
-    let basebackup_cmd = match lsn {
-        "0/0" => format!("basebackup {} {}", tenant, timeline), // First start of the compute
-        _ => format!("basebackup {} {} {}", tenant, timeline, lsn),
-    };
-    let copyreader = client.copy_out(basebackup_cmd.as_str())?;
-    let mut ar = tar::Archive::new(copyreader);
-
-    ar.unpack(&pgdata)?;
-
-    Ok(())
-}
-
-/// Run `postgres` in a special mode with `--sync-safekeepers` argument
-/// and return the reported LSN back to the caller.
-pub fn sync_safekeepers(pgdata: &str, pgbin: &str) -> Result<String> {
-    let sync_handle = Command::new(&pgbin)
-        .args(&["--sync-safekeepers"])
-        .env("PGDATA", &pgdata) // we cannot use -D in this mode
-        .stdout(Stdio::piped())
-        .spawn()
-        .expect("postgres --sync-safekeepers failed to start");
-
-    // `postgres --sync-safekeepers` will print all log output to stderr and
-    // final LSN to stdout. So we pipe only stdout, while stderr will be automatically
-    // redirected to the caller output.
-    let sync_output = sync_handle
-        .wait_with_output()
-        .expect("postgres --sync-safekeepers failed");
-    if !sync_output.status.success() {
-        anyhow::bail!(
-            "postgres --sync-safekeepers exited with non-zero status: {}",
-            sync_output.status,
-        );
-    }
-
-    let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
-
-    Ok(lsn)
-}
diff --git a/compute_tools/tests/pg_helpers_tests.rs b/compute_tools/tests/pg_helpers_tests.rs
index 472a49af4b..33f903f0e1 100644
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -4,12 +4,12 @@ mod pg_helpers_tests {
     use std::fs::File;
 
     use compute_tools::pg_helpers::*;
-    use compute_tools::zenith::ClusterSpec;
+    use compute_tools::spec::ComputeSpec;
 
     #[test]
     fn params_serialize() {
         let file = File::open("tests/cluster_spec.json").unwrap();
-        let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
+        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
 
         assert_eq!(
             spec.cluster.databases.first().unwrap().to_pg_options(),
@@ -24,7 +24,7 @@ mod pg_helpers_tests {
     #[test]
     fn settings_serialize() {
         let file = File::open("tests/cluster_spec.json").unwrap();
-        let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
+        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
 
         assert_eq!(
             spec.cluster.settings.as_pg_settings(),
diff --git a/docs/docker.md b/docs/docker.md
index cc54d012dd..100cdd248b 100644
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -1,20 +1,20 @@
-# Docker images of Zenith
+# Docker images of Neon
 
 ## Images
 
 Currently we build two main images:
 
-- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
-- [zenithdb/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [zenithdb/postgres](https://github.com/zenithdb/postgres).
+- [neondatabase/neon](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
+- [neondatabase/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres).
 
-And additional intermediate images:
+And an additional intermediate image:
 
-- [zenithdb/compute-tools](https://hub.docker.com/repository/docker/zenithdb/compute-tools) — compute node configuration management tools.
+- [neondatabase/compute-tools](https://hub.docker.com/repository/docker/neondatabase/compute-tools) — compute node configuration management tools.
 
 ## Building pipeline
 
-1. Image `zenithdb/compute-tools` is re-built automatically.
+We build all images after a successful `release` test run and push them to Docker Hub automatically with two parallel CI jobs:
 
-2. Image `zenithdb/compute-node` is built independently in the [zenithdb/postgres](https://github.com/zenithdb/postgres) repo.
+1. `neondatabase/compute-tools` and `neondatabase/compute-node`
 
-3. Image `zenithdb/zenith` is built in this repo after a successful `release` tests run and pushed to Docker Hub automatically.
+2. `neondatabase/neon`
diff --git a/vendor/postgres b/vendor/postgres
index 1db115cecb..79af2faf08 160000
--- a/vendor/postgres
+++ b/vendor/postgres
@@ -1 +1 @@
-Subproject commit 1db115cecb3dbc2a74c5efa964fdf3a8a341c4d2
+Subproject commit 79af2faf08d9bec1b1664a72936727dcca36d253
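
A quick way to exercise the HTTP API introduced in this patch (a sketch, not part of the patch itself; it assumes `compute_ctl` is already running locally and listening on the hard-coded port 3080 from `http/api.rs` and the OpenAPI spec above):

```sh
# Serialized ComputeState, e.g. {"status":"running","last_active":"2022-10-12T07:20:50.52Z","error":null}
curl -s http://localhost:3080/status

# Startup metrics in JSON: sync_safekeepers_ms, basebackup_ms, config_ms, total_startup_ms
curl -s http://localhost:3080/metrics.json

# Writability check; prints "true" on success, otherwise the error text
curl -s -X POST http://localhost:3080/check_writability
```

The deprecated `GET /ready` and `GET /last_activity` endpoints still work the same way, but new callers should rely on `/status`, which carries the readiness flag, the last activity timestamp, and any startup error in one response.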