diff --git a/compute_tools/README.md b/compute_tools/README.md index 8d84031efc..49f1368f0e 100644 --- a/compute_tools/README.md +++ b/compute_tools/README.md @@ -46,11 +46,14 @@ stateDiagram-v2 Configuration --> Failed : Failed to configure the compute Configuration --> Running : Compute has been configured Empty --> Init : Compute spec is immediately available - Empty --> TerminationPending : Requested termination + Empty --> TerminationPendingFast : Requested termination + Empty --> TerminationPendingImmediate : Requested termination Init --> Failed : Failed to start Postgres Init --> Running : Started Postgres - Running --> TerminationPending : Requested termination - TerminationPending --> Terminated : Terminated compute + Running --> TerminationPendingFast : Requested termination + Running --> TerminationPendingImmediate : Requested termination + TerminationPendingFast --> Terminated : Terminated compute with 30s delay for cplane to inspect status + TerminationPendingImmediate --> Terminated : Terminated compute immediately Failed --> [*] : Compute exited Terminated --> [*] : Compute exited ``` diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 4a29c232ac..c05cc229a2 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -956,14 +956,20 @@ impl ComputeNode { None }; - let mut delay_exit = false; let mut state = self.state.lock().unwrap(); state.terminate_flush_lsn = lsn; - if let ComputeStatus::TerminationPending { mode } = state.status { + + let delay_exit = state.status == ComputeStatus::TerminationPendingFast; + if state.status == ComputeStatus::TerminationPendingFast + || state.status == ComputeStatus::TerminationPendingImmediate + { + info!( + "Changing compute status from {} to {}", + state.status, + ComputeStatus::Terminated + ); state.status = ComputeStatus::Terminated; self.state_changed.notify_all(); - // we were asked to terminate gracefully, don't exit to avoid restart - delay_exit = mode == 
compute_api::responses::TerminateMode::Fast } drop(state); @@ -1901,7 +1907,8 @@ impl ComputeNode { // exit loop ComputeStatus::Failed - | ComputeStatus::TerminationPending { .. } + | ComputeStatus::TerminationPendingFast + | ComputeStatus::TerminationPendingImmediate | ComputeStatus::Terminated => break 'cert_update, // wait diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml index 3c58b284b3..93a357e160 100644 --- a/compute_tools/src/http/openapi_spec.yaml +++ b/compute_tools/src/http/openapi_spec.yaml @@ -371,9 +371,28 @@ paths: summary: Terminate Postgres and wait for it to exit description: "" operationId: terminate + parameters: + - name: mode + in: query + description: "Terminate mode: fast (wait 30s before returning) and immediate" + required: false + schema: + type: string + enum: ["fast", "immediate"] + default: fast responses: 200: description: Result + content: + application/json: + schema: + $ref: "#/components/schemas/TerminateResponse" + 201: + description: Result if compute is already terminated + content: + application/json: + schema: + $ref: "#/components/schemas/TerminateResponse" 412: description: "wrong state" content: @@ -530,11 +549,14 @@ components: type: string enum: - empty - - init - - failed - - running - configuration_pending + - init + - running - configuration + - failed + - termination_pending_fast + - termination_pending_immediate + - terminated example: running ExtensionInstallRequest: @@ -660,6 +682,17 @@ components: description: Role name. 
example: "neon" + TerminateResponse: + type: object + required: + - lsn + properties: + lsn: + type: string + nullable: true + description: "last WAL flush LSN" + example: "0/028F10D8" + SetRoleGrantsResponse: type: object required: diff --git a/compute_tools/src/http/routes/terminate.rs b/compute_tools/src/http/routes/terminate.rs index 32d90a5990..5b30b020c8 100644 --- a/compute_tools/src/http/routes/terminate.rs +++ b/compute_tools/src/http/routes/terminate.rs @@ -3,7 +3,7 @@ use crate::http::JsonResponse; use axum::extract::State; use axum::response::Response; use axum_extra::extract::OptionalQuery; -use compute_api::responses::{ComputeStatus, TerminateResponse}; +use compute_api::responses::{ComputeStatus, TerminateMode, TerminateResponse}; use http::StatusCode; use serde::Deserialize; use std::sync::Arc; @@ -12,7 +12,7 @@ use tracing::info; #[derive(Deserialize, Default)] pub struct TerminateQuery { - mode: compute_api::responses::TerminateMode, + mode: TerminateMode, } /// Terminate the compute. 
@@ -24,16 +24,16 @@ pub(in crate::http) async fn terminate( { let mut state = compute.state.lock().unwrap(); if state.status == ComputeStatus::Terminated { - return JsonResponse::success(StatusCode::CREATED, state.terminate_flush_lsn); + let response = TerminateResponse { + lsn: state.terminate_flush_lsn, + }; + return JsonResponse::success(StatusCode::CREATED, response); } if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) { return JsonResponse::invalid_status(state.status); } - state.set_status( - ComputeStatus::TerminationPending { mode }, - &compute.state_changed, - ); + state.set_status(mode.into(), &compute.state_changed); } forward_termination_signal(false); diff --git a/compute_tools/src/monitor.rs b/compute_tools/src/monitor.rs index 8a2f6addad..fa01545856 100644 --- a/compute_tools/src/monitor.rs +++ b/compute_tools/src/monitor.rs @@ -84,7 +84,8 @@ impl ComputeMonitor { if matches!( compute_status, ComputeStatus::Terminated - | ComputeStatus::TerminationPending { .. } + | ComputeStatus::TerminationPendingFast + | ComputeStatus::TerminationPendingImmediate | ComputeStatus::Failed ) { info!( diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 74ab15dc97..ad2067e0f2 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -922,7 +922,8 @@ impl Endpoint { ComputeStatus::Empty | ComputeStatus::ConfigurationPending | ComputeStatus::Configuration - | ComputeStatus::TerminationPending { .. 
} + | ComputeStatus::TerminationPendingFast + | ComputeStatus::TerminationPendingImmediate | ComputeStatus::Terminated => { bail!("unexpected compute status: {:?}", state.status) } diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index e10c381fb4..2fe233214a 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -121,6 +121,15 @@ pub enum TerminateMode { Immediate, } +impl From<TerminateMode> for ComputeStatus { + fn from(mode: TerminateMode) -> Self { + match mode { + TerminateMode::Fast => ComputeStatus::TerminationPendingFast, + TerminateMode::Immediate => ComputeStatus::TerminationPendingImmediate, + } + } +} + #[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum ComputeStatus { @@ -141,7 +150,9 @@ pub enum ComputeStatus { // control-plane to terminate it. Failed, // Termination requested - TerminationPending { mode: TerminateMode }, + TerminationPendingFast, + // Termination requested, without waiting 30s before returning from /terminate + TerminationPendingImmediate, // Terminated Postgres Terminated, } @@ -160,7 +171,10 @@ impl Display for ComputeStatus { ComputeStatus::Running => f.write_str("running"), ComputeStatus::Configuration => f.write_str("configuration"), ComputeStatus::Failed => f.write_str("failed"), - ComputeStatus::TerminationPending { .. } => f.write_str("termination-pending"), + ComputeStatus::TerminationPendingFast => f.write_str("termination-pending-fast"), + ComputeStatus::TerminationPendingImmediate => { + f.write_str("termination-pending-immediate") + } ComputeStatus::Terminated => f.write_str("terminated"), } }