From 4e6a23f779537126a07e43eea824fb7a4e1bb11d Mon Sep 17 00:00:00 2001
From: Alexey Kondratov <kondratov.aleksey@gmail.com>
Date: Tue, 4 Apr 2023 16:50:05 +0200
Subject: [PATCH] Polish API handler and refresh OpenAPI spec

---
 compute_tools/src/compute.rs             |  9 ++-
 compute_tools/src/configurator.rs        | 11 ++-
 compute_tools/src/http/api.rs            | 36 +++++----
 compute_tools/src/http/mod.rs            |  1 +
 compute_tools/src/http/models.rs         | 16 ++++
 compute_tools/src/http/openapi_spec.yaml | 98 ++++++++++++++++++++----
 6 files changed, 134 insertions(+), 37 deletions(-)
 create mode 100644 compute_tools/src/http/models.rs
diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs
index 1402b95cc2..7848b3f50a 100644
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -88,7 +88,7 @@ pub struct ComputeState {
 impl ComputeState {
     pub fn new() -> Self {
         Self {
-            status: ComputeStatus::WaitingSpec,
+            status: ComputeStatus::Empty,
             last_active: Utc::now(),
             error: None,
             spec: ComputeSpec::default(),
@@ -111,8 +111,9 @@ impl Default for ComputeState {
 pub enum ComputeStatus {
     // Spec wasn't provided as start, waiting for it to be
     // provided by control-plane.
-    WaitingSpec,
-    // Compute node has initial spec and is starting up.
+    Empty,
+    // Compute node has a spec; initial startup and
+    // configuration are in progress.
     Init,
     // Compute is configured and running.
     Running,
@@ -123,7 +124,7 @@ pub enum ComputeStatus {
     // Control-plane requested reconfiguration.
     ConfigurationPending,
     // New spec is being applied.
-    Reconfiguration,
+    Configuration,
 }
 
 #[derive(Default, Serialize)]
diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs
index 229f673914..2852e06938 100644
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -15,22 +15,21 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
         let mut state = state_changed.wait(state).unwrap();
 
         if state.status == ComputeStatus::ConfigurationPending {
-            info!("got reconfiguration request");
-            state.status = ComputeStatus::Reconfiguration;
+            info!("got configuration request");
+            state.status = ComputeStatus::Configuration;
             state_changed.notify_all();
             drop(state);
 
             let mut new_status = ComputeStatus::Failed;
             if let Err(e) = compute.reconfigure() {
-                error!("could not reconfigure compute node: {}", e);
+                error!("could not configure compute node: {}", e);
             } else {
                 new_status = ComputeStatus::Running;
-                info!("compute node reconfigured");
+                info!("compute node configured");
             }
 
             // XXX: used to test that API is blocking
-            // TODO: remove before merge
-            std::thread::sleep(std::time::Duration::from_millis(2000));
+            // std::thread::sleep(std::time::Duration::from_millis(2000));
 
             compute.set_status(new_status);
         } else if state.status == ComputeStatus::Failed {
diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs
index 06b0098b2a..0a5cc27491 100644
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -4,7 +4,7 @@ use std::sync::Arc;
 use std::thread;
 
 use crate::compute::{ComputeNode, ComputeStatus};
-use crate::spec::ComputeSpec;
+use crate::http::models::{ConfigurationRequest, GenericAPIError};
 
 use anyhow::Result;
 use hyper::service::{make_service_fn, service_fn};
@@ -82,37 +82,35 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
             ))
         }
 
-        // Accept spec in JSON format and request compute reconfiguration from
+        // Accept spec in JSON format and request compute configuration from
         // the configurator thread. If anything goes wrong after we set the
         // compute state to `ConfigurationPending` and / or sent spec to the
         // configurator thread, we basically leave compute in the potentially
         // wrong state. That said, it's control-plane's responsibility to
         // watch compute state after reconfiguration request and to clean
         // restart in case of errors.
-        //
-        // TODO: Errors should be in JSON format with proper status codes.
-        (&Method::POST, "/spec") => {
-            info!("serving /spec POST request");
+        (&Method::POST, "/configure") => {
+            info!("serving /configure POST request");
             if !compute.live_config_allowed {
                 let msg = "live reconfiguration is not allowed for this compute node";
                 error!(msg);
-                return Response::new(Body::from(msg));
+                return render_json_error(msg, StatusCode::PRECONDITION_FAILED);
             }
 
             let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
             let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
-            if let Ok(spec) = serde_json::from_str::<ComputeSpec>(&spec_raw) {
+            if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
+                let spec = request.spec;
                 let (state, state_changed) = &compute.state;
                 let mut state = state.lock().unwrap();
-                if !(state.status == ComputeStatus::WaitingSpec
-                    || state.status == ComputeStatus::Running)
+                if !(state.status == ComputeStatus::Empty || state.status == ComputeStatus::Running)
                 {
                     let msg = format!(
                         "invalid compute status for reconfiguration request: {}",
                         serde_json::to_string(&*state).unwrap()
                     );
                     error!(msg);
-                    return Response::new(Body::from(msg));
+                    return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
                 }
                 state.spec = spec;
                 state.status = ComputeStatus::ConfigurationPending;
@@ -129,11 +127,13 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
                         state.status
                     );
                 }
-                Response::new(Body::from("ok"))
+
+                // Return current compute state if everything went well.
+                Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
             } else {
                 let msg = "invalid spec";
                 error!(msg);
-                Response::new(Body::from(msg))
+                render_json_error(msg, StatusCode::BAD_REQUEST)
             }
         }
 
@@ -146,6 +146,16 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
     }
 }
 
+fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
+    let error = GenericAPIError {
+        error: e.to_string(),
+    };
+    Response::builder()
+        .status(status)
+        .body(Body::from(serde_json::to_string(&error).unwrap()))
+        .unwrap()
+}
+
 // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
 #[tokio::main]
 async fn serve(state: Arc<ComputeNode>) {
diff --git a/compute_tools/src/http/mod.rs b/compute_tools/src/http/mod.rs
index e5fdf85eed..169399a4fb 100644
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -1 +1,2 @@
 pub mod api;
+pub mod models;
diff --git a/compute_tools/src/http/models.rs b/compute_tools/src/http/models.rs
new file mode 100644
index 0000000000..98383c218b
--- /dev/null
+++ b/compute_tools/src/http/models.rs
@@ -0,0 +1,16 @@
+use serde::{Deserialize, Serialize};
+
+use crate::spec::ComputeSpec;
+
+/// We now pass only `spec` in the configuration request, but later we can
+/// extend it and add something like `restart: bool` or something else. So put
+/// `spec` into a struct initially to be more flexible in the future.
+#[derive(Deserialize, Debug)]
+pub struct ConfigurationRequest {
+    pub spec: ComputeSpec,
+}
+
+#[derive(Serialize, Debug)]
+pub struct GenericAPIError {
+    pub error: String,
+}
diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml
index 5c74dfd2d2..2a648e0710 100644
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -11,7 +11,7 @@ paths:
     get:
       tags:
       - Info
-      summary: Get compute node internal status
+      summary: Get compute node internal status.
       description: ""
       operationId: getComputeStatus
       responses:
@@ -26,7 +26,7 @@ paths:
     get:
       tags:
       - Info
-      summary: Get compute node startup metrics in JSON format
+      summary: Get compute node startup metrics in JSON format.
       description: ""
       operationId: getComputeMetricsJSON
       responses:
@@ -41,9 +41,9 @@ paths:
     get:
       tags:
       - Info
-      summary: Get current compute insights in JSON format
+      summary: Get current compute insights in JSON format.
       description: |
-        Note, that this doesn't include any historical data
+        Note, that this doesn't include any historical data.
       operationId: getComputeInsights
       responses:
         200:
@@ -56,12 +56,12 @@ paths:
   /info:
     get:
       tags:
-      - "info"
-      summary: Get info about the compute Pod/VM
+      - Info
+      summary: Get info about the compute pod / VM.
       description: ""
       operationId: getInfo
       responses:
-        "200":
+        200:
           description: Info
           content:
             application/json:
@@ -72,7 +72,7 @@ paths:
     post:
       tags:
       - Check
-      summary: Check that we can write new data on this compute
+      summary: Check that we can write new data on this compute.
       description: ""
       operationId: checkComputeWritability
       responses:
@@ -82,9 +82,57 @@ paths:
             text/plain:
               schema:
                 type: string
-                description: Error text or 'true' if check passed
+                description: Error text or 'true' if check passed.
                 example: "true"
 
+  /configure:
+    post:
+      tags:
+      - Configure
+      summary: Request compute node configuration.
+      description: |
+        This is a blocking API endpoint, i.e. it blocks waiting until
+        compute has finished configuration and is in `Running` state.
+        Optional non-blocking mode could be added later. Currently,
+        it's also assumed that reconfiguration doesn't require restart.
+      operationId: configureCompute
+      requestBody:
+        description: Configuration request.
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - spec
+              properties:
+                spec:
+                  # XXX: I don't want to explain current spec in the OpenAPI format,
+                  # as it could be changed really soon. Consider doing it later.
+                  type: object
+      responses:
+        200:
+          description: Compute configuration finished.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ComputeState"
+        400:
+          description: Provided spec is invalid.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/GenericError"
+        412:
+          description: |
+            It's not possible to do live-configuration of the compute.
+            It's either in the wrong state, or compute doesn't use pull
+            mode of configuration.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/GenericError"
+
 components:
   securitySchemes:
     JWT:
@@ -95,7 +143,7 @@ components:
   schemas:
     ComputeMetrics:
       type: object
-      description: Compute startup metrics
+      description: Compute startup metrics.
       required:
         - sync_safekeepers_ms
         - basebackup_ms
@@ -113,7 +161,7 @@ components:
 
     Info:
       type: object
-      description: Information about VM/Pod
+      description: Information about VM/Pod.
       required:
         - num_cpus
       properties:
@@ -130,17 +178,26 @@ components:
           $ref: '#/components/schemas/ComputeStatus'
         last_active:
           type: string
-          description: The last detected compute activity timestamp in UTC and RFC3339 format
+          description: The last detected compute activity timestamp in UTC and RFC3339 format.
           example: "2022-10-12T07:20:50.52Z"
         error:
           type: string
-          description: Text of the error during compute startup, if any
+          description: Text of the error during compute startup, if any.
+          example: ""
+        tenant:
+          type: string
+          description: Identifier of the current tenant served by compute node, if any.
+          example: c9269c359e9a199fad1ea0981246a78f
+        timeline:
+          type: string
+          description: Identifier of the current timeline served by compute node, if any.
+          example: ece7de74d4b8cbe5433a68ce4d1b97b4
 
     ComputeInsights:
       type: object
       properties:
         pg_stat_statements:
-          description: Contains raw output from pg_stat_statements in JSON format
+          description: Contains raw output from pg_stat_statements in JSON format.
           type: array
           items:
             type: object
@@ -151,6 +208,19 @@ components:
         - init
         - failed
         - running
+      example: running
+
+    #
+    # Errors
+    #
+
+    GenericError:
+      type: object
+      required:
+        - error
+      properties:
+        error:
+          type: string
 
 security:
   - JWT: []