Files
neon/compute_tools/src/http/openapi_spec.yaml
Alexey Kondratov 53a05e8ccb fix(compute_ctl): Only offload LFC state if no prewarming is in progress (#12645)
## Problem

We currently offload LFC state unconditionally, which can cause
problems. Imagine a situation:
1. Endpoint started with `autoprewarm: true`.
2. While prewarming is not completed, we upload the new incomplete
state.
3. Compute gets interrupted and restarts.
4. We start again and try to prewarm with the state from 2. instead of
the previous complete state.

During the orchestrated prewarming, it's probably not a big issue, but
it's still better to do not interfere with the prewarm process.

## Summary of changes

Do not offload LFC state if we are currently prewarming or any issue
occurred. While on it, also introduce `Skipped` LFC prewarm status,
which is used when the corresponding LFC state is not present in the
endpoint storage. It's primarily needed to distinguish the first compute
start for particular endpoint, as it's completely valid to do not have
LFC state yet.
2025-07-17 21:43:43 +00:00

737 lines
19 KiB
YAML

openapi: "3.0.2"
info:
title: Compute node control API
version: "1.0"
servers:
- url: "http://localhost:3080"
paths:
/status:
get:
tags:
- Info
summary: Get compute node internal status.
description: ""
operationId: getComputeStatus
responses:
200:
description: ComputeState
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeState"
/metrics.json:
get:
tags:
- Info
summary: Get compute node startup metrics in JSON format.
description: ""
operationId: getComputeMetricsJSON
responses:
200:
description: ComputeMetrics
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeMetrics"
/metrics:
get:
tags:
- Info
summary: Get compute node metrics in text format.
description: ""
operationId: getComputeMetrics
responses:
200:
description: ComputeMetrics
content:
text/plain:
schema:
type: string
description: Metrics in text format.
/insights:
get:
tags:
- Info
summary: Get current compute insights in JSON format.
description: |
Note, that this doesn't include any historical data.
operationId: getComputeInsights
responses:
200:
description: Compute insights
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeInsights"
/dbs_and_roles:
get:
tags:
- Info
summary: Get databases and roles in the catalog.
description: ""
operationId: getDbsAndRoles
responses:
200:
description: Compute schema objects
content:
application/json:
schema:
$ref: "#/components/schemas/DbsAndRoles"
/promote:
post:
tags:
- Promotion
summary: Promote secondary replica to primary
description: ""
operationId: promoteReplica
requestBody:
description: Promote requests data
required: true
content:
application/json:
schema:
$ref: "#/components/schemas/SafekeepersLsn"
responses:
200:
description: Promote succeeded or wasn't started
content:
application/json:
schema:
$ref: "#/components/schemas/PromoteState"
500:
description: Promote failed
content:
application/json:
schema:
$ref: "#/components/schemas/PromoteState"
/lfc/prewarm:
post:
summary: Request LFC Prewarm
parameters:
- name: from_endpoint
in: query
schema:
type: string
description: ""
operationId: lfcPrewarm
responses:
202:
description: LFC prewarm started
429:
description: LFC prewarm ongoing
get:
tags:
- Prewarm
summary: Get LFC prewarm state
description: ""
operationId: getLfcPrewarmState
responses:
200:
description: Prewarm state
content:
application/json:
schema:
$ref: "#/components/schemas/LfcPrewarmState"
/lfc/offload:
post:
summary: Request LFC offload
description: ""
operationId: lfcOffload
responses:
202:
description: LFC offload started
429:
description: LFC offload ongoing
get:
tags:
- Prewarm
summary: Get LFC offloading state
description: ""
operationId: getLfcOffloadState
responses:
200:
description: Offload state
content:
application/json:
schema:
$ref: "#/components/schemas/LfcOffloadState"
/database_schema:
get:
tags:
- Info
summary: Get schema dump
parameters:
- name: database
in: query
description: Database name to dump.
required: true
schema:
type: string
example: "postgres"
description: Get schema dump in SQL format.
operationId: getDatabaseSchema
responses:
200:
description: Schema dump
content:
text/plain:
schema:
type: string
description: Schema dump in SQL format.
404:
description: Non existing database.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/grants:
post:
tags:
- Grants
summary: Apply grants to the database.
description: ""
operationId: setRoleGrants
requestBody:
description: Grants request.
required: true
content:
application/json:
schema:
$ref: "#/components/schemas/SetRoleGrantsRequest"
responses:
200:
description: Grants applied.
content:
application/json:
schema:
$ref: "#/components/schemas/SetRoleGrantsResponse"
412:
description: |
Compute is not in the right state for processing the request.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: Error occurred during grants application.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/check_writability:
post:
tags:
- Check
summary: Check that we can write new data on this compute.
description: ""
operationId: checkComputeWritability
responses:
200:
description: Check result
content:
text/plain:
schema:
type: string
description: Error text or 'true' if check passed.
example: "true"
/extensions:
post:
tags:
- Extensions
summary: Install extension if possible.
description: ""
operationId: installExtension
requestBody:
description: Extension name and database to install it to.
required: true
content:
application/json:
schema:
$ref: "#/components/schemas/ExtensionInstallRequest"
responses:
200:
description: Result from extension installation
content:
application/json:
schema:
$ref: "#/components/schemas/ExtensionInstallResult"
412:
description: |
Compute is in the wrong state for processing the request.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: Error during extension installation.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/configure:
post:
tags:
- Configure
summary: Perform compute node configuration.
description: |
This is a blocking API endpoint, i.e. it blocks waiting until
compute is finished configuration and is in `Running` state.
Optional non-blocking mode could be added later.
operationId: configureCompute
requestBody:
description: Configuration request.
required: true
content:
application/json:
schema:
type: object
required:
- spec
properties:
spec:
# XXX: I don't want to explain current spec in the OpenAPI format,
# as it could be changed really soon. Consider doing it later.
type: object
responses:
200:
description: Compute configuration finished.
content:
application/json:
schema:
$ref: "#/components/schemas/ComputeState"
400:
description: Provided spec is invalid.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
412:
description: |
It's not possible to do live-configuration of the compute.
It's either in the wrong state, or compute doesn't use pull
mode of configuration.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: |
Compute configuration request was processed, but error
occurred. Compute will likely shutdown soon.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/extension_server:
post:
tags:
- Extension
summary: Download extension from S3 to local folder.
description: ""
operationId: downloadExtension
responses:
200:
description: Extension downloaded
content:
text/plain:
schema:
type: string
description: Error text or 'OK' if download succeeded.
example: "OK"
400:
description: Request is invalid.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: Extension download request failed.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/terminate:
post:
tags:
- Terminate
summary: Terminate Postgres and wait for it to exit
description: ""
operationId: terminate
parameters:
- name: mode
in: query
description: "Terminate mode: fast (wait 30s before returning) and immediate"
required: false
schema:
type: string
enum: ["fast", "immediate"]
default: fast
responses:
200:
description: Result
content:
application/json:
schema:
$ref: "#/components/schemas/TerminateResponse"
201:
description: Result if compute is already terminated
content:
application/json:
schema:
$ref: "#/components/schemas/TerminateResponse"
412:
description: "wrong state"
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
500:
description: "Unexpected error"
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
components:
securitySchemes:
JWT:
type: http
scheme: bearer
bearerFormat: JWT
schemas:
ComputeMetrics:
type: object
description: Compute startup metrics.
required:
- wait_for_spec_ms
- sync_safekeepers_ms
- basebackup_ms
- config_ms
- total_startup_ms
properties:
wait_for_spec_ms:
type: integer
sync_safekeepers_ms:
type: integer
basebackup_ms:
type: integer
config_ms:
type: integer
total_startup_ms:
type: integer
DbsAndRoles:
type: object
description: Databases and Roles
required:
- roles
- databases
properties:
roles:
type: array
items:
$ref: "#/components/schemas/Role"
databases:
type: array
items:
$ref: "#/components/schemas/Database"
Database:
type: object
description: Database
required:
- name
- owner
- restrict_conn
- invalid
properties:
name:
type: string
owner:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
restrict_conn:
type: boolean
invalid:
type: boolean
Role:
type: object
description: Role
required:
- name
properties:
name:
type: string
encrypted_password:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
GenericOption:
type: object
description: Schema Generic option
required:
- name
- vartype
properties:
name:
type: string
value:
type: string
vartype:
type: string
ComputeState:
type: object
required:
- start_time
- status
properties:
start_time:
type: string
description: |
Time when compute was started. If initially compute was started in the `empty`
state and then provided with valid spec, `start_time` will be reset to the
moment, when spec was received.
example: "2022-10-12T07:20:50.52Z"
status:
$ref: "#/components/schemas/ComputeStatus"
last_active:
type: string
description: |
The last detected compute activity timestamp in UTC and RFC3339 format.
It could be empty if compute was never used by user since start.
example: "2022-10-12T07:20:50.52Z"
error:
type: string
description: Text of the error during compute startup or reconfiguration, if any.
example: ""
tenant:
type: string
description: Identifier of the current tenant served by compute node, if any.
example: c9269c359e9a199fad1ea0981246a78f
timeline:
type: string
description: Identifier of the current timeline served by compute node, if any.
example: ece7de74d4b8cbe5433a68ce4d1b97b4
ComputeInsights:
type: object
properties:
pg_stat_statements:
description: Contains raw output from pg_stat_statements in JSON format.
type: array
items:
type: object
ComputeStatus:
type: string
enum:
- empty
- configuration_pending
- init
- running
- configuration
- failed
- termination_pending_fast
- termination_pending_immediate
- terminated
example: running
ExtensionInstallRequest:
type: object
required:
- extension
- database
- version
properties:
extension:
type: string
description: Extension name.
example: "pg_session_jwt"
version:
type: string
description: Version of the extension.
example: "1.0.0"
database:
type: string
description: Database name.
example: "neondb"
ExtensionInstallResult:
type: object
properties:
extension:
description: Name of the extension.
type: string
example: "pg_session_jwt"
version:
description: Version of the extension.
type: string
example: "1.0.0"
SafekeepersLsn:
type: object
required:
- safekeepers
- wal_flush_lsn
properties:
safekeepers:
description: Primary replica safekeepers
type: string
wal_flush_lsn:
description: Primary last WAL flush LSN
type: string
LfcPrewarmState:
type: object
required:
- status
- total
- prewarmed
- skipped
properties:
status:
description: LFC prewarm status
enum: [not_prewarmed, prewarming, completed, failed, skipped]
type: string
error:
description: LFC prewarm error, if any
type: string
total:
description: Total pages processed
type: integer
prewarmed:
description: Total pages prewarmed
type: integer
skipped:
description: Pages processed but not prewarmed
type: integer
LfcOffloadState:
type: object
required:
- status
properties:
status:
description: LFC offload status
enum: [not_offloaded, offloading, completed, failed]
type: string
error:
description: LFC offload error, if any
type: string
PromoteState:
type: object
required:
- status
properties:
status:
description: Promote result
enum: [not_promoted, completed, failed]
type: string
error:
description: Promote error, if any
type: string
SetRoleGrantsRequest:
type: object
required:
- database
- schema
- privileges
- role
properties:
database:
type: string
description: Database name.
example: "neondb"
schema:
type: string
description: Schema name.
example: "public"
privileges:
type: array
items:
type: string
description: List of privileges to set.
example: ["SELECT", "INSERT"]
role:
type: string
description: Role name.
example: "neon"
TerminateResponse:
type: object
required:
- lsn
properties:
lsn:
type: string
nullable: true
description: "last WAL flush LSN"
example: "0/028F10D8"
SetRoleGrantsResponse:
type: object
required:
- database
- schema
- privileges
- role
properties:
database:
type: string
description: Database name.
example: "neondb"
schema:
type: string
description: Schema name.
example: "public"
privileges:
type: array
items:
type: string
description: List of privileges set.
example: ["SELECT", "INSERT"]
role:
type: string
description: Role name.
example: "neon"
#
# Errors
#
GenericError:
type: object
required:
- error
properties:
error:
type: string
security:
- JWT: []