feat: bare-bones /v1/utilization (#6831)

PR adds a simple at most 1Hz refreshed informational API for querying
pageserver utilization. In this first phase, no actual background
calculation is performed. Instead, the worst possible score is always
returned. The returned bytes information is however correct.

Cc: #6835
Cc: #5331
This commit is contained in:
Joonas Koivunen
2024-02-22 13:58:59 +02:00
committed by GitHub
parent b5246753bf
commit bc7a82caf2
8 changed files with 211 additions and 0 deletions

View File

@@ -1379,6 +1379,25 @@ paths:
schema:
$ref: "#/components/schemas/ServiceUnavailableError"
/v1/utilization:
get:
description: |
Returns the pageservers current utilization and fitness score for new tenants.
responses:
"200":
description: Pageserver utilization and fitness score
content:
application/json:
schema:
$ref: "#/components/schemas/PageserverUtilization"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
components:
securitySchemes:
JWT:
@@ -1691,6 +1710,33 @@ components:
type: string
enum: [past, present, future, nodata]
PageserverUtilization:
type: object
required:
- disk_usage_bytes
- free_space_bytes
- utilization_score
properties:
disk_usage_bytes:
type: integer
format: int64
minimum: 0
description: The amount of disk space currently utilized by layer files.
free_space_bytes:
type: integer
format: int64
minimum: 0
description: The amount of usable disk space left.
utilization_score:
type: integer
format: int64
minimum: 0
maximum: 9223372036854775807
default: 9223372036854775807
description: |
Lower is better score for how good this pageserver would be for the next tenant.
The default or maximum value can be returned in situations when a proper score cannot (yet) be calculated.
Error:
type: object
required:

View File

@@ -100,6 +100,7 @@ pub struct State {
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
deletion_queue_client: DeletionQueueClient,
secondary_controller: SecondaryController,
latest_utilization: tokio::sync::Mutex<Option<(std::time::Instant, bytes::Bytes)>>,
}
impl State {
@@ -128,6 +129,7 @@ impl State {
disk_usage_eviction_state,
deletion_queue_client,
secondary_controller,
latest_utilization: Default::default(),
})
}
}
@@ -1963,6 +1965,54 @@ async fn put_io_engine_handler(
json_response(StatusCode::OK, ())
}
/// Polled by control plane.
///
/// See [`crate::utilization`].
async fn get_utilization(
r: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
// this probably could be completely public, but lets make that change later.
check_permission(&r, None)?;
let state = get_state(&r);
let mut g = state.latest_utilization.lock().await;
let regenerate_every = Duration::from_secs(1);
let still_valid = g
.as_ref()
.is_some_and(|(captured_at, _)| captured_at.elapsed() < regenerate_every);
// avoid needless statvfs calls even though those should be non-blocking fast.
// regenerate at most 1Hz to allow polling at any rate.
if !still_valid {
let path = state.conf.tenants_path();
let doc = crate::utilization::regenerate(path.as_std_path())
.map_err(ApiError::InternalServerError)?;
let mut buf = Vec::new();
serde_json::to_writer(&mut buf, &doc)
.context("serialize")
.map_err(ApiError::InternalServerError)?;
let body = bytes::Bytes::from(buf);
*g = Some((std::time::Instant::now(), body));
}
// hyper 0.14 doesn't yet have Response::clone so this is a bit of extra legwork
let cached = g.as_ref().expect("just set").1.clone();
Response::builder()
.header(hyper::http::header::CONTENT_TYPE, "application/json")
// thought of using http date header, but that is second precision which does not give any
// debugging aid
.status(StatusCode::OK)
.body(hyper::Body::from(cached))
.context("build response")
.map_err(ApiError::InternalServerError)
}
/// Common functionality of all the HTTP API handlers.
///
/// - Adds a tracing span to each request (by `request_span`)
@@ -2224,5 +2274,6 @@ pub fn make_router(
|r| api_handler(r, timeline_collect_keyspace),
)
.put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
.get("/v1/utilization", |r| api_handler(r, get_utilization))
.any(handler_404))
}