feat: bare-bones /v1/utilization (#6831)

PR adds a simple at most 1Hz refreshed informational API for querying
pageserver utilization. In this first phase, no actual background
calculation is performed. Instead, the worst possible score is always
returned. The returned bytes information is however correct.

Cc: #6835
Cc: #5331
This commit is contained in:
Joonas Koivunen
2024-02-22 13:58:59 +02:00
committed by GitHub
parent b5246753bf
commit bc7a82caf2
8 changed files with 211 additions and 0 deletions

View File

@@ -1379,6 +1379,25 @@ paths:
schema:
$ref: "#/components/schemas/ServiceUnavailableError"
/v1/utilization:
get:
description: |
Returns the pageservers current utilization and fitness score for new tenants.
responses:
"200":
description: Pageserver utilization and fitness score
content:
application/json:
schema:
$ref: "#/components/schemas/PageserverUtilization"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
components:
securitySchemes:
JWT:
@@ -1691,6 +1710,33 @@ components:
type: string
enum: [past, present, future, nodata]
PageserverUtilization:
type: object
required:
- disk_usage_bytes
- free_space_bytes
- utilization_score
properties:
disk_usage_bytes:
type: integer
format: int64
minimum: 0
description: The amount of disk space currently utilized by layer files.
free_space_bytes:
type: integer
format: int64
minimum: 0
description: The amount of usable disk space left.
utilization_score:
type: integer
format: int64
minimum: 0
maximum: 9223372036854775807
default: 9223372036854775807
description: |
Lower is better score for how good this pageserver would be for the next tenant.
The default or maximum value can be returned in situations when a proper score cannot (yet) be calculated.
Error:
type: object
required:

View File

@@ -100,6 +100,7 @@ pub struct State {
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
deletion_queue_client: DeletionQueueClient,
secondary_controller: SecondaryController,
latest_utilization: tokio::sync::Mutex<Option<(std::time::Instant, bytes::Bytes)>>,
}
impl State {
@@ -128,6 +129,7 @@ impl State {
disk_usage_eviction_state,
deletion_queue_client,
secondary_controller,
latest_utilization: Default::default(),
})
}
}
@@ -1963,6 +1965,54 @@ async fn put_io_engine_handler(
json_response(StatusCode::OK, ())
}
/// Polled by control plane.
///
/// See [`crate::utilization`].
async fn get_utilization(
r: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
// this probably could be completely public, but lets make that change later.
check_permission(&r, None)?;
let state = get_state(&r);
let mut g = state.latest_utilization.lock().await;
let regenerate_every = Duration::from_secs(1);
let still_valid = g
.as_ref()
.is_some_and(|(captured_at, _)| captured_at.elapsed() < regenerate_every);
// avoid needless statvfs calls even though those should be non-blocking fast.
// regenerate at most 1Hz to allow polling at any rate.
if !still_valid {
let path = state.conf.tenants_path();
let doc = crate::utilization::regenerate(path.as_std_path())
.map_err(ApiError::InternalServerError)?;
let mut buf = Vec::new();
serde_json::to_writer(&mut buf, &doc)
.context("serialize")
.map_err(ApiError::InternalServerError)?;
let body = bytes::Bytes::from(buf);
*g = Some((std::time::Instant::now(), body));
}
// hyper 0.14 doesn't yet have Response::clone so this is a bit of extra legwork
let cached = g.as_ref().expect("just set").1.clone();
Response::builder()
.header(hyper::http::header::CONTENT_TYPE, "application/json")
// thought of using http date header, but that is second precision which does not give any
// debugging aid
.status(StatusCode::OK)
.body(hyper::Body::from(cached))
.context("build response")
.map_err(ApiError::InternalServerError)
}
/// Common functionality of all the HTTP API handlers.
///
/// - Adds a tracing span to each request (by `request_span`)
@@ -2224,5 +2274,6 @@ pub fn make_router(
|r| api_handler(r, timeline_collect_keyspace),
)
.put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
.get("/v1/utilization", |r| api_handler(r, get_utilization))
.any(handler_404))
}

View File

@@ -22,6 +22,7 @@ pub(crate) mod statvfs;
pub mod task_mgr;
pub mod tenant;
pub mod trace;
pub mod utilization;
pub mod virtual_file;
pub mod walingest;
pub mod walrecord;

View File

@@ -0,0 +1,38 @@
//! An utilization metric which is used to decide on which pageserver to put next tenant.
//!
//! The metric is exposed via `GET /v1/utilization`. Refer and maintain it's openapi spec as the
//! truth.
use anyhow::Context;
use std::path::Path;
use pageserver_api::models::PageserverUtilization;
pub(crate) fn regenerate(tenants_path: &Path) -> anyhow::Result<PageserverUtilization> {
// TODO: currently the http api ratelimits this to 1Hz at most, which is probably good enough
let statvfs = nix::sys::statvfs::statvfs(tenants_path)
.map_err(std::io::Error::from)
.context("statvfs tenants directory")?;
let blocksz = statvfs.block_size();
#[cfg_attr(not(target_os = "macos"), allow(clippy::unnecessary_cast))]
let free = statvfs.blocks_available() as u64 * blocksz;
let used = crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.get();
let captured_at = std::time::SystemTime::now();
let doc = PageserverUtilization {
disk_usage_bytes: used,
free_space_bytes: free,
// lower is better; start with a constant
//
// note that u64::MAX will be output as i64::MAX as u64, but that should not matter
utilization_score: u64::MAX,
captured_at,
};
// TODO: make utilization_score into a metric
Ok(doc)
}