Merge branch 'problame/disk-usage-eviction' into heikki/disk-usage-eviction

This commit is contained in:
Christian Schwarz
2023-03-28 15:24:52 +02:00
6 changed files with 67 additions and 12 deletions

View File

@@ -8,6 +8,14 @@ storage:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
# TODO: learn typical resident-size growth rate [GiB/minute] and configure
# min_avail_bytes such that we have X minutes of headroom.
min_avail_bytes: 0
# We assume that the worst-case growth rate is small enough that we can
# catch above-threshold conditions by checking every 10s.
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"

View File

@@ -8,6 +8,14 @@ storage:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
# TODO: learn typical resident-size growth rate [GiB/minute] and configure
# min_avail_bytes such that we have X minutes of headroom.
min_avail_bytes: 0
# We assume that the worst-case growth rate is small enough that we can
# catch above-threshold conditions by checking every 10s.
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"

View File

@@ -1,4 +1,4 @@
//! A serde::Desierialize type for percentages.
//! A serde::Deserialize type for percentages.
//!
//! See [`Percent`] for details.

View File

@@ -8,7 +8,7 @@ use anyhow::{anyhow, Context};
use clap::{Arg, ArgAction, Command};
use fail::FailScenario;
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
use pageserver::disk_usage_eviction_task::launch_disk_usage_global_eviction_task;
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
use remote_storage::GenericRemoteStorage;
use tracing::*;
@@ -320,8 +320,18 @@ fn start_pageserver(
// Scan the local 'tenants/' directory and start loading the tenants
BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(conf, remote_storage.clone()))?;
// shared state between the disk-usage backed eviction background task and the http endpoint
// that allows triggering disk-usage based eviction manually. note that the http endpoint
// is still accessible even if background task is not configured as long as remote storage has
// been configured.
let disk_usage_eviction_state: Arc<disk_usage_eviction_task::State> = Arc::default();
if let Some(remote_storage) = &remote_storage {
launch_disk_usage_global_eviction_task(conf, remote_storage.clone())?;
launch_disk_usage_global_eviction_task(
conf,
remote_storage.clone(),
disk_usage_eviction_state.clone(),
)?;
}
// Start up the service to handle HTTP mgmt API request. We created the
@@ -329,9 +339,15 @@ fn start_pageserver(
{
let _rt_guard = MGMT_REQUEST_RUNTIME.enter();
let router = http::make_router(conf, launch_ts, http_auth, remote_storage)?
.build()
.map_err(|err| anyhow!(err))?;
let router = http::make_router(
conf,
launch_ts,
http_auth,
remote_storage,
disk_usage_eviction_state,
)?
.build()
.map_err(|err| anyhow!(err))?;
let service = utils::http::RouterService::new(router).unwrap();
let server = hyper::Server::from_tcp(http_listener)?
.serve(service)

View File

@@ -1,5 +1,7 @@
//! This module implements the pageserver-global disk-usage-based layer eviction task.
//!
//! # Mechanics
//!
//! Function `launch_disk_usage_global_eviction_task` starts a pageserver-global background
//! loop that evicts layers in response to a shortage of available bytes
//! in the $repo/tenants directory's filesystem.
@@ -13,6 +15,8 @@
//! We're good if that second statvfs shows that we're _actually_ below the configured thresholds.
//! If we're still above one or more thresholds, we emit a warning log message, leaving it to the operator to investigate further.
//!
//! # Eviction Policy
//!
//! There are two thresholds:
//! `max_usage_pct` is the relative available space, expressed in percent of the total filesystem space.
//! If the actual usage is higher, the threshold is exceeded.
@@ -69,9 +73,16 @@ pub struct DiskUsageEvictionTaskConfig {
pub period: Duration,
}
#[derive(Default)]
pub struct State {
/// Exclude http requests and background task from running at the same time.
mutex: tokio::sync::Mutex<()>,
}
pub fn launch_disk_usage_global_eviction_task(
conf: &'static PageServerConf,
storage: GenericRemoteStorage,
state: Arc<State>,
) -> anyhow::Result<()> {
let Some(task_config) = &conf.disk_usage_based_eviction else {
info!("disk usage based eviction task not configured");
@@ -99,6 +110,7 @@ pub fn launch_disk_usage_global_eviction_task(
false,
async move {
disk_usage_eviction_task(
&state,
task_config,
storage,
tenants_dir_fd,
@@ -115,6 +127,7 @@ pub fn launch_disk_usage_global_eviction_task(
#[instrument(skip_all)]
async fn disk_usage_eviction_task(
state: &State,
task_config: &DiskUsageEvictionTaskConfig,
storage: GenericRemoteStorage,
tenants_dir_fd: Dir,
@@ -146,6 +159,7 @@ async fn disk_usage_eviction_task(
async {
let res = disk_usage_eviction_task_iteration(
state,
task_config,
&storage,
&mut tenants_dir_fd,
@@ -181,6 +195,7 @@ pub trait Usage: Clone + Copy + std::fmt::Debug {
}
async fn disk_usage_eviction_task_iteration(
state: &State,
task_config: &DiskUsageEvictionTaskConfig,
storage: &GenericRemoteStorage,
tenants_dir_fd: &mut SyncWrapper<Dir>,
@@ -188,7 +203,7 @@ async fn disk_usage_eviction_task_iteration(
) -> anyhow::Result<()> {
let usage_pre = filesystem_level_usage::get(tenants_dir_fd, task_config)
.context("get filesystem-level disk usage before evictions")?;
let res = disk_usage_eviction_task_iteration_impl(storage, usage_pre, cancel).await;
let res = disk_usage_eviction_task_iteration_impl(state, storage, usage_pre, cancel).await;
match res {
Ok(outcome) => {
debug!(?outcome, "disk_usage_eviction_iteration finished");
@@ -272,14 +287,13 @@ struct LayerCount {
#[allow(clippy::needless_late_init)]
pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
state: &State,
storage: &GenericRemoteStorage,
usage_pre: U,
cancel: &CancellationToken,
) -> anyhow::Result<IterationOutcome<U>> {
static MUTEX: once_cell::sync::Lazy<tokio::sync::Mutex<()>> =
once_cell::sync::Lazy::new(|| tokio::sync::Mutex::new(()));
let _g = MUTEX
let _g = state
.mutex
.try_lock()
.map_err(|_| anyhow::anyhow!("iteration is already executing"))?;

View File

@@ -18,6 +18,7 @@ use super::models::{
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
};
use crate::context::{DownloadBehavior, RequestContext};
use crate::disk_usage_eviction_task;
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::task_mgr::TaskKind;
use crate::tenant::config::TenantConfOpt;
@@ -48,6 +49,7 @@ struct State {
auth: Option<Arc<JwtAuth>>,
allowlist_routes: Vec<Uri>,
remote_storage: Option<GenericRemoteStorage>,
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
}
impl State {
@@ -55,6 +57,7 @@ impl State {
conf: &'static PageServerConf,
auth: Option<Arc<JwtAuth>>,
remote_storage: Option<GenericRemoteStorage>,
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
) -> anyhow::Result<Self> {
let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml"]
.iter()
@@ -65,6 +68,7 @@ impl State {
auth,
allowlist_routes,
remote_storage,
disk_usage_eviction_state,
})
}
}
@@ -1124,6 +1128,8 @@ async fn disk_usage_eviction_run(mut r: Request<Body>) -> Result<Response<Body>,
)))
};
let state = state.disk_usage_eviction_state.clone();
let cancel = CancellationToken::new();
let child_cancel = cancel.clone();
let _g = cancel.drop_guard();
@@ -1137,6 +1143,7 @@ async fn disk_usage_eviction_run(mut r: Request<Body>) -> Result<Response<Body>,
false,
async move {
let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
&state,
&storage,
usage,
&child_cancel,
@@ -1168,6 +1175,7 @@ pub fn make_router(
launch_ts: &'static LaunchTimestamp,
auth: Option<Arc<JwtAuth>>,
remote_storage: Option<GenericRemoteStorage>,
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
let spec = include_bytes!("openapi_spec.yml");
let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
@@ -1212,7 +1220,8 @@ pub fn make_router(
Ok(router
.data(Arc::new(
State::new(conf, auth, remote_storage).context("Failed to initialize router state")?,
State::new(conf, auth, remote_storage, disk_usage_eviction_state)
.context("Failed to initialize router state")?,
))
.get("/v1/status", |r| RequestSpan(status_handler).handle(r))
.put(