WIP++ v2 limit eviction task concurrency: metric & enum

This commit is contained in:
Christian Schwarz
2023-10-10 17:41:15 +00:00
parent 112008519c
commit c5259dcf32
6 changed files with 83 additions and 14 deletions

View File

@@ -36,7 +36,7 @@ use utils::pid_file::{self, PidFileRead};
// it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting.
//
const RETRY_UNTIL_SECS: u64 = 10;
const RETRY_UNTIL_SECS: u64 = 40;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u64 = 10;
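For reference, a quick sketch of the arithmetic implied by this hunk (not part of the change): with the new value, RETRIES = (40 * 1000) / 100 = 400 attempts at one attempt per 100 ms, i.e. a total wait budget of ~40 s instead of ~10 s, and DOT_EVERY_RETRIES = 10 presumably means a progress dot roughly once per second while waiting.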

View File

@@ -2,6 +2,7 @@
//! and push them to a HTTP endpoint.
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::{mgr, LogicalSizeCalculationCause};
use camino::Utf8PathBuf;
use consumption_metrics::EventType;
@@ -143,7 +144,7 @@ pub async fn collect_metrics(
crate::tenant::tasks::warn_when_period_overrun(
tick_at.elapsed(),
metric_collection_interval,
"consumption_metrics_collect_metrics",
BackgroundLoopKind::ConsumptionMetricsCollectMetrics,
);
}
}
@@ -268,6 +269,11 @@ async fn calculate_synthetic_size_worker(
}
if let Ok(tenant) = mgr::get_tenant(tenant_id, true).await {
// TODO: should we just use concurrent_background_tasks_rate_limit() here?
// We could add some prioritization for consumption metrics.
// Same for the loop that fetches computed metrics.
// By using the same limiter, we would also centralize the "start"/"finish" counters,
// which turns out to be really handy for understanding the system.
if let Err(e) = tenant.calculate_synthetic_size(cause, ctx).await {
error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
}
@@ -277,7 +283,7 @@ async fn calculate_synthetic_size_worker(
crate::tenant::tasks::warn_when_period_overrun(
tick_at.elapsed(),
synthetic_size_calculation_interval,
"consumption_metrics_synthetic_size_worker",
BackgroundLoopKind::ConsumptionMetricsSyntheticSizeWorker,
);
}
}
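A hypothetical sketch of what the TODO in the hunk above could look like if the synthetic size worker also went through the shared limiter, mirroring the call sites added later in this commit. It assumes a RequestContext (ctx) and CancellationToken (cancel) are in scope; this is not part of the change:

    // Hypothetical: route the synthetic size worker through the shared limiter too.
    let _permit = match crate::tenant::tasks::concurrent_background_tasks_rate_limit(
        BackgroundLoopKind::ConsumptionMetricsSyntheticSizeWorker,
        ctx,
        cancel,
    )
    .await
    {
        Ok(permit) => permit,
        Err(RateLimitError::Cancelled) => return,
    };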

View File

@@ -1060,6 +1060,26 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
.expect("Failed to register tenant_task_events metric")
});
pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_START_COUNT: Lazy<IntCounterVec> =
Lazy::new(|| {
register_int_counter_vec!(
"pageserver_background_loop_semaphore_wait_start_count",
"Counter for background loop concurrency-limiting semaphore acquire calls started",
&["task"],
)
.unwrap()
});
pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_FINISH_COUNT: Lazy<IntCounterVec> =
Lazy::new(|| {
register_int_counter_vec!(
"pageserver_background_loop_semaphore_wait_finish_count",
"Counter for background loop concurrency-limiting semaphore acquire calls finished",
&["task"],
)
.unwrap()
});
pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_background_loop_period_overrun_count",

View File

@@ -36,15 +36,38 @@ static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy<tokio::sync::Semaphore
tokio::sync::Semaphore::new(permits)
});
#[derive(Debug, PartialEq, Eq, Clone, Copy, strum_macros::IntoStaticStr)]
#[strum(serialize_all = "snake_case")]
pub(crate) enum BackgroundLoopKind {
Compaction,
Gc,
Eviction,
ConsumptionMetricsCollectMetrics,
ConsumptionMetricsSyntheticSizeWorker,
}
impl BackgroundLoopKind {
fn as_static_str(&self) -> &'static str {
let s: &'static str = self.into();
s
}
}
pub(crate) enum RateLimitError {
Cancelled,
}
pub(crate) async fn concurrent_background_tasks_rate_limit(
loop_kind: BackgroundLoopKind,
_ctx: &RequestContext,
cancel: &CancellationToken,
) -> Result<impl Drop, RateLimitError> {
// TODO: use the RequestContext's TaskKind to get statistics on how many tasks of each kind are waiting for the background task semaphore
crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_START_COUNT
.with_label_values(&[loop_kind.as_static_str()])
.inc();
scopeguard::defer!(
crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_FINISH_COUNT.with_label_values(&[loop_kind.as_static_str()]).inc();
);
tokio::select! {
permit = CONCURRENT_BACKGROUND_TASKS.acquire() => {
match permit {
@@ -160,7 +183,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
}
};
warn_when_period_overrun(started_at.elapsed(), period, "compaction");
warn_when_period_overrun(started_at.elapsed(), period, BackgroundLoopKind::Compaction);
// Sleep
if tokio::time::timeout(sleep_duration, cancel.cancelled())
@@ -228,7 +251,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
}
};
warn_when_period_overrun(started_at.elapsed(), period, "gc");
warn_when_period_overrun(started_at.elapsed(), period, BackgroundLoopKind::Gc);
// Sleep
if tokio::time::timeout(sleep_duration, cancel.cancelled())
@@ -302,7 +325,11 @@ pub(crate) async fn random_init_delay(
}
/// Attention: the `task` and `period` become labels of a pageserver-wide prometheus metric.
pub(crate) fn warn_when_period_overrun(elapsed: Duration, period: Duration, task: &str) {
pub(crate) fn warn_when_period_overrun(
elapsed: Duration,
period: Duration,
task: BackgroundLoopKind,
) {
// Duration::ZERO will happen because it's the "disable [bgtask]" value.
if elapsed >= period && period != Duration::ZERO {
// humantime does no significant digits clamping whereas Duration's debug is a bit more
@@ -311,11 +338,11 @@ pub(crate) fn warn_when_period_overrun(elapsed: Duration, period: Duration, task
warn!(
?elapsed,
period = %humantime::format_duration(period),
task,
?task,
"task iteration took longer than the configured period"
);
crate::metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT
.with_label_values(&[task, &format!("{}", period.as_secs())])
.with_label_values(&[task.as_static_str(), &format!("{}", period.as_secs())])
.inc();
}
}
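Note on the label values: with strum's serialize_all = "snake_case", as_static_str() should return the snake_case variant name, so the pre-existing loops keep the exact labels they had as hardcoded strings ("compaction", "gc", "eviction"), while the new consumption-metrics loops get labels like "consumption_metrics_collect_metrics". An illustrative check (not part of the diff):

    assert_eq!(BackgroundLoopKind::Compaction.as_static_str(), "compaction");
    assert_eq!(
        BackgroundLoopKind::ConsumptionMetricsCollectMetrics.as_static_str(),
        "consumption_metrics_collect_metrics"
    );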

View File

@@ -44,7 +44,7 @@ use crate::tenant::storage_layer::delta_layer::DeltaEntry;
use crate::tenant::storage_layer::{
DeltaLayerWriter, ImageLayerWriter, InMemoryLayer, LayerAccessStats, LayerFileName, RemoteLayer,
};
use crate::tenant::tasks::RateLimitError;
use crate::tenant::tasks::{BackgroundLoopKind, RateLimitError};
use crate::tenant::timeline::logical_size::CurrentLogicalSize;
use crate::tenant::{
layer_map::{LayerMap, SearchResult},
@@ -687,7 +687,12 @@ impl Timeline {
// this wait probably never needs any "long time spent" logging, because we already nag if the
// compaction task goes over its period (20s), which happens quite often in production.
let _permit = match super::tasks::concurrent_background_tasks_rate_limit(ctx, cancel).await
let _permit = match super::tasks::concurrent_background_tasks_rate_limit(
BackgroundLoopKind::Compaction,
ctx,
cancel,
)
.await
{
Ok(permit) => permit,
Err(RateLimitError::Cancelled) => return Ok(()),

View File

@@ -30,8 +30,9 @@ use crate::{
tenant::{
config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold},
storage_layer::PersistentLayer,
tasks::{BackgroundLoopKind, RateLimitError},
timeline::EvictionError,
LogicalSizeCalculationCause, Tenant, tasks::RateLimitError,
LogicalSizeCalculationCause, Tenant,
},
};
@@ -129,7 +130,11 @@ impl Timeline {
ControlFlow::Continue(()) => (),
}
let elapsed = start.elapsed();
crate::tenant::tasks::warn_when_period_overrun(elapsed, p.period, "eviction");
crate::tenant::tasks::warn_when_period_overrun(
elapsed,
p.period,
BackgroundLoopKind::Eviction,
);
crate::metrics::EVICTION_ITERATION_DURATION
.get_metric_with_label_values(&[
&format!("{}", p.period.as_secs()),
@@ -150,7 +155,13 @@ impl Timeline {
) -> ControlFlow<()> {
let now = SystemTime::now();
let _permit = match crate::tenant::tasks::concurrent_background_tasks_rate_limit(ctx, cancel).await {
let _permit = match crate::tenant::tasks::concurrent_background_tasks_rate_limit(
BackgroundLoopKind::Eviction,
ctx,
cancel,
)
.await
{
Ok(permit) => permit,
Err(RateLimitError::Cancelled) => return ControlFlow::Break(()),
};
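
Taken together, each background loop now follows the same shape around the shared limiter and the overrun warning. A condensed sketch of that pattern (the function name background_iteration is illustrative, not a verbatim excerpt from the tree; CancellationToken is assumed to be tokio_util's, as at the call sites):

    use std::ops::ControlFlow;
    use std::time::{Duration, Instant};

    use tokio_util::sync::CancellationToken;

    use crate::context::RequestContext;
    use crate::tenant::tasks::{
        concurrent_background_tasks_rate_limit, warn_when_period_overrun, BackgroundLoopKind,
        RateLimitError,
    };

    async fn background_iteration(
        kind: BackgroundLoopKind,
        period: Duration,
        ctx: &RequestContext,
        cancel: &CancellationToken,
    ) -> ControlFlow<()> {
        // Wait for a permit on the shared semaphore; the wait_start/wait_finish
        // counters are incremented inside concurrent_background_tasks_rate_limit().
        let _permit = match concurrent_background_tasks_rate_limit(kind, ctx, cancel).await {
            Ok(permit) => permit,
            Err(RateLimitError::Cancelled) => return ControlFlow::Break(()),
        };

        let started_at = Instant::now();
        // ... the actual compaction / gc / eviction / metrics work goes here ...

        // Warn and bump the overrun counter if the iteration ran longer than its
        // configured period; `kind` becomes the metric's `task` label.
        warn_when_period_overrun(started_at.elapsed(), period, kind);
        ControlFlow::Continue(())
    }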