feat: imitiation_only eviction_task policy (#6598)

Mostly reuses the existing eviction-task machinery and — perhaps controversially — shares the
histogram with it. In practice we don't configure this per-tenant.

Cc: #5331
This commit is contained in:
Joonas Koivunen
2024-02-21 16:57:30 +02:00
committed by GitHub
parent 84f027357d
commit 7257ffbf75
3 changed files with 116 additions and 46 deletions

View File

@@ -291,6 +291,7 @@ pub struct TenantConfig {
/// Per-timeline eviction policy, selected through the tenant config
/// (`[tenant_config.eviction_policy]`, keyed by `kind`).
pub enum EvictionPolicy {
    /// This task performs no eviction and no access imitation.
    NoEviction,
    /// Evict layers whose last recorded access is older than the configured
    /// threshold, after imitating the accesses that normally hit in-memory caches.
    LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
    /// Only imitate the cache-hitting layer accesses (keeping access stats warm);
    /// actual eviction is left to the disk-usage-based eviction task.
    ///
    /// NOTE(review): the variant name carries a typo ("Imitiate"), but it is part
    /// of the serialized config surface (`kind = "OnlyImitiate"`), so renaming it
    /// would break existing configs — confirm before ever changing.
    OnlyImitiate(EvictionPolicyLayerAccessThreshold),
}
impl EvictionPolicy {
@@ -298,6 +299,7 @@ impl EvictionPolicy {
match self {
EvictionPolicy::NoEviction => "NoEviction",
EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
EvictionPolicy::OnlyImitiate(_) => "OnlyImitiate",
}
}
}

View File

@@ -1572,17 +1572,50 @@ threshold = "20m"
eviction_order: crate::disk_usage_eviction_task::EvictionOrder::AbsoluteAccessed,
})
);
match &conf.default_tenant_conf.eviction_policy {
EvictionPolicy::NoEviction => panic!("Unexpected eviction opolicy tenant settings"),
EvictionPolicy::LayerAccessThreshold(eviction_thresold) => {
assert_eq!(eviction_thresold.period, Duration::from_secs(20 * 60));
assert_eq!(eviction_thresold.threshold, Duration::from_secs(20 * 60));
EvictionPolicy::LayerAccessThreshold(eviction_threshold) => {
assert_eq!(eviction_threshold.period, Duration::from_secs(20 * 60));
assert_eq!(eviction_threshold.threshold, Duration::from_secs(20 * 60));
}
other => unreachable!("Unexpected eviction policy tenant settings: {other:?}"),
}
Ok(())
}
#[test]
fn parse_imitation_only_pageserver_config() {
    // Round-trip a pageserver config file that selects the "OnlyImitiate"
    // eviction policy and verify the parsed period/threshold values.
    let tempdir = tempdir().unwrap();
    let (workdir, pg_distrib_dir) = prepare_fs(&tempdir).unwrap();

    let pageserver_conf_toml = format!(
        r#"pg_distrib_dir = "{pg_distrib_dir}"
metric_collection_endpoint = "http://sample.url"
metric_collection_interval = "10min"
id = 222
[tenant_config]
evictions_low_residence_duration_metric_threshold = "20m"
[tenant_config.eviction_policy]
kind = "OnlyImitiate"
period = "20m"
threshold = "20m"
"#,
    );

    let toml: Document = pageserver_conf_toml.parse().unwrap();
    let conf = PageServerConf::parse_and_validate(&toml, &workdir).unwrap();

    // Both durations in the file are "20m".
    let twenty_minutes = Duration::from_secs(20 * 60);

    match &conf.default_tenant_conf.eviction_policy {
        EvictionPolicy::OnlyImitiate(thresholds) => {
            assert_eq!(thresholds.period, twenty_minutes);
            assert_eq!(thresholds.threshold, twenty_minutes);
        }
        other => unreachable!("Unexpected eviction policy tenant settings: {other:?}"),
    }
}
fn prepare_fs(tempdir: &Utf8TempDir) -> anyhow::Result<(Utf8PathBuf, Utf8PathBuf)> {
let tempdir_path = tempdir.path();

View File

@@ -85,6 +85,7 @@ impl Timeline {
let policy = self.get_eviction_policy();
let period = match policy {
EvictionPolicy::LayerAccessThreshold(lat) => lat.period,
EvictionPolicy::OnlyImitiate(lat) => lat.period,
EvictionPolicy::NoEviction => Duration::from_secs(10),
};
if random_init_delay(period, &cancel).await.is_err() {
@@ -119,33 +120,45 @@ impl Timeline {
ctx: &RequestContext,
) -> ControlFlow<(), Instant> {
debug!("eviction iteration: {policy:?}");
match policy {
let start = Instant::now();
let (period, threshold) = match policy {
EvictionPolicy::NoEviction => {
// check again in 10 seconds; XXX config watch mechanism
ControlFlow::Continue(Instant::now() + Duration::from_secs(10))
return ControlFlow::Continue(Instant::now() + Duration::from_secs(10));
}
EvictionPolicy::LayerAccessThreshold(p) => {
let start = Instant::now();
match self.eviction_iteration_threshold(p, cancel, ctx).await {
ControlFlow::Break(()) => return ControlFlow::Break(()),
ControlFlow::Continue(()) => (),
}
let elapsed = start.elapsed();
crate::tenant::tasks::warn_when_period_overrun(
elapsed,
p.period,
BackgroundLoopKind::Eviction,
);
crate::metrics::EVICTION_ITERATION_DURATION
.get_metric_with_label_values(&[
&format!("{}", p.period.as_secs()),
&format!("{}", p.threshold.as_secs()),
])
.unwrap()
.observe(elapsed.as_secs_f64());
ControlFlow::Continue(start + p.period)
(p.period, p.threshold)
}
}
EvictionPolicy::OnlyImitiate(p) => {
if self.imitiate_only(p, cancel, ctx).await.is_break() {
return ControlFlow::Break(());
}
(p.period, p.threshold)
}
};
let elapsed = start.elapsed();
crate::tenant::tasks::warn_when_period_overrun(
elapsed,
period,
BackgroundLoopKind::Eviction,
);
// FIXME: if we were to mix policies on a pageserver, we would have no way to sense this. I
// don't think that is a relevant fear however, and regardless the imitation should be the
// most costly part.
crate::metrics::EVICTION_ITERATION_DURATION
.get_metric_with_label_values(&[
&format!("{}", period.as_secs()),
&format!("{}", threshold.as_secs()),
])
.unwrap()
.observe(elapsed.as_secs_f64());
ControlFlow::Continue(start + period)
}
async fn eviction_iteration_threshold(
@@ -167,30 +180,6 @@ impl Timeline {
_ = self.cancel.cancelled() => return ControlFlow::Break(()),
};
// If we evict layers but keep cached values derived from those layers, then
// we face a storm of on-demand downloads after pageserver restart.
// The reason is that the restart empties the caches, and so, the values
// need to be re-computed by accessing layers, which we evicted while the
// caches were filled.
//
// Solutions here would be one of the following:
// 1. Have a persistent cache.
// 2. Count every access to a cached value to the access stats of all layers
// that were accessed to compute the value in the first place.
// 3. Invalidate the caches at a period of < p.threshold/2, so that the values
// get re-computed from layers, thereby counting towards layer access stats.
// 4. Make the eviction task imitate the layer accesses that typically hit caches.
//
// We follow approach (4) here because in Neon prod deployment:
// - page cache is quite small => high churn => low hit rate
// => eviction gets correct access stats
// - value-level caches such as logical size & repatition have a high hit rate,
// especially for inactive tenants
// => eviction sees zero accesses for these
// => they cause the on-demand download storm on pageserver restart
//
// We should probably move to persistent caches in the future, or avoid
// having inactive tenants attached to pageserver in the first place.
match self.imitate_layer_accesses(p, cancel, ctx).await {
ControlFlow::Break(()) => return ControlFlow::Break(()),
ControlFlow::Continue(()) => (),
@@ -307,6 +296,52 @@ impl Timeline {
ControlFlow::Continue(())
}
/// Like `eviction_iteration_threshold`, but without any eviction. Eviction will be done by
/// disk usage based eviction task.
///
/// NOTE(review): the name carries a typo ("imitiate"); callers use it as-is, so it
/// stays unchanged here.
async fn imitiate_only(
    self: &Arc<Self>,
    p: &EvictionPolicyLayerAccessThreshold,
    cancel: &CancellationToken,
    ctx: &RequestContext,
) -> ControlFlow<()> {
    // Rate-limit against the other background loops before doing any work.
    let permit_fut = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit(
        BackgroundLoopKind::Eviction,
        ctx,
    );

    // Bail out early if either the task or the timeline is shutting down
    // while we wait for a permit.
    let _permit = tokio::select! {
        guard = permit_fut => guard,
        _ = cancel.cancelled() => return ControlFlow::Break(()),
        _ = self.cancel.cancelled() => return ControlFlow::Break(()),
    };

    self.imitate_layer_accesses(p, cancel, ctx).await
}
/// If we evict layers but keep cached values derived from those layers, then
/// we face a storm of on-demand downloads after pageserver restart.
/// The reason is that the restart empties the caches, and so, the values
/// need to be re-computed by accessing layers, which we evicted while the
/// caches were filled.
///
/// Solutions here would be one of the following:
/// 1. Have a persistent cache.
/// 2. Count every access to a cached value to the access stats of all layers
/// that were accessed to compute the value in the first place.
/// 3. Invalidate the caches at a period of < p.threshold/2, so that the values
/// get re-computed from layers, thereby counting towards layer access stats.
/// 4. Make the eviction task imitate the layer accesses that typically hit caches.
///
/// We follow approach (4) here because in Neon prod deployment:
/// - page cache is quite small => high churn => low hit rate
/// => eviction gets correct access stats
/// - value-level caches such as logical size & repartition have a high hit rate,
/// especially for inactive tenants
/// => eviction sees zero accesses for these
/// => they cause the on-demand download storm on pageserver restart
///
/// We should probably move to persistent caches in the future, or avoid
/// having inactive tenants attached to pageserver in the first place.
#[instrument(skip_all)]
async fn imitate_layer_accesses(
&self,