diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 557a4d7de9..af3c8018c4 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -291,6 +291,7 @@ pub struct TenantConfig { pub enum EvictionPolicy { NoEviction, LayerAccessThreshold(EvictionPolicyLayerAccessThreshold), + OnlyImitiate(EvictionPolicyLayerAccessThreshold), } impl EvictionPolicy { @@ -298,6 +299,7 @@ impl EvictionPolicy { match self { EvictionPolicy::NoEviction => "NoEviction", EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold", + EvictionPolicy::OnlyImitiate(_) => "OnlyImitiate", } } } diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 6c00c55f39..34d9636673 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -1572,17 +1572,50 @@ threshold = "20m" eviction_order: crate::disk_usage_eviction_task::EvictionOrder::AbsoluteAccessed, }) ); + match &conf.default_tenant_conf.eviction_policy { - EvictionPolicy::NoEviction => panic!("Unexpected eviction opolicy tenant settings"), - EvictionPolicy::LayerAccessThreshold(eviction_thresold) => { - assert_eq!(eviction_thresold.period, Duration::from_secs(20 * 60)); - assert_eq!(eviction_thresold.threshold, Duration::from_secs(20 * 60)); + EvictionPolicy::LayerAccessThreshold(eviction_threshold) => { + assert_eq!(eviction_threshold.period, Duration::from_secs(20 * 60)); + assert_eq!(eviction_threshold.threshold, Duration::from_secs(20 * 60)); } + other => unreachable!("Unexpected eviction policy tenant settings: {other:?}"), } Ok(()) } + #[test] + fn parse_imitation_only_pageserver_config() { + let tempdir = tempdir().unwrap(); + let (workdir, pg_distrib_dir) = prepare_fs(&tempdir).unwrap(); + + let pageserver_conf_toml = format!( + r#"pg_distrib_dir = "{pg_distrib_dir}" +metric_collection_endpoint = "http://sample.url" +metric_collection_interval = "10min" +id = 222 + +[tenant_config] +evictions_low_residence_duration_metric_threshold = "20m" + +[tenant_config.eviction_policy] +kind = "OnlyImitiate" +period = "20m" +threshold = "20m" +"#, + ); + let toml: Document = pageserver_conf_toml.parse().unwrap(); + let conf = PageServerConf::parse_and_validate(&toml, &workdir).unwrap(); + + match &conf.default_tenant_conf.eviction_policy { + EvictionPolicy::OnlyImitiate(t) => { + assert_eq!(t.period, Duration::from_secs(20 * 60)); + assert_eq!(t.threshold, Duration::from_secs(20 * 60)); + } + other => unreachable!("Unexpected eviction policy tenant settings: {other:?}"), + } + } + fn prepare_fs(tempdir: &Utf8TempDir) -> anyhow::Result<(Utf8PathBuf, Utf8PathBuf)> { let tempdir_path = tempdir.path(); diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 33ba234a63..127e351c14 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -85,6 +85,7 @@ impl Timeline { let policy = self.get_eviction_policy(); let period = match policy { EvictionPolicy::LayerAccessThreshold(lat) => lat.period, + EvictionPolicy::OnlyImitiate(lat) => lat.period, EvictionPolicy::NoEviction => Duration::from_secs(10), }; if random_init_delay(period, &cancel).await.is_err() { @@ -119,33 +120,45 @@ impl Timeline { ctx: &RequestContext, ) -> ControlFlow<(), Instant> { debug!("eviction iteration: {policy:?}"); - match policy { + let start = Instant::now(); + let (period, threshold) = match policy { EvictionPolicy::NoEviction => { // check again in 10 seconds; XXX config watch mechanism - ControlFlow::Continue(Instant::now() + Duration::from_secs(10)) + return ControlFlow::Continue(Instant::now() + Duration::from_secs(10)); } EvictionPolicy::LayerAccessThreshold(p) => { - let start = Instant::now(); match self.eviction_iteration_threshold(p, cancel, ctx).await { ControlFlow::Break(()) => return ControlFlow::Break(()), ControlFlow::Continue(()) => (), } - let elapsed = start.elapsed(); - crate::tenant::tasks::warn_when_period_overrun( - elapsed, - p.period, - BackgroundLoopKind::Eviction, - ); - crate::metrics::EVICTION_ITERATION_DURATION - .get_metric_with_label_values(&[ - &format!("{}", p.period.as_secs()), - &format!("{}", p.threshold.as_secs()), - ]) - .unwrap() - .observe(elapsed.as_secs_f64()); - ControlFlow::Continue(start + p.period) + (p.period, p.threshold) } - } + EvictionPolicy::OnlyImitiate(p) => { + if self.imitiate_only(p, cancel, ctx).await.is_break() { + return ControlFlow::Break(()); + } + (p.period, p.threshold) + } + }; + + let elapsed = start.elapsed(); + crate::tenant::tasks::warn_when_period_overrun( + elapsed, + period, + BackgroundLoopKind::Eviction, + ); + // FIXME: if we were to mix policies on a pageserver, we would have no way to sense this. I + // don't think that is a relevant fear however, and regardless the imitation should be the + // most costly part. + crate::metrics::EVICTION_ITERATION_DURATION + .get_metric_with_label_values(&[ + &format!("{}", period.as_secs()), + &format!("{}", threshold.as_secs()), + ]) + .unwrap() + .observe(elapsed.as_secs_f64()); + + ControlFlow::Continue(start + period) } async fn eviction_iteration_threshold( @@ -167,30 +180,6 @@ impl Timeline { _ = self.cancel.cancelled() => return ControlFlow::Break(()), }; - // If we evict layers but keep cached values derived from those layers, then - // we face a storm of on-demand downloads after pageserver restart. - // The reason is that the restart empties the caches, and so, the values - // need to be re-computed by accessing layers, which we evicted while the - // caches were filled. - // - // Solutions here would be one of the following: - // 1. Have a persistent cache. - // 2. Count every access to a cached value to the access stats of all layers - // that were accessed to compute the value in the first place. - // 3. Invalidate the caches at a period of < p.threshold/2, so that the values - // get re-computed from layers, thereby counting towards layer access stats. - // 4. Make the eviction task imitate the layer accesses that typically hit caches. - // - // We follow approach (4) here because in Neon prod deployment: - // - page cache is quite small => high churn => low hit rate - // => eviction gets correct access stats - // - value-level caches such as logical size & repatition have a high hit rate, - // especially for inactive tenants - // => eviction sees zero accesses for these - // => they cause the on-demand download storm on pageserver restart - // - // We should probably move to persistent caches in the future, or avoid - // having inactive tenants attached to pageserver in the first place. match self.imitate_layer_accesses(p, cancel, ctx).await { ControlFlow::Break(()) => return ControlFlow::Break(()), ControlFlow::Continue(()) => (), @@ -307,6 +296,52 @@ impl Timeline { ControlFlow::Continue(()) } + /// Like `eviction_iteration_threshold`, but without any eviction. Eviction will be done by + /// disk usage based eviction task. + async fn imitiate_only( + self: &Arc, + p: &EvictionPolicyLayerAccessThreshold, + cancel: &CancellationToken, + ctx: &RequestContext, + ) -> ControlFlow<()> { + let acquire_permit = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit( + BackgroundLoopKind::Eviction, + ctx, + ); + + let _permit = tokio::select! { + permit = acquire_permit => permit, + _ = cancel.cancelled() => return ControlFlow::Break(()), + _ = self.cancel.cancelled() => return ControlFlow::Break(()), + }; + + self.imitate_layer_accesses(p, cancel, ctx).await + } + + /// If we evict layers but keep cached values derived from those layers, then + /// we face a storm of on-demand downloads after pageserver restart. + /// The reason is that the restart empties the caches, and so, the values + /// need to be re-computed by accessing layers, which we evicted while the + /// caches were filled. + /// + /// Solutions here would be one of the following: + /// 1. Have a persistent cache. + /// 2. Count every access to a cached value to the access stats of all layers + /// that were accessed to compute the value in the first place. + /// 3. Invalidate the caches at a period of < p.threshold/2, so that the values + /// get re-computed from layers, thereby counting towards layer access stats. + /// 4. Make the eviction task imitate the layer accesses that typically hit caches. + /// + /// We follow approach (4) here because in Neon prod deployment: + /// - page cache is quite small => high churn => low hit rate + /// => eviction gets correct access stats + /// - value-level caches such as logical size & repatition have a high hit rate, + /// especially for inactive tenants + /// => eviction sees zero accesses for these + /// => they cause the on-demand download storm on pageserver restart + /// + /// We should probably move to persistent caches in the future, or avoid + /// having inactive tenants attached to pageserver in the first place. #[instrument(skip_all)] async fn imitate_layer_accesses( &self,