diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 5909477586..a52fcb4a3f 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -391,11 +391,6 @@ impl PageServerNode { evictions_low_residence_duration_metric_threshold: settings .remove("evictions_low_residence_duration_metric_threshold") .map(|x| x.to_string()), - gc_feedback: settings - .remove("gc_feedback") - .map(|x| x.parse::()) - .transpose() - .context("Failed to parse 'gc_feedback' as bool")?, heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()), lazy_slru_download: settings .remove("lazy_slru_download") @@ -501,11 +496,6 @@ impl PageServerNode { evictions_low_residence_duration_metric_threshold: settings .remove("evictions_low_residence_duration_metric_threshold") .map(|x| x.to_string()), - gc_feedback: settings - .remove("gc_feedback") - .map(|x| x.parse::()) - .transpose() - .context("Failed to parse 'gc_feedback' as bool")?, heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()), lazy_slru_download: settings .remove("lazy_slru_download") diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index aa1a8ae487..ce9afd65ac 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -283,7 +283,6 @@ pub struct TenantConfig { pub eviction_policy: Option, pub min_resident_size_override: Option, pub evictions_low_residence_duration_metric_threshold: Option, - pub gc_feedback: Option, pub heatmap_period: Option, pub lazy_slru_download: Option, pub timeline_get_throttle: Option, diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 3b7672fa26..b0d828d066 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -140,7 +140,6 @@ pub mod defaults { #min_resident_size_override = .. 
# in bytes #evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}' -#gc_feedback = false #heatmap_upload_concurrency = {DEFAULT_HEATMAP_UPLOAD_CONCURRENCY} #secondary_download_concurrency = {DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY} diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 6389d52014..c97f24c0fc 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3641,7 +3641,6 @@ pub(crate) mod harness { evictions_low_residence_duration_metric_threshold: Some( tenant_conf.evictions_low_residence_duration_metric_threshold, ), - gc_feedback: Some(tenant_conf.gc_feedback), heatmap_period: Some(tenant_conf.heatmap_period), lazy_slru_download: Some(tenant_conf.lazy_slru_download), timeline_get_throttle: Some(tenant_conf.timeline_get_throttle), diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 5c88d30caf..cce30e900e 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -339,7 +339,6 @@ pub struct TenantConf { // See the corresponding metric's help string. #[serde(with = "humantime_serde")] pub evictions_low_residence_duration_metric_threshold: Duration, - pub gc_feedback: bool, /// If non-zero, the period between uploads of a heatmap from attached tenants. 
This /// may be disabled if a Tenant will not have secondary locations: only secondary @@ -427,10 +426,6 @@ pub struct TenantConfOpt { #[serde(default)] pub evictions_low_residence_duration_metric_threshold: Option, - #[serde(skip_serializing_if = "Option::is_none")] - #[serde(default)] - pub gc_feedback: Option, - #[serde(skip_serializing_if = "Option::is_none")] #[serde(with = "humantime_serde")] #[serde(default)] @@ -485,7 +480,6 @@ impl TenantConfOpt { evictions_low_residence_duration_metric_threshold: self .evictions_low_residence_duration_metric_threshold .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold), - gc_feedback: self.gc_feedback.unwrap_or(global_conf.gc_feedback), heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period), lazy_slru_download: self .lazy_slru_download @@ -530,7 +524,6 @@ impl Default for TenantConf { DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, ) .expect("cannot parse default evictions_low_residence_duration_metric_threshold"), - gc_feedback: false, heatmap_period: Duration::ZERO, lazy_slru_download: false, timeline_get_throttle: crate::tenant::throttle::Config::disabled(), @@ -603,7 +596,6 @@ impl From for models::TenantConfig { evictions_low_residence_duration_metric_threshold: value .evictions_low_residence_duration_metric_threshold .map(humantime), - gc_feedback: value.gc_feedback, heatmap_period: value.heatmap_period.map(humantime), lazy_slru_download: value.lazy_slru_download, timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from), diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2c2351d531..0586ec38c8 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -76,7 +76,7 @@ use crate::{ use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind}; use crate::config::PageServerConf; -use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum}; +use 
crate::keyspace::{KeyPartitioning, KeySpace}; use crate::metrics::{ TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT, }; @@ -210,17 +210,6 @@ pub struct Timeline { /// so that e.g. on-demand-download/eviction, and layer spreading, can operate just on `LayerFileManager`. pub(crate) layers: Arc>, - /// Set of key ranges which should be covered by image layers to - /// allow GC to remove old layers. This set is created by GC and its cutoff LSN is also stored. - /// It is used by compaction task when it checks if new image layer should be created. - /// Newly created image layer doesn't help to remove the delta layer, until the - /// newly created image layer falls off the PITR horizon. So on next GC cycle, - /// gc_timeline may still want the new image layer to be created. To avoid redundant - /// image layers creation we should check if image layer exists but beyond PITR horizon. - /// This is why we need remember GC cutoff LSN. - /// - wanted_image_layers: Mutex>, - last_freeze_at: AtomicLsn, // Atomic would be more appropriate here. last_freeze_ts: RwLock, @@ -1516,13 +1505,6 @@ impl Timeline { .unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold) } - fn get_gc_feedback(&self) -> bool { - let tenant_conf = &self.tenant_conf.read().unwrap().tenant_conf.clone(); - tenant_conf - .gc_feedback - .unwrap_or(self.conf.default_tenant_conf.gc_feedback) - } - pub(super) fn tenant_conf_updated(&self) { // NB: Most tenant conf options are read by background loops, so, // changes will automatically be picked up. 
@@ -1596,7 +1578,6 @@ impl Timeline { shard_identity, pg_version, layers: Default::default(), - wanted_image_layers: Mutex::new(None), walredo_mgr, walreceiver: Mutex::new(None), @@ -3408,31 +3389,6 @@ impl Timeline { let layers = guard.layer_map(); let mut max_deltas = 0; - { - let wanted_image_layers = self.wanted_image_layers.lock().unwrap(); - if let Some((cutoff_lsn, wanted)) = &*wanted_image_layers { - let img_range = - partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end; - if wanted.overlaps(&img_range) { - // - // gc_timeline only pays attention to image layers that are older than the GC cutoff, - // but create_image_layers creates image layers at last-record-lsn. - // So it's possible that gc_timeline wants a new image layer to be created for a key range, - // but the range is already covered by image layers at more recent LSNs. Before we - // create a new image layer, check if the range is already covered at more recent LSNs. - if !layers - .image_layer_exists(&img_range, &(Lsn::min(lsn, *cutoff_lsn)..lsn + 1)) - { - debug!( - "Force generation of layer {}-{} wanted by GC, cutoff={}, lsn={})", - img_range.start, img_range.end, cutoff_lsn, lsn - ); - return true; - } - } - } - } - for part_range in &partition.ranges { let image_coverage = layers.image_coverage(part_range, lsn); for (img_range, last_img) in image_coverage { @@ -3603,12 +3559,6 @@ impl Timeline { tracing::debug!("no data in range {}-{}", img_range.start, img_range.end); } } - // All layers that the GC wanted us to create have now been created. - // - // It's possible that another GC cycle happened while we were compacting, and added - // something new to wanted_image_layers, and we now clear that before processing it. - // That's OK, because the next GC iteration will put it back in. 
- *self.wanted_image_layers.lock().unwrap() = None; // Sync the new layer to disk before adding it to the layer map, to make sure // we don't garbage collect something based on the new layer, before it has @@ -4518,7 +4468,6 @@ impl Timeline { debug!("retain_lsns: {:?}", retain_lsns); let mut layers_to_remove = Vec::new(); - let mut wanted_image_layers = KeySpaceRandomAccum::default(); // Scan all layers in the timeline (remote or on-disk). // @@ -4600,15 +4549,6 @@ impl Timeline { .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff)) { debug!("keeping {} because it is the latest layer", l.filename()); - // Collect delta key ranges that need image layers to allow garbage - // collecting the layers. - // It is not so obvious whether we need to propagate information only about - // delta layers. Image layers can form "stairs" preventing old image from been deleted. - // But image layers are in any case less sparse than delta layers. Also we need some - // protection from replacing recent image layers with new one after each GC iteration. - if self.get_gc_feedback() && l.is_incremental() && !LayerMap::is_l0(&l) { - wanted_image_layers.add_range(l.get_key_range()); - } result.layers_not_updated += 1; continue 'outer; } @@ -4621,10 +4561,6 @@ impl Timeline { ); layers_to_remove.push(l); } - self.wanted_image_layers - .lock() - .unwrap() - .replace((new_gc_cutoff, wanted_image_layers.to_keyspace())); if !layers_to_remove.is_empty() { // Persist the new GC cutoff value before we actually remove anything. 
diff --git a/test_runner/performance/test_gc_feedback.py b/test_runner/performance/test_gc_feedback.py index cf9e4808fc..48dd84fb06 100644 --- a/test_runner/performance/test_gc_feedback.py +++ b/test_runner/performance/test_gc_feedback.py @@ -13,6 +13,11 @@ def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma Information about image layers needed to collect old layers should be propagated by GC to compaction task which should take in in account when make a decision which new image layers needs to be created. + + NB: this test demonstrates the problem. The source tree contained the + `gc_feedback` mechanism for about 9 months, but there were problems + with it and it wasn't enabled at runtime. + This PR removed the code: https://github.com/neondatabase/neon/pull/6863 """ env = neon_env_builder.init_start() client = env.pageserver.http_client() diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index 1aaded222c..43e035d303 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -166,7 +166,6 @@ def test_fully_custom_config(positive_env: NeonEnv): "threshold": "23h", }, "evictions_low_residence_duration_metric_threshold": "2days", - "gc_feedback": True, "gc_horizon": 23 * (1024 * 1024), "gc_period": "2h 13m", "heatmap_period": "10m",