diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index c5eabc46db..abf815f07a 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -389,6 +389,10 @@ impl PageServerNode { .remove("image_creation_threshold") .map(|x| x.parse::()) .transpose()?, + image_layer_creation_check_threshold: settings + .remove("image_layer_creation_check_threshold") + .map(|x| x.parse::()) + .transpose()?, pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()), walreceiver_connect_timeout: settings .remove("walreceiver_connect_timeout") @@ -501,6 +505,12 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'image_creation_threshold' as non zero integer")?, + image_layer_creation_check_threshold: settings + .remove("image_layer_creation_check_threshold") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'image_creation_check_threshold' as integer")?, + pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()), walreceiver_connect_timeout: settings .remove("walreceiver_connect_timeout") diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index aad4cc97fc..ad4ca6710d 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -301,6 +301,7 @@ pub struct TenantConfig { pub heatmap_period: Option, pub lazy_slru_download: Option, pub timeline_get_throttle: Option, + pub image_layer_creation_check_threshold: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 792d9e548d..0806ef0cf4 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3653,6 +3653,9 @@ pub(crate) mod harness { heatmap_period: Some(tenant_conf.heatmap_period), lazy_slru_download: Some(tenant_conf.lazy_slru_download), timeline_get_throttle: Some(tenant_conf.timeline_get_throttle), + image_layer_creation_check_threshold: Some( + tenant_conf.image_layer_creation_check_threshold, + ), } } } diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 53a8c97e23..a2bb479f63 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -57,6 +57,9 @@ pub mod defaults { // throughputs up to 1GiB/s per timeline. pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024; pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour"; + // By default ingest enough WAL for two new L0 layers before checking if new image + // image layers should be created. + pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2; pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100; } @@ -362,6 +365,10 @@ pub struct TenantConf { pub lazy_slru_download: bool, pub timeline_get_throttle: pageserver_api::models::ThrottleConfig, + + // How much WAL must be ingested before checking again whether a new image layer is required. + // Expresed in multiples of checkpoint distance. + pub image_layer_creation_check_threshold: u8, } /// Same as TenantConf, but this struct preserves the information about @@ -454,6 +461,9 @@ pub struct TenantConfOpt { #[serde(skip_serializing_if = "Option::is_none")] pub timeline_get_throttle: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub image_layer_creation_check_threshold: Option, } impl TenantConfOpt { @@ -508,6 +518,9 @@ impl TenantConfOpt { .timeline_get_throttle .clone() .unwrap_or(global_conf.timeline_get_throttle), + image_layer_creation_check_threshold: self + .image_layer_creation_check_threshold + .unwrap_or(global_conf.image_layer_creation_check_threshold), } } } @@ -548,6 +561,7 @@ impl Default for TenantConf { heatmap_period: Duration::ZERO, lazy_slru_download: false, timeline_get_throttle: crate::tenant::throttle::Config::disabled(), + image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD, } } } @@ -621,6 +635,7 @@ impl From for models::TenantConfig { heatmap_period: value.heatmap_period.map(humantime), lazy_slru_download: value.lazy_slru_download, timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from), + image_layer_creation_check_threshold: value.image_layer_creation_check_threshold, } } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index bc3fc1df1f..f3565c1fb3 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -309,6 +309,8 @@ pub struct Timeline { /// Configuration: how often should the partitioning be recalculated. repartition_threshold: u64, + last_image_layer_creation_check_at: AtomicLsn, + /// Current logical size of the "datadir", at the last LSN. current_logical_size: LogicalSize, @@ -1632,6 +1634,15 @@ impl Timeline { .unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold) } + fn get_image_layer_creation_check_threshold(&self) -> u8 { + let tenant_conf = self.tenant_conf.read().unwrap().tenant_conf.clone(); + tenant_conf.image_layer_creation_check_threshold.unwrap_or( + self.conf + .default_tenant_conf + .image_layer_creation_check_threshold, + ) + } + pub(super) fn tenant_conf_updated(&self) { // NB: Most tenant conf options are read by background loops, so, // changes will automatically be picked up. @@ -1769,6 +1780,7 @@ impl Timeline { }, partitioning: tokio::sync::Mutex::new((KeyPartitioning::new(), Lsn(0))), repartition_threshold: 0, + last_image_layer_creation_check_at: AtomicLsn::new(0), last_received_wal: Mutex::new(None), rel_size_cache: RwLock::new(HashMap::new()), @@ -1797,6 +1809,7 @@ impl Timeline { }; result.repartition_threshold = result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; + result .metrics .last_record_gauge @@ -3501,6 +3514,24 @@ impl Timeline { // Is it time to create a new image layer for the given partition? async fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> bool { + let last = self.last_image_layer_creation_check_at.load(); + if lsn != Lsn(0) { + let distance = lsn + .checked_sub(last) + .expect("Attempt to compact with LSN going backwards"); + + let min_distance = self.get_image_layer_creation_check_threshold() as u64 + * self.get_checkpoint_distance(); + + // Skip the expensive delta layer counting below if we've not ingested + // sufficient WAL since the last check. + if distance.0 < min_distance { + return false; + } + } + + self.last_image_layer_creation_check_at.store(lsn); + let threshold = self.get_image_creation_threshold(); let guard = self.layers.read().await; diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index 3058926b25..909d25980b 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -189,6 +189,7 @@ def test_fully_custom_config(positive_env: NeonEnv): }, "trace_read_requests": True, "walreceiver_connect_timeout": "13m", + "image_layer_creation_check_threshold": 1, } ps_http = env.pageserver.http_client() diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py index 7bbc0cc160..fefb30bbdd 100644 --- a/test_runner/regress/test_layer_eviction.py +++ b/test_runner/regress/test_layer_eviction.py @@ -165,6 +165,7 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): "compaction_threshold": "3", # "image_creation_threshold": set at runtime "compaction_target_size": f"{128 * (1024**2)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers + "image_layer_creation_check_threshold": "0", # always check if a new image layer can be created } def tenant_update_config(changes): diff --git a/test_runner/regress/test_layers_from_future.py b/test_runner/regress/test_layers_from_future.py index ca4295c5cb..f311a8bf2c 100644 --- a/test_runner/regress/test_layers_from_future.py +++ b/test_runner/regress/test_layers_from_future.py @@ -53,6 +53,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder): "checkpoint_timeout": "24h", # something we won't reach "checkpoint_distance": f"{50 * (1024**2)}", # something we won't reach, we checkpoint manually "image_creation_threshold": "100", # we want to control when image is created + "image_layer_creation_check_threshold": "0", "compaction_threshold": f"{l0_l1_threshold}", "compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers } diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index 914f068afb..ba0d53704b 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -568,6 +568,8 @@ def test_compaction_downloads_on_demand_with_image_creation(neon_env_builder: Ne "image_creation_threshold": 100, # repartitioning parameter, unused "compaction_target_size": 128 * 1024**2, + # Always check if a new image layer can be created + "image_layer_creation_check_threshold": 0, # pitr_interval and gc_horizon are not interesting because we dont run gc } @@ -632,7 +634,8 @@ def test_compaction_downloads_on_demand_with_image_creation(neon_env_builder: Ne # threshold to expose image creation to downloading all of the needed # layers -- threshold of 2 would sound more reasonable, but keeping it as 1 # to be less flaky - env.neon_cli.config_tenant(tenant_id, {"image_creation_threshold": "1"}) + conf["image_creation_threshold"] = "1" + env.neon_cli.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()}) pageserver_http.timeline_compact(tenant_id, timeline_id) layers = pageserver_http.layer_map_info(tenant_id, timeline_id) diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 56b4548b64..41fa03cdf8 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -53,6 +53,7 @@ TENANT_CONF = { "compaction_period": "0s", # create image layers eagerly, so that GC can remove some layers "image_creation_threshold": "1", + "image_layer_creation_check_threshold": "0", } diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 986d6c4dbf..47200a856e 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -245,6 +245,7 @@ def test_remote_storage_upload_queue_retries( "compaction_period": "0s", # create image layers eagerly, so that GC can remove some layers "image_creation_threshold": "1", + "image_layer_creation_check_threshold": "0", } )