From 7d4bfcdc4795654100b87a02d398e41373027e44 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:29:38 -0500 Subject: [PATCH] feat(pageserver): add config items for gc-compaction auto trigger (#10455) ## Problem part of https://github.com/neondatabase/neon/issues/9114 The automatic trigger is already implemented at https://github.com/neondatabase/neon/pull/10221 but I need to write some tests and finish my experiments in staging before I can merge it with confidence. Given that I have some other patches that will modify the config items, I'd like to get the config items merged first to reduce conflicts. ## Summary of changes * add `l2_lsn` to index_part.json -- below that LSN, data have been processed by gc-compaction * add a set of gc-compaction auto trigger control items into the config --------- Signed-off-by: Alex Chi Z --- control_plane/src/pageserver.rs | 15 +++ libs/pageserver_api/src/config.rs | 16 +++ libs/pageserver_api/src/models.rs | 24 ++++ pageserver/src/tenant.rs | 5 + pageserver/src/tenant/config.rs | 36 ++++++ .../tenant/remote_timeline_client/index.rs | 104 +++++++++++++++++- .../regress/test_attach_tenant_config.py | 3 + 7 files changed, 201 insertions(+), 2 deletions(-) diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index df81b44f2d..b33b2877b3 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -423,6 +423,21 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'rel_size_v2_enabled' as bool")?, + gc_compaction_enabled: settings + .remove("gc_compaction_enabled") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'gc_compaction_enabled' as bool")?, + gc_compaction_initial_threshold_kb: settings + .remove("gc_compaction_initial_threshold_kb") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'gc_compaction_initial_threshold_kb' as integer")?, + gc_compaction_ratio_percent: settings + .remove("gc_compaction_ratio_percent") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'gc_compaction_ratio_percent' as integer")?, }; if !settings.is_empty() { bail!("Unrecognized tenant settings: {settings:?}") diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 7fb7a9d54e..f0aeb00736 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -305,6 +305,16 @@ pub struct TenantConfigToml { /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into /// `index_part.json`, and it cannot be reversed. pub rel_size_v2_enabled: Option, + + // gc-compaction related configs + /// Enable automatic gc-compaction trigger on this tenant. + pub gc_compaction_enabled: bool, + /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold, + /// gc-compaction will be triggered. + pub gc_compaction_initial_threshold_kb: u64, + /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN) + /// is above this ratio, gc-compaction will be triggered. + pub gc_compaction_ratio_percent: u64, } pub mod defaults { @@ -498,6 +508,9 @@ pub mod tenant_conf_defaults { // By default ingest enough WAL for two new L0 layers before checking if new image // image layers should be created. pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2; + pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false; + pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 10240000; + pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100; } impl Default for TenantConfigToml { @@ -543,6 +556,9 @@ impl Default for TenantConfigToml { timeline_offloading: false, wal_receiver_protocol_override: None, rel_size_v2_enabled: None, + gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED, + gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB, + gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT, } } } diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 1538134c96..fd4879087f 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -499,6 +499,12 @@ pub struct TenantConfigPatch { pub wal_receiver_protocol_override: FieldPatch, #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub rel_size_v2_enabled: FieldPatch, + #[serde(skip_serializing_if = "FieldPatch::is_noop")] + pub gc_compaction_enabled: FieldPatch, + #[serde(skip_serializing_if = "FieldPatch::is_noop")] + pub gc_compaction_initial_threshold_kb: FieldPatch, + #[serde(skip_serializing_if = "FieldPatch::is_noop")] + pub gc_compaction_ratio_percent: FieldPatch, } /// An alternative representation of `pageserver::tenant::TenantConf` with @@ -531,6 +537,9 @@ pub struct TenantConfig { pub timeline_offloading: Option, pub wal_receiver_protocol_override: Option, pub rel_size_v2_enabled: Option, + pub gc_compaction_enabled: Option, + pub gc_compaction_initial_threshold_kb: Option, + pub gc_compaction_ratio_percent: Option, } impl TenantConfig { @@ -561,6 +570,9 @@ impl TenantConfig { mut timeline_offloading, mut wal_receiver_protocol_override, mut rel_size_v2_enabled, + mut gc_compaction_enabled, + mut gc_compaction_initial_threshold_kb, + mut gc_compaction_ratio_percent, } = self; patch.checkpoint_distance.apply(&mut checkpoint_distance); @@ -606,6 +618,15 @@ impl TenantConfig { .wal_receiver_protocol_override .apply(&mut wal_receiver_protocol_override); patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled); + patch + .gc_compaction_enabled + .apply(&mut gc_compaction_enabled); + patch + .gc_compaction_initial_threshold_kb + .apply(&mut gc_compaction_initial_threshold_kb); + patch + .gc_compaction_ratio_percent + .apply(&mut gc_compaction_ratio_percent); Self { checkpoint_distance, @@ -633,6 +654,9 @@ impl TenantConfig { timeline_offloading, wal_receiver_protocol_override, rel_size_v2_enabled, + gc_compaction_enabled, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, } } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 05a311391c..e45ba2ca3b 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5476,6 +5476,11 @@ pub(crate) mod harness { timeline_offloading: Some(tenant_conf.timeline_offloading), wal_receiver_protocol_override: tenant_conf.wal_receiver_protocol_override, rel_size_v2_enabled: tenant_conf.rel_size_v2_enabled, + gc_compaction_enabled: Some(tenant_conf.gc_compaction_enabled), + gc_compaction_initial_threshold_kb: Some( + tenant_conf.gc_compaction_initial_threshold_kb, + ), + gc_compaction_ratio_percent: Some(tenant_conf.gc_compaction_ratio_percent), } } } diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 14d8e9ccd4..3db1445f6e 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -360,6 +360,15 @@ pub struct TenantConfOpt { #[serde(skip_serializing_if = "Option::is_none")] pub rel_size_v2_enabled: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub gc_compaction_enabled: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub gc_compaction_initial_threshold_kb: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub gc_compaction_ratio_percent: Option, } impl TenantConfOpt { @@ -429,6 +438,15 @@ impl TenantConfOpt { .wal_receiver_protocol_override .or(global_conf.wal_receiver_protocol_override), rel_size_v2_enabled: self.rel_size_v2_enabled.or(global_conf.rel_size_v2_enabled), + gc_compaction_enabled: self + .gc_compaction_enabled + .unwrap_or(global_conf.gc_compaction_enabled), + gc_compaction_initial_threshold_kb: self + .gc_compaction_initial_threshold_kb + .unwrap_or(global_conf.gc_compaction_initial_threshold_kb), + gc_compaction_ratio_percent: self + .gc_compaction_ratio_percent + .unwrap_or(global_conf.gc_compaction_ratio_percent), } } @@ -459,6 +477,9 @@ impl TenantConfOpt { mut timeline_offloading, mut wal_receiver_protocol_override, mut rel_size_v2_enabled, + mut gc_compaction_enabled, + mut gc_compaction_initial_threshold_kb, + mut gc_compaction_ratio_percent, } = self; patch.checkpoint_distance.apply(&mut checkpoint_distance); @@ -528,6 +549,15 @@ impl TenantConfOpt { .wal_receiver_protocol_override .apply(&mut wal_receiver_protocol_override); patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled); + patch + .gc_compaction_enabled + .apply(&mut gc_compaction_enabled); + patch + .gc_compaction_initial_threshold_kb + .apply(&mut gc_compaction_initial_threshold_kb); + patch + .gc_compaction_ratio_percent + .apply(&mut gc_compaction_ratio_percent); Ok(Self { checkpoint_distance, @@ -555,6 +585,9 @@ impl TenantConfOpt { timeline_offloading, wal_receiver_protocol_override, rel_size_v2_enabled, + gc_compaction_enabled, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, }) } } @@ -611,6 +644,9 @@ impl From for models::TenantConfig { timeline_offloading: value.timeline_offloading, wal_receiver_protocol_override: value.wal_receiver_protocol_override, rel_size_v2_enabled: value.rel_size_v2_enabled, + gc_compaction_enabled: value.gc_compaction_enabled, + gc_compaction_initial_threshold_kb: value.gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent: value.gc_compaction_ratio_percent, } } } diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index 30b6b07ca3..3824bc8f11 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -77,11 +77,17 @@ pub struct IndexPart { /// /// None means no aux files have been written to the storage before the point /// when this flag is introduced. + /// + /// This flag is not used any more as all tenants have been transitioned to the new aux file policy. #[serde(skip_serializing_if = "Option::is_none", default)] pub(crate) last_aux_file_policy: Option, #[serde(skip_serializing_if = "Option::is_none", default)] pub(crate) rel_size_migration: Option, + + /// The LSN of gc-compaction horizon. Once gc-compaction is finished for all layer files below an LSN, this LSN will be updated. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub(crate) l2_lsn: Option, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -116,10 +122,11 @@ impl IndexPart { /// - 9: +gc_blocking /// - 10: +import_pgdata /// - 11: +rel_size_migration - const LATEST_VERSION: usize = 11; + /// - 12: +l2_lsn + const LATEST_VERSION: usize = 12; // Versions we may see when reading from a bucket. - pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; pub const FILE_NAME: &'static str = "index_part.json"; @@ -136,6 +143,7 @@ impl IndexPart { last_aux_file_policy: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, } } @@ -437,6 +445,7 @@ mod tests { last_aux_file_policy: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -483,6 +492,7 @@ mod tests { last_aux_file_policy: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -530,6 +540,7 @@ mod tests { last_aux_file_policy: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -580,6 +591,7 @@ mod tests { last_aux_file_policy: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap(); @@ -625,6 +637,7 @@ mod tests { last_aux_file_policy: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -673,6 +686,7 @@ mod tests { last_aux_file_policy: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -726,6 +740,7 @@ mod tests { last_aux_file_policy: Some(AuxFilePolicy::V2), import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -784,6 +799,7 @@ mod tests { last_aux_file_policy: Default::default(), import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -843,6 +859,7 @@ mod tests { last_aux_file_policy: Default::default(), import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -907,6 +924,7 @@ mod tests { archived_at: None, import_pgdata: None, rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -984,6 +1002,7 @@ mod tests { idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()), }))), rel_size_migration: None, + l2_lsn: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -1062,6 +1081,87 @@ mod tests { idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()), }))), rel_size_migration: Some(RelSizeMigration::Legacy), + l2_lsn: None, + }; + + let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); + assert_eq!(part, expected); + } + + #[test] + fn v12_l2_lsn_is_parsed() { + let example = r#"{ + "version": 12, + "layer_metadata":{ + "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 }, + "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 } + }, + "disk_consistent_lsn":"0/16960E8", + "metadata": { + "disk_consistent_lsn": "0/16960E8", + "prev_record_lsn": "0/1696070", + "ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e", + "ancestor_lsn": "0/0", + "latest_gc_cutoff_lsn": "0/1696070", + "initdb_lsn": "0/1696070", + "pg_version": 14 + }, + "gc_blocking": { + "started_at": "2024-07-19T09:00:00.123", + "reasons": ["DetachAncestor"] + }, + "import_pgdata": { + "V1": { + "Done": { + "idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5", + "started_at": "2024-11-13T09:23:42.123", + "finished_at": "2024-11-13T09:42:23.123" + } + } + }, + "rel_size_migration": "legacy", + "l2_lsn": "0/16960E8" + }"#; + + let expected = IndexPart { + version: 12, + layer_metadata: HashMap::from([ + ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata { + file_size: 25600000, + generation: Generation::none(), + shard: ShardIndex::unsharded() + }), + ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata { + file_size: 9007199254741001, + generation: Generation::none(), + shard: ShardIndex::unsharded() + }) + ]), + disk_consistent_lsn: "0/16960E8".parse::().unwrap(), + metadata: TimelineMetadata::new( + Lsn::from_str("0/16960E8").unwrap(), + Some(Lsn::from_str("0/1696070").unwrap()), + Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()), + Lsn::INVALID, + Lsn::from_str("0/1696070").unwrap(), + Lsn::from_str("0/1696070").unwrap(), + 14, + ).with_recalculated_checksum().unwrap(), + deleted_at: None, + lineage: Default::default(), + gc_blocking: Some(GcBlocking { + started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"), + reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]), + }), + last_aux_file_policy: Default::default(), + archived_at: None, + import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{ + started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"), + finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"), + idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()), + }))), + rel_size_migration: Some(RelSizeMigration::Legacy), + l2_lsn: Some("0/16960E8".parse::().unwrap()), }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index b34dbddc80..b8d47346a3 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -177,6 +177,9 @@ def test_fully_custom_config(positive_env: NeonEnv): "args": {"format": "bincode", "compression": {"zstd": {"level": 1}}}, }, "rel_size_v2_enabled": True, + "gc_compaction_enabled": True, + "gc_compaction_initial_threshold_kb": 1024000, + "gc_compaction_ratio_percent": 200, } vps_http = env.storage_controller.pageserver_api()