From 8143270c4fa6d6e5eefb5f98984a390841460aa8 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 2 Jul 2025 14:18:23 +0200 Subject: [PATCH] fix test_readonly_node_gc, it (among other things) tests the lease deadline --- libs/pageserver_api/src/models.rs | 1 + pageserver/src/tenant.rs | 64 +++++++++++++++++++++-- pageserver/src/tenant/mgr.rs | 9 +++- pageserver/src/tenant/tasks.rs | 1 + test_runner/fixtures/pageserver/http.py | 3 +- test_runner/regress/test_readonly_node.py | 8 +-- 6 files changed, 76 insertions(+), 10 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 16545364c1..5f36e8d2b7 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -1735,6 +1735,7 @@ pub enum DownloadRemoteLayersTaskState { #[derive(Debug, Serialize, Deserialize)] pub struct TimelineGcRequest { pub gc_horizon: Option, + pub ignore_lease_deadline: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index e5caf0dcb7..012710a0ed 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1145,6 +1145,22 @@ pub(crate) enum LoadConfigError { NotFound(Utf8PathBuf), } +pub(crate) enum IgnoreLeaseDeadline { + Default, + No, + Yes, +} + +impl From> for IgnoreLeaseDeadline { + fn from(value: Option) -> Self { + match value { + None => IgnoreLeaseDeadline::Default, + Some(false) => IgnoreLeaseDeadline::No, + Some(true) => IgnoreLeaseDeadline::Yes, + } + } +} + impl TenantShard { /// Yet another helper for timeline initialization. /// @@ -3076,6 +3092,7 @@ impl TenantShard { target_timeline_id: Option, horizon: u64, pitr: Duration, + ignore_lease_deadline: IgnoreLeaseDeadline, cancel: &CancellationToken, ctx: &RequestContext, ) -> Result { @@ -3112,7 +3129,11 @@ impl TenantShard { // with durable leases, take a shortcut here and skip lease deadline check // for all tests. // Cf https://databricks.atlassian.net/browse/LKB-92?focusedCommentId=6722329 - let ignore_lease_deadline = cfg!(test) || cfg!(feature = "testing"); + let ignore_lease_deadline = match ignore_lease_deadline { + IgnoreLeaseDeadline::Default => cfg!(test) || cfg!(feature = "testing"), + IgnoreLeaseDeadline::No => false, + IgnoreLeaseDeadline::Yes => true, + }; if !ignore_lease_deadline && conf.is_gc_blocked_by_lsn_lease_deadline() { info!("Skipping GC because lsn lease deadline is not reached"); return Ok(GcResult::default()); @@ -6701,6 +6722,7 @@ mod tests { Some(TIMELINE_ID), 0x10, Duration::ZERO, + IgnoreLeaseDeadline::Default, &CancellationToken::new(), &ctx, ) @@ -6814,6 +6836,7 @@ mod tests { Some(TIMELINE_ID), 0x10, Duration::ZERO, + IgnoreLeaseDeadline::Default, &CancellationToken::new(), &ctx, ) @@ -6874,6 +6897,7 @@ mod tests { Some(TIMELINE_ID), 0x10, Duration::ZERO, + IgnoreLeaseDeadline::Default, &CancellationToken::new(), &ctx, ) @@ -6908,6 +6932,7 @@ mod tests { Some(TIMELINE_ID), 0x10, Duration::ZERO, + IgnoreLeaseDeadline::Default, &CancellationToken::new(), &ctx, ) @@ -7189,7 +7214,14 @@ mod tests { // this doesn't really need to use the timeline_id target, but it is closer to what it // originally was. let res = tenant - .gc_iteration(Some(timeline.timeline_id), 0, Duration::ZERO, &cancel, ctx) + .gc_iteration( + Some(timeline.timeline_id), + 0, + Duration::ZERO, + IgnoreLeaseDeadline::Default, + &cancel, + ctx, + ) .await?; assert_eq!(res.layers_removed, 0, "this never removes anything"); @@ -7807,7 +7839,14 @@ mod tests { // Perform a cycle of flush, and GC tline.freeze_and_flush().await?; tenant - .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx) + .gc_iteration( + Some(tline.timeline_id), + 0, + Duration::ZERO, + IgnoreLeaseDeadline::Default, + &cancel, + &ctx, + ) .await?; } @@ -7898,7 +7937,14 @@ mod tests { tline.freeze_and_flush().await?; tline.compact(&cancel, EnumSet::default(), &ctx).await?; tenant - .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx) + .gc_iteration( + Some(tline.timeline_id), + 0, + Duration::ZERO, + IgnoreLeaseDeadline::Default, + &cancel, + &ctx, + ) .await?; } @@ -8234,7 +8280,14 @@ mod tests { ) .await?; tenant - .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx) + .gc_iteration( + Some(tline.timeline_id), + 0, + Duration::ZERO, + IgnoreLeaseDeadline::Default, + &cancel, + &ctx, + ) .await?; } } @@ -9455,6 +9508,7 @@ mod tests { Some(TIMELINE_ID), 0, Duration::ZERO, + IgnoreLeaseDeadline::Default, &CancellationToken::new(), &ctx, ) diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 95f5c60170..81cd6f4212 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -2365,7 +2365,14 @@ impl TenantManager { #[allow(unused_mut)] let mut result = tenant - .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx) + .gc_iteration( + Some(timeline_id), + gc_horizon, + pitr, + crate::tenant::IgnoreLeaseDeadline::from(gc_req.ignore_lease_deadline), + &cancel, + &ctx, + ) .await; // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it // better once the types support it. diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 954dd38bb4..e4890efdb0 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -406,6 +406,7 @@ async fn gc_loop(tenant: Arc, cancel: CancellationToken) { None, gc_horizon, tenant.get_pitr_interval(), + crate::tenant::IgnoreLeaseDeadline::Default, &cancel, &ctx, )) diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index d9037f2d08..d60fc34fa3 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -695,6 +695,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, gc_horizon: int | None, + ignore_lease_deadline: bool | None = None, ) -> dict[str, Any]: """ Unlike most handlers, this will wait for the layers to be actually @@ -707,7 +708,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): ) res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/do_gc", - json={"gc_horizon": gc_horizon}, + json={"gc_horizon": gc_horizon, "ignore_lease_deadline": ignore_lease_deadline}, ) log.info(f"Got GC request response code: {res.status_code}") self.verbose_error(res) diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py index ee934a900d..98877ad7f9 100644 --- a/test_runner/regress/test_readonly_node.py +++ b/test_runner/regress/test_readonly_node.py @@ -201,11 +201,13 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder): for shard, ps in tenant_get_shards(env, env.initial_tenant): client = ps.http_client() layers_guarded_before_gc = get_layers_protected_by_lease( - client, shard, env.initial_timeline, lease_lsn=lsn + client, shard, env.initial_timeline, lease_lsn=lease_lsn + ) + gc_result = client.timeline_gc( + shard, env.initial_timeline, 0, ignore_lease_deadline=False ) - gc_result = client.timeline_gc(shard, env.initial_timeline, 0) layers_guarded_after_gc = get_layers_protected_by_lease( - client, shard, env.initial_timeline, lease_lsn=lsn + client, shard, env.initial_timeline, lease_lsn=lease_lsn ) # Note: cannot assert on `layers_removed` here because it could be layers