fix test_readonly_node_gc, it (among other things) tests the lease deadline

This commit is contained in:
Christian Schwarz
2025-07-02 14:18:23 +02:00
parent 5d499f62cd
commit 8143270c4f
6 changed files with 76 additions and 10 deletions

View File

@@ -1735,6 +1735,7 @@ pub enum DownloadRemoteLayersTaskState {
#[derive(Debug, Serialize, Deserialize)]
pub struct TimelineGcRequest {
pub gc_horizon: Option<u64>,
pub ignore_lease_deadline: Option<bool>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -1145,6 +1145,22 @@ pub(crate) enum LoadConfigError {
NotFound(Utf8PathBuf),
}
pub(crate) enum IgnoreLeaseDeadline {
Default,
No,
Yes,
}
impl From<Option<bool>> for IgnoreLeaseDeadline {
fn from(value: Option<bool>) -> Self {
match value {
None => IgnoreLeaseDeadline::Default,
Some(false) => IgnoreLeaseDeadline::No,
Some(true) => IgnoreLeaseDeadline::Yes,
}
}
}
impl TenantShard {
/// Yet another helper for timeline initialization.
///
@@ -3076,6 +3092,7 @@ impl TenantShard {
target_timeline_id: Option<TimelineId>,
horizon: u64,
pitr: Duration,
ignore_lease_deadline: IgnoreLeaseDeadline,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> Result<GcResult, GcError> {
@@ -3112,7 +3129,11 @@ impl TenantShard {
// with durable leases, take a shortcut here and skip lease deadline check
// for all tests.
// Cf https://databricks.atlassian.net/browse/LKB-92?focusedCommentId=6722329
let ignore_lease_deadline = cfg!(test) || cfg!(feature = "testing");
let ignore_lease_deadline = match ignore_lease_deadline {
IgnoreLeaseDeadline::Default => cfg!(test) || cfg!(feature = "testing"),
IgnoreLeaseDeadline::No => false,
IgnoreLeaseDeadline::Yes => true,
};
if !ignore_lease_deadline && conf.is_gc_blocked_by_lsn_lease_deadline() {
info!("Skipping GC because lsn lease deadline is not reached");
return Ok(GcResult::default());
@@ -6701,6 +6722,7 @@ mod tests {
Some(TIMELINE_ID),
0x10,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&CancellationToken::new(),
&ctx,
)
@@ -6814,6 +6836,7 @@ mod tests {
Some(TIMELINE_ID),
0x10,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&CancellationToken::new(),
&ctx,
)
@@ -6874,6 +6897,7 @@ mod tests {
Some(TIMELINE_ID),
0x10,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&CancellationToken::new(),
&ctx,
)
@@ -6908,6 +6932,7 @@ mod tests {
Some(TIMELINE_ID),
0x10,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&CancellationToken::new(),
&ctx,
)
@@ -7189,7 +7214,14 @@ mod tests {
// this doesn't really need to use the timeline_id target, but it is closer to what it
// originally was.
let res = tenant
.gc_iteration(Some(timeline.timeline_id), 0, Duration::ZERO, &cancel, ctx)
.gc_iteration(
Some(timeline.timeline_id),
0,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&cancel,
ctx,
)
.await?;
assert_eq!(res.layers_removed, 0, "this never removes anything");
@@ -7807,7 +7839,14 @@ mod tests {
// Perform a cycle of flush, and GC
tline.freeze_and_flush().await?;
tenant
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
.gc_iteration(
Some(tline.timeline_id),
0,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&cancel,
&ctx,
)
.await?;
}
@@ -7898,7 +7937,14 @@ mod tests {
tline.freeze_and_flush().await?;
tline.compact(&cancel, EnumSet::default(), &ctx).await?;
tenant
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
.gc_iteration(
Some(tline.timeline_id),
0,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&cancel,
&ctx,
)
.await?;
}
@@ -8234,7 +8280,14 @@ mod tests {
)
.await?;
tenant
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
.gc_iteration(
Some(tline.timeline_id),
0,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&cancel,
&ctx,
)
.await?;
}
}
@@ -9455,6 +9508,7 @@ mod tests {
Some(TIMELINE_ID),
0,
Duration::ZERO,
IgnoreLeaseDeadline::Default,
&CancellationToken::new(),
&ctx,
)

View File

@@ -2365,7 +2365,14 @@ impl TenantManager {
#[allow(unused_mut)]
let mut result = tenant
.gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
.gc_iteration(
Some(timeline_id),
gc_horizon,
pitr,
crate::tenant::IgnoreLeaseDeadline::from(gc_req.ignore_lease_deadline),
&cancel,
&ctx,
)
.await;
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
// better once the types support it.

View File

@@ -406,6 +406,7 @@ async fn gc_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
None,
gc_horizon,
tenant.get_pitr_interval(),
crate::tenant::IgnoreLeaseDeadline::Default,
&cancel,
&ctx,
))

View File

@@ -695,6 +695,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
tenant_id: TenantId | TenantShardId,
timeline_id: TimelineId,
gc_horizon: int | None,
ignore_lease_deadline: bool | None = None,
) -> dict[str, Any]:
"""
Unlike most handlers, this will wait for the layers to be actually
@@ -707,7 +708,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
)
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/do_gc",
json={"gc_horizon": gc_horizon},
json={"gc_horizon": gc_horizon, "ignore_lease_deadline": ignore_lease_deadline},
)
log.info(f"Got GC request response code: {res.status_code}")
self.verbose_error(res)

View File

@@ -201,11 +201,13 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
for shard, ps in tenant_get_shards(env, env.initial_tenant):
client = ps.http_client()
layers_guarded_before_gc = get_layers_protected_by_lease(
client, shard, env.initial_timeline, lease_lsn=lsn
client, shard, env.initial_timeline, lease_lsn=lease_lsn
)
gc_result = client.timeline_gc(
shard, env.initial_timeline, 0, ignore_lease_deadline=False
)
gc_result = client.timeline_gc(shard, env.initial_timeline, 0)
layers_guarded_after_gc = get_layers_protected_by_lease(
client, shard, env.initial_timeline, lease_lsn=lsn
client, shard, env.initial_timeline, lease_lsn=lease_lsn
)
# Note: cannot assert on `layers_removed` here because it could be layers