pageserver: change pitr_interval=0 behavior (#7423)

## Problem

We already made a change in #6407 to make pitr_interval authoritative
for synthetic size calculations (do not charge users for data retained
due to gc_horizon), but that change didn't cover the case where someone
entirely disables time-based retention by setting pitr_interval=0

Relates to: https://github.com/neondatabase/neon/issues/6374

## Summary of changes

When pitr_interval is zero, do not set `pitr_cutoff` based on
gc_horizon.

gc_horizon is still enforced, but separately (its value is passed
separately; there was never a need to clamp pitr_cutoff to gc_horizon)

## More detail

### Issue 1
Before this PR, we would skip the update_gc_info for timelines with
last_record_lsn() < gc_horizon.
Let's call such timelines "tiny".

The rationale for that presumably was that since we can't GC anything in
the tiny timelines, there was no point in calling update_gc_info().

However, synthetic size calculation relies on up-to-date
update_gc_info() data.

Before this PR, tiny timelines would never get an updated
GcInfo::pitr_horizon (it remained Lsn(0)).
Even on projects with pitr_interval=0d.

With this PR, update_gc_info is always called, hence
GcInfo::pitr_horizon is always updated, thereby
providing synthetic size calculation with up-to-date data.

### Issue 2
Before this PR, regardless of whether the timeline is "tiny" or not,
GcInfo::pitr_horizon was clamped to at least last_record_lsn -
gc_horizon, even if the pitr window, expressed as an LSN range, was
shorter than the gc_horizon.

With this PR, that clamping is removed, so, for pitr_interval=0, the
pitr_horizon = last_record_lsn.
This commit is contained in:
John Spray
2024-04-23 17:16:17 +01:00
committed by GitHub
parent e22c072064
commit ee9ec26808
3 changed files with 30 additions and 75 deletions

View File

@@ -2870,20 +2870,23 @@ impl Tenant {
}
}
if let Some(cutoff) = timeline.get_last_record_lsn().checked_sub(horizon) {
let branchpoints: Vec<Lsn> = all_branchpoints
.range((
Included((timeline_id, Lsn(0))),
Included((timeline_id, Lsn(u64::MAX))),
))
.map(|&x| x.1)
.collect();
timeline
.update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
.await?;
let cutoff = timeline
.get_last_record_lsn()
.checked_sub(horizon)
.unwrap_or(Lsn(0));
gc_timelines.push(timeline);
}
let branchpoints: Vec<Lsn> = all_branchpoints
.range((
Included((timeline_id, Lsn(0))),
Included((timeline_id, Lsn(u64::MAX))),
))
.map(|&x| x.1)
.collect();
timeline
.update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
.await?;
gc_timelines.push(timeline);
}
drop(gc_cs);
Ok(gc_timelines)

View File

@@ -4244,9 +4244,8 @@ impl Timeline {
*self.get_latest_gc_cutoff_lsn()
}
} else {
// No time-based retention was configured. Set time-based cutoff to
// same as LSN based.
cutoff_horizon
// No time-based retention was configured. Interpret this as "keep no history".
self.get_last_record_lsn()
};
// Grab the lock and update the values

View File

@@ -292,33 +292,12 @@ def test_single_branch_get_tenant_size_grows(
Operate on single branch reading the tenants size after each transaction.
"""
# Disable automatic gc and compaction.
# The pitr_interval here is quite problematic, so we cannot really use it.
# it'd have to be calibrated per test executing env.
# there was a bug which was hidden if the create table and first batch of
# inserts is larger than gc_horizon. for example 0x20000 here hid the fact
# that there next_gc_cutoff could be smaller than initdb_lsn, which will
# obviously lead to issues when calculating the size.
gc_horizon = 0x3BA00
# it's a bit of a hack, but different versions of postgres have different
# amount of WAL generated for the same amount of data. so we need to
# adjust the gc_horizon accordingly.
if pg_version == PgVersion.V14:
gc_horizon = 0x4A000
elif pg_version == PgVersion.V15:
gc_horizon = 0x3BA00
elif pg_version == PgVersion.V16:
gc_horizon = 210000
else:
raise NotImplementedError(pg_version)
# Disable automatic compaction and GC, and set a long PITR interval: we will expect
# size to always increase with writes as all writes remain within the PITR
tenant_config = {
"compaction_period": "0s",
"gc_period": "0s",
"pitr_interval": "0s",
"gc_horizon": gc_horizon,
"pitr_interval": "3600s",
}
env = neon_env_builder.init_start(initial_tenant_conf=tenant_config)
@@ -332,18 +311,6 @@ def test_single_branch_get_tenant_size_grows(
size_debug_file = open(test_output_dir / "size_debug.html", "w")
def check_size_change(
current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev_size: int
):
if current_lsn - initdb_lsn >= gc_horizon:
assert (
size >= prev_size
), f"tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
else:
assert (
size > prev_size
), f"tenant_size should grow, because we continue to add WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
def get_current_consistent_size(
env: NeonEnv,
endpoint: Endpoint,
@@ -412,14 +379,6 @@ def test_single_branch_get_tenant_size_grows(
)
prev_size = collected_responses[-1][2]
# branch start shouldn't be past gc_horizon yet
# thus the size should grow as we insert more data
# "gc_horizon" is tuned so that it kicks in _after_ the
# insert phase, but before the update phase ends.
assert (
current_lsn - initdb_lsn <= gc_horizon
), "Tuning of GC window is likely out-of-date"
assert size > prev_size
collected_responses.append(("INSERT", current_lsn, size))
@@ -439,8 +398,7 @@ def test_single_branch_get_tenant_size_grows(
)
prev_size = collected_responses[-1][2]
check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
assert size > prev_size
collected_responses.append(("UPDATE", current_lsn, size))
@@ -457,8 +415,7 @@ def test_single_branch_get_tenant_size_grows(
)
prev_size = collected_responses[-1][2]
check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
assert size > prev_size
collected_responses.append(("DELETE", current_lsn, size))
@@ -469,20 +426,20 @@ def test_single_branch_get_tenant_size_grows(
with endpoint.cursor() as cur:
cur.execute("DROP TABLE t0")
# Without setting a PITR interval, dropping the table doesn't reclaim any space
# from the user's point of view, because the DROP transaction is too small
# to fall out of gc_horizon.
# Dropping the table doesn't reclaim any space
# from the user's point of view, because the DROP transaction is still
# within pitr_interval.
(current_lsn, size) = get_current_consistent_size(
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
)
prev_size = collected_responses[-1][2]
check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
assert size >= prev_size
prev_size = size
# Set a tiny PITR interval to allow the DROP to impact the synthetic size
# Set a zero PITR interval to allow the DROP to impact the synthetic size
# Because synthetic size calculation uses pitr interval when available,
# when our tenant is configured with a tiny pitr interval, dropping a table should
# cause synthetic size to go down immediately
tenant_config["pitr_interval"] = "1ms"
tenant_config["pitr_interval"] = "0s"
env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config)
(current_lsn, size) = get_current_consistent_size(
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
@@ -494,10 +451,6 @@ def test_single_branch_get_tenant_size_grows(
# defined by gc_horizon.
collected_responses.append(("DROP", current_lsn, size))
# Should have gone past gc_horizon, otherwise gc_horizon is too large
bytes_written = current_lsn - initdb_lsn
assert bytes_written > gc_horizon
# this isn't too many lines to forget for a while. observed while
# developing these tests that locally the value is a bit more than what we
# get in the ci.