mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-16 12:40:36 +00:00
pageserver: change pitr_interval=0 behavior (#7423)
## Problem We already made a change in #6407 to make pitr_interval authoritative for synthetic size calculations (do not charge users for data retained due to gc_horizon), but that change didn't cover the case where someone entirely disables time-based retention by setting pitr_interval=0 Relates to: https://github.com/neondatabase/neon/issues/6374 ## Summary of changes When pitr_interval is zero, do not set `pitr_cutoff` based on gc_horizon. gc_horizon is still enforced, but separately (its value is passed separately; there was never a need to clamp pitr_cutoff to gc_horizon). ## More detail ### Issue 1 Before this PR, we would skip the update_gc_info for timelines with last_record_lsn() < gc_horizon. Let's call such timelines "tiny". The rationale for that presumably was that we can't GC anything in the tiny timelines, so why bother to call update_gc_info(). However, synthetic size calculation relies on up-to-date update_gc_info() data. Before this PR, tiny timelines would never get an updated GcInfo::pitr_horizon (it remained Lsn(0)), even on projects with pitr_interval=0d. With this PR, update_gc_info is always called, hence GcInfo::pitr_horizon is always updated, thereby providing the synthetic size calculation with up-to-date data. ### Issue 2 Before this PR, regardless of whether the timeline is "tiny" or not, GcInfo::pitr_horizon was clamped to at least last_record_lsn - gc_horizon, even if the pitr window in terms of LSN range was shorter (i.e. less) than the gc_horizon. With this PR, that clamping is removed, so, for pitr_interval=0, pitr_horizon = last_record_lsn.
This commit is contained in:
@@ -2870,20 +2870,23 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(cutoff) = timeline.get_last_record_lsn().checked_sub(horizon) {
|
||||
let branchpoints: Vec<Lsn> = all_branchpoints
|
||||
.range((
|
||||
Included((timeline_id, Lsn(0))),
|
||||
Included((timeline_id, Lsn(u64::MAX))),
|
||||
))
|
||||
.map(|&x| x.1)
|
||||
.collect();
|
||||
timeline
|
||||
.update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
|
||||
.await?;
|
||||
let cutoff = timeline
|
||||
.get_last_record_lsn()
|
||||
.checked_sub(horizon)
|
||||
.unwrap_or(Lsn(0));
|
||||
|
||||
gc_timelines.push(timeline);
|
||||
}
|
||||
let branchpoints: Vec<Lsn> = all_branchpoints
|
||||
.range((
|
||||
Included((timeline_id, Lsn(0))),
|
||||
Included((timeline_id, Lsn(u64::MAX))),
|
||||
))
|
||||
.map(|&x| x.1)
|
||||
.collect();
|
||||
timeline
|
||||
.update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
|
||||
.await?;
|
||||
|
||||
gc_timelines.push(timeline);
|
||||
}
|
||||
drop(gc_cs);
|
||||
Ok(gc_timelines)
|
||||
|
||||
@@ -4244,9 +4244,8 @@ impl Timeline {
|
||||
*self.get_latest_gc_cutoff_lsn()
|
||||
}
|
||||
} else {
|
||||
// No time-based retention was configured. Set time-based cutoff to
|
||||
// same as LSN based.
|
||||
cutoff_horizon
|
||||
// No time-based retention was configured. Interpret this as "keep no history".
|
||||
self.get_last_record_lsn()
|
||||
};
|
||||
|
||||
// Grab the lock and update the values
|
||||
|
||||
@@ -292,33 +292,12 @@ def test_single_branch_get_tenant_size_grows(
|
||||
Operate on single branch reading the tenants size after each transaction.
|
||||
"""
|
||||
|
||||
# Disable automatic gc and compaction.
|
||||
# The pitr_interval here is quite problematic, so we cannot really use it.
|
||||
# it'd have to be calibrated per test executing env.
|
||||
|
||||
# there was a bug which was hidden if the create table and first batch of
|
||||
# inserts is larger than gc_horizon. for example 0x20000 here hid the fact
|
||||
# that there next_gc_cutoff could be smaller than initdb_lsn, which will
|
||||
# obviously lead to issues when calculating the size.
|
||||
gc_horizon = 0x3BA00
|
||||
|
||||
# it's a bit of a hack, but different versions of postgres have different
|
||||
# amount of WAL generated for the same amount of data. so we need to
|
||||
# adjust the gc_horizon accordingly.
|
||||
if pg_version == PgVersion.V14:
|
||||
gc_horizon = 0x4A000
|
||||
elif pg_version == PgVersion.V15:
|
||||
gc_horizon = 0x3BA00
|
||||
elif pg_version == PgVersion.V16:
|
||||
gc_horizon = 210000
|
||||
else:
|
||||
raise NotImplementedError(pg_version)
|
||||
|
||||
# Disable automatic compaction and GC, and set a long PITR interval: we will expect
|
||||
# size to always increase with writes as all writes remain within the PITR
|
||||
tenant_config = {
|
||||
"compaction_period": "0s",
|
||||
"gc_period": "0s",
|
||||
"pitr_interval": "0s",
|
||||
"gc_horizon": gc_horizon,
|
||||
"pitr_interval": "3600s",
|
||||
}
|
||||
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=tenant_config)
|
||||
@@ -332,18 +311,6 @@ def test_single_branch_get_tenant_size_grows(
|
||||
|
||||
size_debug_file = open(test_output_dir / "size_debug.html", "w")
|
||||
|
||||
def check_size_change(
|
||||
current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev_size: int
|
||||
):
|
||||
if current_lsn - initdb_lsn >= gc_horizon:
|
||||
assert (
|
||||
size >= prev_size
|
||||
), f"tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
|
||||
else:
|
||||
assert (
|
||||
size > prev_size
|
||||
), f"tenant_size should grow, because we continue to add WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
|
||||
|
||||
def get_current_consistent_size(
|
||||
env: NeonEnv,
|
||||
endpoint: Endpoint,
|
||||
@@ -412,14 +379,6 @@ def test_single_branch_get_tenant_size_grows(
|
||||
)
|
||||
|
||||
prev_size = collected_responses[-1][2]
|
||||
|
||||
# branch start shouldn't be past gc_horizon yet
|
||||
# thus the size should grow as we insert more data
|
||||
# "gc_horizon" is tuned so that it kicks in _after_ the
|
||||
# insert phase, but before the update phase ends.
|
||||
assert (
|
||||
current_lsn - initdb_lsn <= gc_horizon
|
||||
), "Tuning of GC window is likely out-of-date"
|
||||
assert size > prev_size
|
||||
|
||||
collected_responses.append(("INSERT", current_lsn, size))
|
||||
@@ -439,8 +398,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
)
|
||||
|
||||
prev_size = collected_responses[-1][2]
|
||||
|
||||
check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
|
||||
assert size > prev_size
|
||||
|
||||
collected_responses.append(("UPDATE", current_lsn, size))
|
||||
|
||||
@@ -457,8 +415,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
)
|
||||
|
||||
prev_size = collected_responses[-1][2]
|
||||
|
||||
check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
|
||||
assert size > prev_size
|
||||
|
||||
collected_responses.append(("DELETE", current_lsn, size))
|
||||
|
||||
@@ -469,20 +426,20 @@ def test_single_branch_get_tenant_size_grows(
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("DROP TABLE t0")
|
||||
|
||||
# Without setting a PITR interval, dropping the table doesn't reclaim any space
|
||||
# from the user's point of view, because the DROP transaction is too small
|
||||
# to fall out of gc_horizon.
|
||||
# Dropping the table doesn't reclaim any space
|
||||
# from the user's point of view, because the DROP transaction is still
|
||||
# within pitr_interval.
|
||||
(current_lsn, size) = get_current_consistent_size(
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
)
|
||||
prev_size = collected_responses[-1][2]
|
||||
check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
|
||||
assert size >= prev_size
|
||||
prev_size = size
|
||||
|
||||
# Set a tiny PITR interval to allow the DROP to impact the synthetic size
|
||||
# Set a zero PITR interval to allow the DROP to impact the synthetic size
|
||||
# Because synthetic size calculation uses pitr interval when available,
|
||||
# when our tenant is configured with a tiny pitr interval, dropping a table should
|
||||
# cause synthetic size to go down immediately
|
||||
tenant_config["pitr_interval"] = "1ms"
|
||||
tenant_config["pitr_interval"] = "0s"
|
||||
env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config)
|
||||
(current_lsn, size) = get_current_consistent_size(
|
||||
env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
|
||||
@@ -494,10 +451,6 @@ def test_single_branch_get_tenant_size_grows(
|
||||
# defined by gc_horizon.
|
||||
collected_responses.append(("DROP", current_lsn, size))
|
||||
|
||||
# Should have gone past gc_horizon, otherwise gc_horizon is too large
|
||||
bytes_written = current_lsn - initdb_lsn
|
||||
assert bytes_written > gc_horizon
|
||||
|
||||
# this isn't too many lines to forget for a while. observed while
|
||||
# developing these tests that locally the value is a bit more than what we
|
||||
# get in the ci.
|
||||
|
||||
Reference in New Issue
Block a user