pageserver: exclude gc_horizon from synthetic size calculation (#6407)

## Problem

See:
- https://github.com/neondatabase/neon/issues/6374

## Summary of changes

Previously we calculated synthetic size starting from whichever of the
gc_horizon cutoff and the pitr_interval cutoff was the lower LSN. Now we
ignore gc_horizon and start exclusively from the `pitr_interval` cutoff.
This is a more generous calculation for billing: users are no longer
charged for data retained only because of gc_horizon.
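For illustration, a minimal sketch of the before/after cutoff selection, with `Lsn` simplified to a `u64` newtype and `GcInfo` standing in for the pageserver's real struct (the field names follow the diff below; everything else here is an assumption):

```rust
use std::cmp;

// Sketch only: simplified stand-ins for the pageserver's Lsn and GcInfo types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Lsn(u64);

struct GcInfo {
    horizon_cutoff: Lsn, // space bound, derived from the gc_horizon setting
    pitr_cutoff: Lsn,    // time bound, derived from the pitr_interval setting
}

fn main() {
    let gc_info = GcInfo {
        horizon_cutoff: Lsn(0x1000), // the lower LSN, i.e. more history retained
        pitr_cutoff: Lsn(0x4000),
    };

    // Before: whichever cutoff is the lower LSN, so gc_horizon could inflate
    // the amount of history included in synthetic size.
    let old_cutoff = cmp::min(gc_info.horizon_cutoff, gc_info.pitr_cutoff);

    // After: only the PITR cutoff counts towards synthetic size.
    let new_cutoff = gc_info.pitr_cutoff;

    println!("old cutoff: {old_cutoff:?}, new cutoff: {new_cutoff:?}");
}
```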
Author: John Spray
Date: 2024-03-15 16:07:36 +00:00 (committed by GitHub)
Parent: 67522ce83d
Commit: bc1efa827f
3 changed files with 53 additions and 14 deletions


@@ -183,7 +183,13 @@ pub(super) async fn gather_inputs(
     // new gc run, which we have no control over. however differently from `Timeline::gc`
     // we don't consider the `Timeline::disk_consistent_lsn` at all, because we are not
     // actually removing files.
-    let mut next_gc_cutoff = cmp::min(gc_info.horizon_cutoff, gc_info.pitr_cutoff);
+    //
+    // We only consider [`GcInfo::pitr_cutoff`], and not [`GcInfo::horizon_cutoff`], because from
+    // a user's perspective they have only requested retention up to the time bound (pitr_cutoff), rather
+    // than a space bound (horizon cutoff). This means that if someone drops a database and waits for their
+    // PITR interval, they will see synthetic size decrease, even if we are still storing data inside
+    // horizon_cutoff.
+    let mut next_gc_cutoff = gc_info.pitr_cutoff;
     // If the caller provided a shorter retention period, use that instead of the GC cutoff.
     let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
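The distinction the new comment draws can be made concrete: `horizon_cutoff` is a space bound (roughly, everything within the last `gc_horizon` bytes of WAL), while `pitr_cutoff` is a time bound (the LSN the timeline had reached `pitr_interval` ago). A rough sketch under those assumptions, with simplified types and a hypothetical `find_lsn_for_timestamp` standing in for the real timestamp-to-LSN lookup:

```rust
use std::time::{Duration, SystemTime};

// Sketch only: simplified types; `find_lsn_for_timestamp` is a hypothetical stand-in
// for the pageserver's timestamp->LSN lookup.
type Lsn = u64;

fn find_lsn_for_timestamp(_ts: SystemTime) -> Lsn {
    0x3000 // pretend we resolved the timestamp to this LSN
}

fn main() {
    let last_record_lsn: Lsn = 0x5000;
    let gc_horizon: u64 = 0x1000; // bytes of recent WAL to always retain
    let pitr_interval = Duration::from_secs(7 * 24 * 3600);

    // Space bound: keep everything newer than `gc_horizon` bytes behind the head.
    let horizon_cutoff: Lsn = last_record_lsn.saturating_sub(gc_horizon);

    // Time bound: keep everything newer than the LSN we were at `pitr_interval` ago.
    let pitr_cutoff: Lsn = find_lsn_for_timestamp(SystemTime::now() - pitr_interval);

    // After this change, synthetic size starts from the time bound only.
    println!("horizon_cutoff={horizon_cutoff:#x} pitr_cutoff={pitr_cutoff:#x}");
}
```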


@@ -3784,8 +3784,11 @@ impl Timeline {
                 // The timestamp is in the future. That sounds impossible,
                 // but what it really means is that there hasn't been
                 // any commits since the cutoff timestamp.
+                //
+                // In this case we should use the LSN of the most recent commit,
+                // which is implicitly the last LSN in the log.
                 debug!("future({})", lsn);
-                cutoff_horizon
+                self.get_last_record_lsn()
             }
             LsnForTimestamp::Past(lsn) => {
                 debug!("past({})", lsn);


@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import List, Tuple
@@ -326,7 +327,7 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
     size_debug_file.write(size_debug)
 
 
-@pytest.mark.xfail
+@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
 def test_single_branch_get_tenant_size_grows(
     neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion
 ):
@@ -349,10 +350,21 @@ def test_single_branch_get_tenant_size_grows(
     # adjust the gc_horizon accordingly.
     if pg_version == PgVersion.V14:
         gc_horizon = 0x4A000
     elif pg_version == PgVersion.V15:
         gc_horizon = 0x3BA00
     elif pg_version == PgVersion.V16:
         gc_horizon = 210000
     else:
         raise NotImplementedError(pg_version)
 
-    neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
+    tenant_config = {
+        "compaction_period": "0s",
+        "gc_period": "0s",
+        "pitr_interval": "0s",
+        "gc_horizon": gc_horizon,
+    }
 
-    env = neon_env_builder.init_start()
+    env = neon_env_builder.init_start(initial_tenant_conf=tenant_config)
     tenant_id = env.initial_tenant
 
     branch_name, timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
@@ -405,6 +417,7 @@ def test_single_branch_get_tenant_size_grows(
         current_lsn = after_lsn
         size_debug_file.write(size_debug)
         assert size > 0
+        log.info(f"size: {size} at lsn {current_lsn}")
         return (current_lsn, size)
 
     with env.endpoints.create_start(
@@ -492,24 +505,41 @@ def test_single_branch_get_tenant_size_grows(
         collected_responses.append(("DELETE", current_lsn, size))
 
+        size_before_drop = get_current_consistent_size(
+            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
+        )[1]
+
         with endpoint.cursor() as cur:
             cur.execute("DROP TABLE t0")
 
+        # Without setting a PITR interval, dropping the table doesn't reclaim any space
+        # from the user's point of view, because the DROP transaction is too small
+        # to fall out of gc_horizon.
+        (current_lsn, size) = get_current_consistent_size(
+            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        prev_size = collected_responses[-1][2]
+        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
+
+        # Set a tiny PITR interval to allow the DROP to impact the synthetic size
+        # Because synthetic size calculation uses pitr interval when available,
+        # when our tenant is configured with a tiny pitr interval, dropping a table should
+        # cause synthetic size to go down immediately
+        tenant_config["pitr_interval"] = "1ms"
+        env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config)
+        (current_lsn, size) = get_current_consistent_size(
+            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        assert size < size_before_drop
-        # The size of the tenant should still be as large as before we dropped
-        # the table, because the drop operation can still be undone in the PITR
-        # defined by gc_horizon.
-        (current_lsn, size) = get_current_consistent_size(
-            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
-        )
-        prev_size = collected_responses[-1][2]
-        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
 
         collected_responses.append(("DROP", current_lsn, size))
 
         # Should have gone past gc_horizon, otherwise gc_horizon is too large
-        assert current_lsn - initdb_lsn > gc_horizon
+        bytes_written = current_lsn - initdb_lsn
+        assert bytes_written > gc_horizon
 
         # this isn't too many lines to forget for a while. observed while
         # developing these tests that locally the value is a bit more than what we