From bc1efa827f794fee9ac3755b4d51b344dd2cefbf Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 15 Mar 2024 16:07:36 +0000 Subject: [PATCH] pageserver: exclude gc_horizon from synthetic size calculation (#6407) ## Problem See: - https://github.com/neondatabase/neon/issues/6374 ## Summary of changes Whereas previously we calculated synthetic size from the gc_horizon or the pitr_interval (whichever is the lower LSN), now we ignore gc_horizon and exclusively start from the `pitr_interval`. This is a more generous calculation for billing, where we do not charge users for data retained due to gc_horizon. --- pageserver/src/tenant/size.rs | 8 +++- pageserver/src/tenant/timeline.rs | 5 ++- test_runner/regress/test_tenant_size.py | 54 +++++++++++++++++++------ 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index e0b1652d98..ad79b74d8b 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -183,7 +183,13 @@ pub(super) async fn gather_inputs( // new gc run, which we have no control over. however differently from `Timeline::gc` // we don't consider the `Timeline::disk_consistent_lsn` at all, because we are not // actually removing files. - let mut next_gc_cutoff = cmp::min(gc_info.horizon_cutoff, gc_info.pitr_cutoff); + // + // We only consider [`GcInfo::pitr_cutoff`], and not [`GcInfo::horizon_cutoff`], because from + // a user's perspective they have only requested retention up to the time bound (pitr_cutoff), rather + // than a space bound (horizon cutoff). This means that if someone drops a database and waits for their + // PITR interval, they will see synthetic size decrease, even if we are still storing data inside + // horizon_cutoff. + let mut next_gc_cutoff = gc_info.pitr_cutoff; // If the caller provided a shorter retention period, use that instead of the GC cutoff. let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period { diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index d507a19de9..2ab7301cce 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3784,8 +3784,11 @@ impl Timeline { // The timestamp is in the future. That sounds impossible, // but what it really means is that there hasn't been // any commits since the cutoff timestamp. + // + // In this case we should use the LSN of the most recent commit, + // which is implicitly the last LSN in the log. debug!("future({})", lsn); - cutoff_horizon + self.get_last_record_lsn() } LsnForTimestamp::Past(lsn) => { debug!("past({})", lsn); diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 7cea301a9c..025cc930d7 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -1,3 +1,4 @@ +import os from pathlib import Path from typing import List, Tuple @@ -326,7 +327,7 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa size_debug_file.write(size_debug) -@pytest.mark.xfail +@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build") def test_single_branch_get_tenant_size_grows( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion ): @@ -349,10 +350,21 @@ def test_single_branch_get_tenant_size_grows( # adjust the gc_horizon accordingly. if pg_version == PgVersion.V14: gc_horizon = 0x4A000 + elif pg_version == PgVersion.V15: + gc_horizon = 0x3BA00 + elif pg_version == PgVersion.V16: + gc_horizon = 210000 + else: + raise NotImplementedError(pg_version) - neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}" + tenant_config = { + "compaction_period": "0s", + "gc_period": "0s", + "pitr_interval": "0s", + "gc_horizon": gc_horizon, + } - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_conf=tenant_config) tenant_id = env.initial_tenant branch_name, timeline_id = env.neon_cli.list_timelines(tenant_id)[0] @@ -405,6 +417,7 @@ def test_single_branch_get_tenant_size_grows( current_lsn = after_lsn size_debug_file.write(size_debug) assert size > 0 + log.info(f"size: {size} at lsn {current_lsn}") return (current_lsn, size) with env.endpoints.create_start( @@ -492,24 +505,41 @@ def test_single_branch_get_tenant_size_grows( collected_responses.append(("DELETE", current_lsn, size)) + size_before_drop = get_current_consistent_size( + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id + )[1] + with endpoint.cursor() as cur: cur.execute("DROP TABLE t0") + # Without setting a PITR interval, dropping the table doesn't reclaim any space + # from the user's point of view, because the DROP transaction is too small + # to fall out of gc_horizon. + (current_lsn, size) = get_current_consistent_size( + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id + ) + prev_size = collected_responses[-1][2] + check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size) + + # Set a tiny PITR interval to allow the DROP to impact the synthetic size + # Because synthetic size calculation uses pitr interval when available, + # when our tenant is configured with a tiny pitr interval, dropping a table should + # cause synthetic size to go down immediately + tenant_config["pitr_interval"] = "1ms" + env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config) + (current_lsn, size) = get_current_consistent_size( + env, endpoint, size_debug_file, http_client, tenant_id, timeline_id + ) + assert size < size_before_drop + # The size of the tenant should still be as large as before we dropped # the table, because the drop operation can still be undone in the PITR # defined by gc_horizon. - (current_lsn, size) = get_current_consistent_size( - env, endpoint, size_debug_file, http_client, tenant_id, timeline_id - ) - - prev_size = collected_responses[-1][2] - - check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size) - collected_responses.append(("DROP", current_lsn, size)) # Should have gone past gc_horizon, otherwise gc_horizon is too large - assert current_lsn - initdb_lsn > gc_horizon + bytes_written = current_lsn - initdb_lsn + assert bytes_written > gc_horizon # this isn't too many lines to forget for a while. observed while # developing these tests that locally the value is a bit more than what we