pageserver: exclude gc_horizon from synthetic size calculation (#6407)

## Problem

See:
- https://github.com/neondatabase/neon/issues/6374

## Summary of changes

Previously we calculated synthetic size starting from whichever of the
gc_horizon cutoff and the pitr_interval cutoff was the lower LSN. Now we
ignore gc_horizon and start exclusively from the `pitr_interval` cutoff.
This is a more generous calculation for billing: users are no longer
charged for data retained only because of gc_horizon.
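For illustration, a minimal sketch of the before/after cutoff selection, with `Lsn` simplified to a `u64` newtype and `GcInfo` standing in for the pageserver's real struct (the field names follow the diff below; everything else here is an assumption):

```rust
use std::cmp;

// Sketch only: simplified stand-ins for the pageserver's Lsn and GcInfo types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Lsn(u64);

struct GcInfo {
    horizon_cutoff: Lsn, // space bound, derived from the gc_horizon setting
    pitr_cutoff: Lsn,    // time bound, derived from the pitr_interval setting
}

fn main() {
    let gc_info = GcInfo {
        horizon_cutoff: Lsn(0x1000), // the lower LSN, i.e. more history retained
        pitr_cutoff: Lsn(0x4000),
    };

    // Before: whichever cutoff is the lower LSN, so gc_horizon could inflate
    // the amount of history included in synthetic size.
    let old_cutoff = cmp::min(gc_info.horizon_cutoff, gc_info.pitr_cutoff);

    // After: only the PITR cutoff counts towards synthetic size.
    let new_cutoff = gc_info.pitr_cutoff;

    println!("old cutoff: {old_cutoff:?}, new cutoff: {new_cutoff:?}");
}
```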
Author: John Spray
Date: 2024-03-15 16:07:36 +00:00 (committed by GitHub)
Parent: 67522ce83d
Commit: bc1efa827f
3 changed files with 53 additions and 14 deletions


@@ -183,7 +183,13 @@ pub(super) async fn gather_inputs(
     // new gc run, which we have no control over. however differently from `Timeline::gc`
     // we don't consider the `Timeline::disk_consistent_lsn` at all, because we are not
     // actually removing files.
-    let mut next_gc_cutoff = cmp::min(gc_info.horizon_cutoff, gc_info.pitr_cutoff);
+    //
+    // We only consider [`GcInfo::pitr_cutoff`], and not [`GcInfo::horizon_cutoff`], because from
+    // a user's perspective they have only requested retention up to the time bound (pitr_cutoff), rather
+    // than a space bound (horizon cutoff). This means that if someone drops a database and waits for their
+    // PITR interval, they will see synthetic size decrease, even if we are still storing data inside
+    // horizon_cutoff.
+    let mut next_gc_cutoff = gc_info.pitr_cutoff;
     // If the caller provided a shorter retention period, use that instead of the GC cutoff.
     let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
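The distinction the new comment draws can be made concrete: `horizon_cutoff` is a space bound (roughly, everything within the last `gc_horizon` bytes of WAL), while `pitr_cutoff` is a time bound (the LSN the timeline had reached `pitr_interval` ago). A rough sketch under those assumptions, with simplified types and a hypothetical `find_lsn_for_timestamp` standing in for the real timestamp-to-LSN lookup:

```rust
use std::time::{Duration, SystemTime};

// Sketch only: simplified types; `find_lsn_for_timestamp` is a hypothetical stand-in
// for the pageserver's timestamp->LSN lookup.
type Lsn = u64;

fn find_lsn_for_timestamp(_ts: SystemTime) -> Lsn {
    0x3000 // pretend we resolved the timestamp to this LSN
}

fn main() {
    let last_record_lsn: Lsn = 0x5000;
    let gc_horizon: u64 = 0x1000; // bytes of recent WAL to always retain
    let pitr_interval = Duration::from_secs(7 * 24 * 3600);

    // Space bound: keep everything newer than `gc_horizon` bytes behind the head.
    let horizon_cutoff: Lsn = last_record_lsn.saturating_sub(gc_horizon);

    // Time bound: keep everything newer than the LSN we were at `pitr_interval` ago.
    let pitr_cutoff: Lsn = find_lsn_for_timestamp(SystemTime::now() - pitr_interval);

    // After this change, synthetic size starts from the time bound only.
    println!("horizon_cutoff={horizon_cutoff:#x} pitr_cutoff={pitr_cutoff:#x}");
}
```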


@@ -3784,8 +3784,11 @@ impl Timeline {
                 // The timestamp is in the future. That sounds impossible,
                 // but what it really means is that there hasn't been
                 // any commits since the cutoff timestamp.
+                //
+                // In this case we should use the LSN of the most recent commit,
+                // which is implicitly the last LSN in the log.
                 debug!("future({})", lsn);
-                cutoff_horizon
+                self.get_last_record_lsn()
             }
             LsnForTimestamp::Past(lsn) => {
                 debug!("past({})", lsn);


@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import List, Tuple
@@ -326,7 +327,7 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
     size_debug_file.write(size_debug)
 
 
-@pytest.mark.xfail
+@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
 def test_single_branch_get_tenant_size_grows(
     neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion
 ):
@@ -349,10 +350,21 @@ def test_single_branch_get_tenant_size_grows(
     # adjust the gc_horizon accordingly.
     if pg_version == PgVersion.V14:
         gc_horizon = 0x4A000
     elif pg_version == PgVersion.V15:
         gc_horizon = 0x3BA00
     elif pg_version == PgVersion.V16:
         gc_horizon = 210000
     else:
         raise NotImplementedError(pg_version)
 
-    neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
+    tenant_config = {
+        "compaction_period": "0s",
+        "gc_period": "0s",
+        "pitr_interval": "0s",
+        "gc_horizon": gc_horizon,
+    }
 
-    env = neon_env_builder.init_start()
+    env = neon_env_builder.init_start(initial_tenant_conf=tenant_config)
     tenant_id = env.initial_tenant
 
     branch_name, timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
@@ -405,6 +417,7 @@ def test_single_branch_get_tenant_size_grows(
         current_lsn = after_lsn
         size_debug_file.write(size_debug)
         assert size > 0
+        log.info(f"size: {size} at lsn {current_lsn}")
         return (current_lsn, size)
 
     with env.endpoints.create_start(
@@ -492,24 +505,41 @@ def test_single_branch_get_tenant_size_grows(
         collected_responses.append(("DELETE", current_lsn, size))
 
+        size_before_drop = get_current_consistent_size(
+            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
+        )[1]
+
         with endpoint.cursor() as cur:
             cur.execute("DROP TABLE t0")
 
+        # Without setting a PITR interval, dropping the table doesn't reclaim any space
+        # from the user's point of view, because the DROP transaction is too small
+        # to fall out of gc_horizon.
+        (current_lsn, size) = get_current_consistent_size(
+            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        prev_size = collected_responses[-1][2]
+        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
+
+        # Set a tiny PITR interval to allow the DROP to impact the synthetic size
+        # Because synthetic size calculation uses pitr interval when available,
+        # when our tenant is configured with a tiny pitr interval, dropping a table should
+        # cause synthetic size to go down immediately
+        tenant_config["pitr_interval"] = "1ms"
+        env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config)
+        (current_lsn, size) = get_current_consistent_size(
+            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        assert size < size_before_drop
-        # The size of the tenant should still be as large as before we dropped
-        # the table, because the drop operation can still be undone in the PITR
-        # defined by gc_horizon.
-        (current_lsn, size) = get_current_consistent_size(
-            env, endpoint, size_debug_file, http_client, tenant_id, timeline_id
-        )
-        prev_size = collected_responses[-1][2]
-        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
 
         collected_responses.append(("DROP", current_lsn, size))
 
         # Should have gone past gc_horizon, otherwise gc_horizon is too large
-        assert current_lsn - initdb_lsn > gc_horizon
+        bytes_written = current_lsn - initdb_lsn
+        assert bytes_written > gc_horizon
 
         # this isn't too many lines to forget for a while. observed while
         # developing these tests that locally the value is a bit more than what we