diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 73f224039e..c4b3d057f8 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1669,7 +1669,7 @@ class AbstractNeonCli(abc.ABC):
             timeout=timeout,
         )
         if not res.returncode:
-            log.info(f"Run success: {res.stdout}")
+            log.info(f"Run {res.args} success: {res.stdout}")
         elif check_return_code:
             # this way command output will be in recorded and shown in CI in failure message
             msg = f"""\
@@ -3463,6 +3463,14 @@ def wait_for_last_flush_lsn(
     return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
 
 
+def wait_for_wal_insert_lsn(
+    env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId
+) -> Lsn:
+    """Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
+    last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0])
+    return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
+
+
 def fork_at_current_lsn(
     env: NeonEnv,
     pg: Postgres,
diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py
index 8c2996f491..a4b5f7739a 100644
--- a/test_runner/regress/test_tenant_size.py
+++ b/test_runner/regress/test_tenant_size.py
@@ -3,8 +3,15 @@ from typing import List, Tuple
 
 import pytest
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, wait_for_last_flush_lsn
-from fixtures.types import Lsn
+from fixtures.neon_fixtures import (
+    NeonEnv,
+    NeonEnvBuilder,
+    PageserverHttpClient,
+    Postgres,
+    wait_for_last_flush_lsn,
+    wait_for_wal_insert_lsn,
+)
+from fixtures.types import Lsn, TenantId, TimelineId
 
 
 def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path):
@@ -324,7 +331,7 @@ def test_single_branch_get_tenant_size_grows(
     # inserts is larger than gc_horizon. for example 0x20000 here hid the fact
     # that there next_gc_cutoff could be smaller than initdb_lsn, which will
     # obviously lead to issues when calculating the size.
-    gc_horizon = 0x30000
+    gc_horizon = 0x38000
     neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"
 
     env = neon_env_builder.init_start()
@@ -334,29 +341,75 @@ def test_single_branch_get_tenant_size_grows(
 
     http_client = env.pageserver.http_client()
 
-    collected_responses: List[Tuple[Lsn, int]] = []
+    collected_responses: List[Tuple[str, Lsn, int]] = []
 
     size_debug_file = open(test_output_dir / "size_debug.html", "w")
 
-    def check_size_change(current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev: int):
-        if current_lsn - initdb_lsn > gc_horizon:
+    def check_size_change(
+        current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev_size: int
+    ):
+        if current_lsn - initdb_lsn >= gc_horizon:
             assert (
-                size >= prev
+                size >= prev_size
             ), "tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size"
         else:
             assert (
-                size > prev
+                size > prev_size
             ), "tenant_size should grow, because we continue to add WAL to initial snapshot size"
 
-    with env.postgres.create_start(branch_name, tenant_id=tenant_id) as pg:
-        initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+    def get_current_consistent_size(
+        env: NeonEnv,
+        pg: Postgres,
+        size_debug_file,  # apparently there is no public signature for open()...
+        http_client: PageserverHttpClient,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Tuple[Lsn, int]:
+        consistent = False
+        size_debug = None
+
+        current_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
+        # We want to make sure we have a self-consistent set of values.
+        # Size changes with WAL, so only if both before and after getting
+        # the size of the tenant reports the same WAL insert LSN, we're OK
+        # to use that (size, LSN) combination.
+        # Note that 'wait_for_wal_flush_lsn' is not accurate enough: There
+        # can be more wal after the flush LSN that can arrive on the
+        # pageserver before we're requesting the page size.
+        # Anyway, in general this is only one iteration, so in general
+        # this is fine.
+        while not consistent:
+            size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
+            size_debug = http_client.tenant_size_debug(tenant_id)
+
+            after_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
+            consistent = current_lsn == after_lsn
+            current_lsn = after_lsn
+        size_debug_file.write(size_debug)
+        return (current_lsn, size)
+
+    with env.postgres.create_start(
+        branch_name,
+        tenant_id=tenant_id,
+        ### autovacuum is disabled to limit WAL logging.
+        config_lines=["autovacuum=off"],
+    ) as pg:
+        (initdb_lsn, size) = get_current_consistent_size(
+            env, pg, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        collected_responses.append(("INITDB", initdb_lsn, size))
+
         with pg.cursor() as cur:
-            cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL)")
+            cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL) WITH (fillfactor = 40)")
+
+        (current_lsn, size) = get_current_consistent_size(
+            env, pg, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        collected_responses.append(("CREATE", current_lsn, size))
 
         batch_size = 100
 
-        i = 0
-        while True:
+        for i in range(3):
             with pg.cursor() as cur:
                 cur.execute(
                     f"INSERT INTO t0(i) SELECT i FROM generate_series({batch_size} * %s, ({batch_size} * (%s + 1)) - 1) s(i)",
@@ -365,27 +418,24 @@ def test_single_branch_get_tenant_size_grows(
 
             i += 1
 
-            current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+            (current_lsn, size) = get_current_consistent_size(
+                env, pg, size_debug_file, http_client, tenant_id, timeline_id
+            )
 
-            size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
+            prev_size = collected_responses[-1][2]
+            if size == 0:
+                assert prev_size == 0
+            else:
+                # branch start shouldn't be past gc_horizon yet
+                # thus the size should grow as we insert more data
+                # "gc_horizon" is tuned so that it kicks in _after_ the
+                # insert phase, but before the update phase ends.
+                assert (
+                    current_lsn - initdb_lsn <= gc_horizon
+                ), "Tuning of GC window is likely out-of-date"
+                assert size > prev_size
 
-            size_debug = http_client.tenant_size_debug(tenant_id)
-            size_debug_file.write(size_debug)
-
-            if len(collected_responses) > 0:
-                prev = collected_responses[-1][1]
-                if size == 0:
-                    assert prev == 0
-                else:
-                    # branch start shouldn't be past gc_horizon yet
-                    # thus the size should grow as we insert more data
-                    assert current_lsn - initdb_lsn <= gc_horizon
-                    assert size > prev
-
-            collected_responses.append((current_lsn, size))
-
-            if len(collected_responses) > 2:
-                break
+            collected_responses.append(("INSERT", current_lsn, size))
 
         while True:
             with pg.cursor() as cur:
@@ -397,18 +447,15 @@ def test_single_branch_get_tenant_size_grows(
             if updated == 0:
                 break
 
-            current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+            (current_lsn, size) = get_current_consistent_size(
+                env, pg, size_debug_file, http_client, tenant_id, timeline_id
+            )
 
-            size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
+            prev_size = collected_responses[-1][2]
 
-            size_debug = http_client.tenant_size_debug(tenant_id)
-            size_debug_file.write(size_debug)
+            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
 
-            prev = collected_responses[-1][1]
-
-            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev)
-
-            collected_responses.append((current_lsn, size))
+            collected_responses.append(("UPDATE", current_lsn, size))
 
         while True:
             with pg.cursor() as cur:
@@ -418,40 +465,47 @@ def test_single_branch_get_tenant_size_grows(
             if deleted == 0:
                 break
 
-            current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+            (current_lsn, size) = get_current_consistent_size(
+                env, pg, size_debug_file, http_client, tenant_id, timeline_id
+            )
 
-            size = http_client.tenant_size(tenant_id)
-            prev = collected_responses[-1][1]
+            prev_size = collected_responses[-1][2]
 
-            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev)
+            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
 
-            collected_responses.append((current_lsn, size))
+            collected_responses.append(("DELETE", current_lsn, size))
 
         with pg.cursor() as cur:
             cur.execute("DROP TABLE t0")
 
-        current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+        # The size of the tenant should still be as large as before we dropped
+        # the table, because the drop operation can still be undone in the PITR
+        # defined by gc_horizon.
+        (current_lsn, size) = get_current_consistent_size(
+            env, pg, size_debug_file, http_client, tenant_id, timeline_id
+        )
 
-        size = http_client.tenant_size(tenant_id)
-        prev = collected_responses[-1][1]
+        prev_size = collected_responses[-1][2]
 
-        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev)
+        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)
 
-        collected_responses.append((current_lsn, size))
+        collected_responses.append(("DROP", current_lsn, size))
 
     # this isn't too many lines to forget for a while. observed while
     # developing these tests that locally the value is a bit more than what we
     # get in the ci.
-    for lsn, size in collected_responses:
-        log.info(f"collected: {lsn}, {size}")
+    for phase, lsn, size in collected_responses:
+        log.info(f"collected: {phase}, {lsn}, {size}")
 
     env.pageserver.stop()
     env.pageserver.start()
 
+    size_after = http_client.tenant_size(tenant_id)
+    size_debug = http_client.tenant_size_debug(tenant_id)
+    size_debug_file.write(size_debug)
     size_debug_file.close()
 
-    size_after = http_client.tenant_size(tenant_id)
-    prev = collected_responses[-1][1]
+    prev = collected_responses[-1][2]
 
     assert size_after == prev, "size after restarting pageserver should not have changed"
 
diff --git a/vendor/postgres-v14 b/vendor/postgres-v14
index b44ee1d9a5..468d3c0824 160000
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
@@ -1 +1 @@
-Subproject commit b44ee1d9a5b061ababb31f89a4e30a1795573f51
+Subproject commit 468d3c08245906f083fed1009759f9f953f5915d
diff --git a/vendor/postgres-v15 b/vendor/postgres-v15
index 303fa4050f..9a2093383a 160000
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
@@ -1 +1 @@
-Subproject commit 303fa4050fafba3771052b3d49b8e2d00d6ea2e3
+Subproject commit 9a2093383ae19906f025b008ceecf89ebc9ea869