From 9a32aa828d8f2b4ee5f84f81bb5cb3f6012bfeb5 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 21 Sep 2024 04:00:38 +0300 Subject: [PATCH] Fix init of WAL page header at startup (#8914) If the primary is started at an LSN within the first of a 16 MB WAL segment, the "long XLOG page header" at the beginning of the segment was not initialized correctly. That has gone unnnoticed, because under normal circumstances, nothing looks at the page header. The WAL that is streamed to the safekeepers starts at the new record's LSN, not at the beginning of the page, so that bogus page header didn't propagate elsewhere, and a primary server doesn't normally read the WAL its written. Which is good because the contents of the page would be bogus anyway, as it wouldn't contain any of the records before the LSN where the new record is written. Except that in the following cases a primary does read its own WAL: 1. When there are two-phase transactions in prepared state at checkpoint. The checkpointer reads the two-phase state from the XLOG_XACT_PREPARE record, and writes it to a file in pg_twophase/. 2. Logical decoding reads the WAL starting from the replication slot's restart LSN. This PR fixes the problem with two-phase transactions. For that, it's sufficient to initialize the page header correctly. The checkpointer only needs to read XLOG_XACT_PREPARE records that were generated after the server startup, so it's still OK that older WAL is missing / bogus. I have not investigated if we have a problem with logical decoding, however. Let's deal with that separately. Special thanks to @Lzjing-1997, who independently found the same bug and opened a PR to fix it, although I did not use that PR. --- test_runner/regress/test_twophase.py | 31 +++++++++++++++++++++++----- vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 8 +++---- 6 files changed, 34 insertions(+), 13 deletions(-) diff --git a/test_runner/regress/test_twophase.py b/test_runner/regress/test_twophase.py index ebe65e7c29..75fab78d6e 100644 --- a/test_runner/regress/test_twophase.py +++ b/test_runner/regress/test_twophase.py @@ -8,6 +8,7 @@ from fixtures.neon_fixtures import ( PgBin, fork_at_current_lsn, import_timeline_from_vanilla_postgres, + wait_for_wal_insert_lsn, ) @@ -22,11 +23,6 @@ def twophase_test_on_timeline(env: NeonEnv): conn = endpoint.connect() cur = conn.cursor() - # FIXME: Switch to the next WAL segment, to work around the bug fixed in - # https://github.com/neondatabase/neon/pull/8914. When that is merged, this can be - # removed. - cur.execute("select pg_switch_wal()") - cur.execute("CREATE TABLE foo (t text)") # Prepare a transaction that will insert a row @@ -140,3 +136,28 @@ def test_twophase_nonzero_epoch( vanilla_pg.stop() # don't need the original server anymore twophase_test_on_timeline(env) + + +def test_twophase_at_wal_segment_start(neon_simple_env: NeonEnv): + """ + Same as 'test_twophase' test, but the server is started at an LSN at the beginning + of a WAL segment. We had a bug where we didn't initialize the "long XLOG page header" + at the beginning of the segment correctly, which was detected when the checkpointer + tried to read the XLOG_XACT_PREPARE record from the WAL, if that record was on the + very first page of a WAL segment and the server was started up at that first page. + """ + env = neon_simple_env + timeline_id = env.neon_cli.create_branch("test_twophase", "main") + + endpoint = env.endpoints.create_start( + "test_twophase", config_lines=["max_prepared_transactions=5"] + ) + endpoint.safe_psql("SELECT pg_switch_wal()") + + # to avoid hitting https://github.com/neondatabase/neon/issues/9079, wait for the + # WAL to reach the pageserver. + wait_for_wal_insert_lsn(env, endpoint, env.initial_tenant, timeline_id) + + endpoint.stop_and_destroy() + + twophase_test_on_timeline(env) diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index f9c51c1243..a38d15f323 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit f9c51c12438b20049b6905eb4e43d321defd6ff2 +Subproject commit a38d15f3233a4c07f2bf3335fcbd874dd1f4e386 diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 1dbd6f3164..16c3c6b64f 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 1dbd6f316416c8360bbd4f3d6db956cf70937cf0 +Subproject commit 16c3c6b64f1420a367a2a9b2510f20d94f791af8 diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index d009084a74..1d7081a3b0 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit d009084a745cb4d5e6de222c778b2a562c8b2767 +Subproject commit 1d7081a3b076ddf5086e0b118d4329820e6a7427 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index dadd6fe208..2cf120e739 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit dadd6fe208bb906cc0a48980f2ab4e13c47ba3ad +Subproject commit 2cf120e7393ca5f537c6a38b457585576dc035fc diff --git a/vendor/revisions.json b/vendor/revisions.json index c93393970f..9f6512d03e 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ "17rc1", - "dadd6fe208bb906cc0a48980f2ab4e13c47ba3ad" + "2cf120e7393ca5f537c6a38b457585576dc035fc" ], "v16": [ "16.4", - "d009084a745cb4d5e6de222c778b2a562c8b2767" + "1d7081a3b076ddf5086e0b118d4329820e6a7427" ], "v15": [ "15.8", - "1dbd6f316416c8360bbd4f3d6db956cf70937cf0" + "16c3c6b64f1420a367a2a9b2510f20d94f791af8" ], "v14": [ "14.13", - "f9c51c12438b20049b6905eb4e43d321defd6ff2" + "a38d15f3233a4c07f2bf3335fcbd874dd1f4e386" ] }