From e4898a6e605e791a00ce21bf49d4cc0d9a10534a Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Sat, 20 Jan 2024 18:04:16 +0200
Subject: [PATCH] Don't pass InvalidTransactionId to update_next_xid. (#6410)

update_next_xid() doesn't have any special treatment for the invalid or
other special XIDs, so it will treat InvalidTransactionId (0) as a
regular XID. If the old nextXid is smaller than 2^31, 0 will look like a
very old XID, and nothing happens. But if nextXid is greater than 2^31,
0 will look like a very new XID, and update_next_xid() will incorrectly
bump up nextXid.
---
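Note, kept below the "---" so it stays out of the commit message: a minimal
Rust sketch of the modulo-2^32 XID comparison described above. The
xid_follows() helper is hypothetical and for illustration only; it is not the
actual update_next_xid() implementation, but it uses the same
wrapping-subtraction-interpreted-as-signed comparison that PostgreSQL-style
XID code relies on.

    // Hypothetical helper: `a` counts as newer than `b` when the wrapping
    // difference, interpreted as a signed 32-bit value, is positive.
    fn xid_follows(a: u32, b: u32) -> bool {
        (a.wrapping_sub(b) as i32) > 0
    }

    fn main() {
        let invalid_xid: u32 = 0; // InvalidTransactionId

        // nextXid below 2^31: 0 looks like a very old XID, so nothing happens.
        assert!(!xid_follows(invalid_xid, 1_000_000_000));

        // nextXid above 2^31: 0 suddenly looks like a very new XID, so a
        // naive update_next_xid(0) would bump nextXid (and eventually the
        // epoch). The fix below simply skips the call when xl_xid is invalid.
        assert!(xid_follows(invalid_xid, 2_200_000_000));
    }
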
 pageserver/src/walingest.rs          |  11 +-
 test_runner/regress/test_next_xid.py | 172 ++++++++++++++++++++++++++-
 test_runner/regress/test_vm_bits.py  |   3 +
 3 files changed, 181 insertions(+), 5 deletions(-)

diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs
index 26de229e4d..f2c35436db 100644
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -102,7 +102,9 @@ impl WalIngest {
         buf.advance(decoded.main_data_offset);
 
         assert!(!self.checkpoint_modified);
-        if self.checkpoint.update_next_xid(decoded.xl_xid) {
+        if decoded.xl_xid != pg_constants::INVALID_TRANSACTION_ID
+            && self.checkpoint.update_next_xid(decoded.xl_xid)
+        {
             self.checkpoint_modified = true;
         }
 
@@ -330,8 +332,13 @@ impl WalIngest {
                         < 0
                     {
                         self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
-                        self.checkpoint_modified = true;
                     }
+
+                    // Write a new checkpoint key-value pair on every checkpoint record, even
+                    // if nothing really changed. Not strictly required, but it seems nice to
+                    // have some trace of the checkpoint records in the layer files at the same
+                    // LSNs.
+                    self.checkpoint_modified = true;
                 }
             }
             pg_constants::RM_LOGICALMSG_ID => {
diff --git a/test_runner/regress/test_next_xid.py b/test_runner/regress/test_next_xid.py
index 6e94e15227..da2580dbf9 100644
--- a/test_runner/regress/test_next_xid.py
+++ b/test_runner/regress/test_next_xid.py
@@ -1,10 +1,18 @@
+import json
+import os
 import time
+from pathlib import Path
 
-from fixtures.neon_fixtures import NeonEnvBuilder
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_wal_insert_lsn
+from fixtures.pageserver.utils import (
+    wait_for_last_record_lsn,
+)
+from fixtures.remote_storage import RemoteStorageKind
+from fixtures.types import Lsn, TenantId, TimelineId
+from fixtures.utils import query_scalar
 
 
-# Test restarting page server, while safekeeper and compute node keep
-# running.
 def test_next_xid(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()
 
@@ -52,3 +60,161 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
         cur = conn.cursor()
         cur.execute("SELECT count(*) FROM t")
         assert cur.fetchone() == (iterations,)
+
+
+# Test for a bug we had, where nextXid was incorrectly updated when the
+# XID counter reached 2 billion. The nextXid tracking logic incorrectly
+# treated 0 (InvalidTransactionId) as a regular XID, and after reaching
+# 2 billion, it started to look like a very new XID, which caused nextXid
+# to be immediately advanced to the next epoch.
+#
+def test_import_at_2bil(
+    neon_env_builder: NeonEnvBuilder,
+    test_output_dir: Path,
+    pg_distrib_dir: Path,
+    pg_bin,
+    vanilla_pg,
+):
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
+    env = neon_env_builder.init_start()
+    ps_http = env.pageserver.http_client()
+
+    # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
+    # PgBin sets it automatically, but here we need to pipe psql output to the tar command.
+    psql_env = {"LD_LIBRARY_PATH": str(pg_distrib_dir / "lib")}
+
+    # Reset the vanilla Postgres instance to somewhat before 2 billion transactions.
+    pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
+    cmd = [pg_resetwal_path, "--next-transaction-id=2129920000", "-D", str(vanilla_pg.pgdatadir)]
+    pg_bin.run_capture(cmd, env=psql_env)
+
+    vanilla_pg.start()
+    vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
+    vanilla_pg.safe_psql(
+        """create table tt as select 'long string to consume some space' || g
+        from generate_series(1,300000) g"""
+    )
+    assert vanilla_pg.safe_psql("select count(*) from tt") == [(300000,)]
+    vanilla_pg.safe_psql("CREATE TABLE t (t text);")
+    vanilla_pg.safe_psql("INSERT INTO t VALUES ('inserted in vanilla')")
+
+    endpoint_id = "ep-import_from_vanilla"
+    tenant = TenantId.generate()
+    timeline = TimelineId.generate()
+
+    env.pageserver.tenant_create(tenant)
+
+    # Take basebackup
+    basebackup_dir = os.path.join(test_output_dir, "basebackup")
+    base_tar = os.path.join(basebackup_dir, "base.tar")
+    wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
+    os.mkdir(basebackup_dir)
+    vanilla_pg.safe_psql("CHECKPOINT")
+    pg_bin.run(
+        [
+            "pg_basebackup",
+            "-F",
+            "tar",
+            "-d",
+            vanilla_pg.connstr(),
+            "-D",
+            basebackup_dir,
+        ]
+    )
+
+    # Get start_lsn and end_lsn
+    with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
+        manifest = json.load(f)
+        start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
+        end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
+
+    def import_tar(base, wal):
+        env.neon_cli.raw_cli(
+            [
+                "timeline",
+                "import",
+                "--tenant-id",
+                str(tenant),
+                "--timeline-id",
+                str(timeline),
+                "--node-name",
+                endpoint_id,
+                "--base-lsn",
+                start_lsn,
+                "--base-tarfile",
+                base,
+                "--end-lsn",
+                end_lsn,
+                "--wal-tarfile",
+                wal,
+                "--pg-version",
+                env.pg_version,
+            ]
+        )
+
+    # Importing the correct backup works
+    import_tar(base_tar, wal_tar)
+    wait_for_last_record_lsn(ps_http, tenant, timeline, Lsn(end_lsn))
+
+    endpoint = env.endpoints.create_start(
+        endpoint_id,
+        tenant_id=tenant,
+        config_lines=[
+            "log_autovacuum_min_duration = 0",
+            "autovacuum_naptime='5 s'",
+        ],
+    )
+    assert endpoint.safe_psql("select count(*) from t") == [(1,)]
+
+    # Ok, consume
+    conn = endpoint.connect()
+    cur = conn.cursor()
+
+    # Install extension containing function needed for test
+    cur.execute("CREATE EXTENSION neon_test_utils")
+
+    # Advance nextXid close to 2 billion XIDs
+    while True:
+        xid = int(query_scalar(cur, "SELECT txid_current()"))
+        log.info(f"xid now {xid}")
+        # Consume XIDs in batches of 50k until we get close to 2^31, then in
+        # batches of 5k for the last stretch.
+        if xid < 2 * 1024 * 1024 * 1024 - 100000:
+            cur.execute("select test_consume_xids(50000);")
+        elif xid < 2 * 1024 * 1024 * 1024 - 10000:
+            cur.execute("select test_consume_xids(5000);")
+        else:
+            break
+
+    # Run a bunch of real INSERTs to cross over the 2 billion mark.
+    # Use a begin-exception block to have a separate sub-XID for each insert.
+    cur.execute(
+        """
+        do $$
+        begin
+          for i in 1..10000 loop
+            -- Use a begin-exception block to generate a new subtransaction on each iteration
+            begin
+              insert into t values (i);
+            exception when others then
+              raise 'not expected %', sqlerrm;
+            end;
+          end loop;
+        end;
+        $$;
+        """
+    )
+    # A checkpoint writes a WAL record with xl_xid=0. Many other WAL
+    # records would have the same effect.
+ cur.execute("checkpoint") + + # wait until pageserver receives that data + wait_for_wal_insert_lsn(env, endpoint, tenant, timeline) + + # Restart endpoint + endpoint.stop() + endpoint.start() + + conn = endpoint.connect() + cur = conn.cursor() + cur.execute("SELECT count(*) from t") + assert cur.fetchone() == (10000 + 1,) diff --git a/test_runner/regress/test_vm_bits.py b/test_runner/regress/test_vm_bits.py index bc810ceb09..415f086bd3 100644 --- a/test_runner/regress/test_vm_bits.py +++ b/test_runner/regress/test_vm_bits.py @@ -1,3 +1,4 @@ +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn @@ -117,6 +118,8 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): # Test that the ALL_FROZEN VM bit is cleared correctly at a HEAP_LOCK # record. # +# FIXME: This test is broken +@pytest.mark.skip("See https://github.com/neondatabase/neon/pull/6412#issuecomment-1902072541") def test_vm_bit_clear_on_heap_lock(neon_simple_env: NeonEnv): env = neon_simple_env