From 1e172230ce6e72667b743ecfba437986e934c7f3 Mon Sep 17 00:00:00 2001 From: anastasia Date: Wed, 1 Sep 2021 12:09:02 +0300 Subject: [PATCH] Add test function to compare files in compute nodes to catch bugs in SLRU replay. Compare files in existing compute node's pgdata with fresh basebackup at the same lsn. We expect that content is identical, except tmp files. Use it after some tests. --- .../batch_others/test_clog_truncate.py | 2 +- test_runner/batch_others/test_createdropdb.py | 7 ++- test_runner/batch_others/test_multixact.py | 5 +- .../batch_pg_regress/test_pg_regress.py | 10 +++- .../batch_pg_regress/test_zenith_regress.py | 9 ++- test_runner/fixtures/zenith_fixtures.py | 56 +++++++++++++++++++ 6 files changed, 84 insertions(+), 5 deletions(-) diff --git a/test_runner/batch_others/test_clog_truncate.py b/test_runner/batch_others/test_clog_truncate.py index e9233986e4..0ae8597e5d 100644 --- a/test_runner/batch_others/test_clog_truncate.py +++ b/test_runner/batch_others/test_clog_truncate.py @@ -3,7 +3,7 @@ import os from contextlib import closing -from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver +from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_restored_datadir_content pytest_plugins = ("fixtures.zenith_fixtures") diff --git a/test_runner/batch_others/test_createdropdb.py b/test_runner/batch_others/test_createdropdb.py index 25ab2e8111..56369e7242 100644 --- a/test_runner/batch_others/test_createdropdb.py +++ b/test_runner/batch_others/test_createdropdb.py @@ -2,7 +2,7 @@ import os import pathlib from contextlib import closing -from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory, ZenithCli +from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory, ZenithCli, check_restored_datadir_content pytest_plugins = ("fixtures.zenith_fixtures") @@ -69,6 +69,8 @@ def test_dropdb( with conn.cursor() as cur: cur.execute('DROP DATABASE foodb') + cur.execute('CHECKPOINT') + cur.execute('SELECT 
pg_current_wal_insert_lsn()') lsn_after_drop = cur.fetchone()[0] @@ -94,3 +96,6 @@ def test_dropdb( print(dbpath) assert os.path.isdir(dbpath) == False + + # Check that we restore the content of the datadir correctly + check_restored_datadir_content(zenith_cli, pg, lsn_after_drop, postgres) diff --git a/test_runner/batch_others/test_multixact.py b/test_runner/batch_others/test_multixact.py index 49c6e4aa66..1919e319a8 100644 --- a/test_runner/batch_others/test_multixact.py +++ b/test_runner/batch_others/test_multixact.py @@ -1,4 +1,4 @@ -from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver +from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_restored_datadir_content pytest_plugins = ("fixtures.zenith_fixtures") @@ -63,3 +63,6 @@ def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_b # Check that we restored pg_controlfile correctly assert next_multixact_id_new == next_multixact_id + + # Check that we restore the content of the datadir correctly + check_restored_datadir_content(zenith_cli, pg, lsn, postgres) diff --git a/test_runner/batch_pg_regress/test_pg_regress.py b/test_runner/batch_pg_regress/test_pg_regress.py index e45f3c199c..32b056a330 100644 --- a/test_runner/batch_pg_regress/test_pg_regress.py +++ b/test_runner/batch_pg_regress/test_pg_regress.py @@ -1,7 +1,7 @@ import os from fixtures.utils import mkdir_if_needed -from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver +from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_restored_datadir_content pytest_plugins = ("fixtures.zenith_fixtures") @@ -49,3 +49,11 @@ def test_pg_regress(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_ # logs the exact same data to `regression.out` anyway. 
with capsys.disabled(): pg_bin.run(pg_regress_command, env=env, cwd=runpath) + + # checkpoint one more time to ensure that the lsn we get is the latest one + pg.safe_psql('CHECKPOINT') + lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0] + + # Check that we restore the content of the datadir correctly + # FIXME Now it fails on pg_xact for some reason + # check_restored_datadir_content(zenith_cli, pg, lsn, postgres) diff --git a/test_runner/batch_pg_regress/test_zenith_regress.py b/test_runner/batch_pg_regress/test_zenith_regress.py index ccda49295b..ffc43fd570 100644 --- a/test_runner/batch_pg_regress/test_zenith_regress.py +++ b/test_runner/batch_pg_regress/test_zenith_regress.py @@ -1,7 +1,7 @@ import os from fixtures.utils import mkdir_if_needed -from fixtures.zenith_fixtures import PostgresFactory +from fixtures.zenith_fixtures import PostgresFactory, check_restored_datadir_content pytest_plugins = ("fixtures.zenith_fixtures") @@ -50,3 +50,10 @@ def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_outp # logs the exact same data to `regression.out` anyway. 
with capsys.disabled(): pg_bin.run(pg_regress_command, env=env, cwd=runpath) + + # checkpoint one more time to ensure that the lsn we get is the latest one + pg.safe_psql('CHECKPOINT') + lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0] + + # Check that we restore the content of the datadir correctly + check_restored_datadir_content(zenith_cli, pg, lsn, postgres) diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py index a64f4720bd..6665a4ec8d 100644 --- a/test_runner/fixtures/zenith_fixtures.py +++ b/test_runner/fixtures/zenith_fixtures.py @@ -10,6 +10,7 @@ import shutil import signal import subprocess import time +import filecmp from contextlib import closing from pathlib import Path @@ -597,6 +598,23 @@ class Postgres(PgProtocol): self.stop() + def list_files_to_compare(self): + pgdata_files = [] + for root, _file, filenames in os.walk(self.pgdata_dir): + for filename in filenames: + rel_dir = os.path.relpath(root, self.pgdata_dir) + # Skip some dirs and files we don't want to compare + skip_dirs = ['pg_wal', 'pg_stat', 'pg_stat_tmp', 'pg_subtrans', 'pg_logical'] + skip_files = ['pg_internal.init', 'pg.log', 'zenith.signal', 'postgresql.conf', + 'postmaster.opts', 'postmaster.pid', 'pg_control'] + if rel_dir not in skip_dirs and filename not in skip_files: + rel_file = os.path.join(rel_dir, filename) + pgdata_files.append(rel_file) + + pgdata_files.sort() + print(pgdata_files) + return pgdata_files + class PostgresFactory: """ An object representing multiple running postgres daemons. 
""" def __init__(self, zenith_cli: ZenithCli, repo_dir: str, pg_bin: PgBin, initial_tenant: str, base_port: int = 55431): @@ -924,3 +942,41 @@ class TenantFactory: @zenfixture def tenant_factory(zenith_cli: ZenithCli): return TenantFactory(zenith_cli) + + +# pg is the existing compute node we want to compare our basebackup to +# lsn is the latest lsn of this node +def check_restored_datadir_content(zenith_cli, pg, lsn, postgres: PostgresFactory): + # stop postgres to ensure that files won't change + pg.stop() + + # list files we're going to compare + pgdata_files = pg.list_files_to_compare() + + # create new branch, but don't start postgres + # We only need 'basebackup' result here. + zenith_cli.run( + ["branch", "new", pg.branch + "@" + lsn]) + + pg2 = postgres.create('new') + print('postgres is created on new branch') + + print('files in a basebackup') + # list files we're going to compare + pgdata_files2 = pg2.list_files_to_compare() + + # check that file sets are equal + assert pgdata_files == pgdata_files2 + + # compare content of the files + # filecmp returns (match, mismatch, error) lists + # We've already filtered all mismatching files in list_files_to_compare(), + # so here expect that the content is identical + (match, mismatch, error) = filecmp.cmpfiles(pg.pgdata_dir, + pg2.pgdata_dir, + pgdata_files, + shallow=False) + print('filecmp result mismatch and error lists:') + print(mismatch) + print(error) + assert (mismatch, error) == ([], [])