From c222d96bae10f3dae8cab2a4d1b5e209185b3c73 Mon Sep 17 00:00:00 2001 From: Bojan Serafimov Date: Mon, 4 Jul 2022 19:21:32 -0400 Subject: [PATCH] Clean up --- .../batch_others/test_complete_basebackup.py | 213 +++++++++++------- 1 file changed, 127 insertions(+), 86 deletions(-) diff --git a/test_runner/batch_others/test_complete_basebackup.py b/test_runner/batch_others/test_complete_basebackup.py index 6204c3f79e..c8357e463f 100644 --- a/test_runner/batch_others/test_complete_basebackup.py +++ b/test_runner/batch_others/test_complete_basebackup.py @@ -3,110 +3,151 @@ from fixtures.utils import mkdir_if_needed, subprocess_capture import os import shutil from pathlib import Path +import tempfile -def get_rel_paths(pg_bin, restored_dir): +def get_rel_paths(log_dir, pg_bin, base_tar): """Yeild list of relation paths""" - port = "55439" # Probably free - with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg: - vanilla_pg.configure([f"port={port}"]) - vanilla_pg.start() + with tempfile.TemporaryDirectory() as restored_dir: + # Unpack the base tar + subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir]) - # Create database based on template0 because we can't connect to template0 - # TODO clean up afterwards - query = "create database template0copy template template0" - vanilla_pg.safe_psql(query, user="cloud_admin") - vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin") + port = "55439" # Probably free + with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg: + vanilla_pg.configure([f"port={port}"]) + vanilla_pg.start() - # Get all databases - query = "select oid, datname from pg_database" - oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin") - print(oid_dbname_pairs) - template0_oid = [ - oid - for (oid, database) in oid_dbname_pairs - if database == "template0" - ][0] + # Create database based on template0 because we can't connect to template0 + query = "create database template0copy template template0" + vanilla_pg.safe_psql(query, user="cloud_admin") + vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin") - # Get rel paths for each database - for oid, database in oid_dbname_pairs: - if database == "template0": - # We can't connect to template0 - continue + # Get all databases + query = "select oid, datname from pg_database" + oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin") + template0_oid = [ + oid + for (oid, database) in oid_dbname_pairs + if database == "template0" + ][0] - query = "select relname, pg_relation_filepath(oid) from pg_class" - result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database) - for relname, filepath in result: - if filepath is not None: - yield filepath + # Get rel paths for each database + for oid, database in oid_dbname_pairs: + if database == "template0": + # We can't connect to template0 + continue - # Add all template0copy paths to template0 - # TODO maybe just copying from template1 would be good enough - if database == "template0copy": - prefix = f"base/{oid}/" - if filepath.startswith(prefix): - suffix = filepath.removeprefix(prefix) - yield f"base/{template0_oid}/{suffix}" - elif filepath.startswith("global"): - print(f"skipping {database} global file {filepath}") + query = "select relname, pg_relation_filepath(oid) from pg_class" + result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database) + for relname, filepath in result: + if filepath is not None: + + if database == "template0copy": + # Add all template0copy paths to template0 + prefix = f"base/{oid}/" + if filepath.startswith(prefix): + suffix = filepath[len(prefix):] + yield f"base/{template0_oid}/{suffix}" + elif filepath.startswith("global"): + print(f"skipping {database} global file {filepath}") + else: + raise AssertionError else: - raise AssertionError + yield filepath + + +def pack_base(log_dir, restored_dir, output_tar): + tmp_tar_name = "tmp.tar" + tmp_tar_path = os.path.join(restored_dir, tmp_tar_name) + cmd = ["tar", "-cf", tmp_tar_name] + os.listdir(restored_dir) + subprocess_capture(log_dir, cmd, cwd=restored_dir) + shutil.move(tmp_tar_path, output_tar) + + +def get_files_in_tar(log_dir, tar): + with tempfile.TemporaryDirectory() as restored_dir: + # Unpack the base tar + subprocess_capture(log_dir, ["tar", "-xf", tar, "-C", restored_dir]) + + # Find empty files + empty_files = [] + for root, dirs, files in os.walk(restored_dir): + for name in files: + file_path = os.path.join(root, name) + yield file_path[len(restored_dir) + 1:] + + +def corrupt(log_dir, base_tar, output_tar): + """Remove all empty files and repackage. Return paths of files removed.""" + with tempfile.TemporaryDirectory() as restored_dir: + # Unpack the base tar + subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir]) + + # Find empty files + empty_files = [] + for root, dirs, files in os.walk(restored_dir): + for name in files: + file_path = os.path.join(root, name) + file_size = os.path.getsize(file_path) + if file_size == 0: + empty_files.append(file_path) + + # Delete empty files (just to see if they get recreated) + for empty_file in empty_files: + os.remove(empty_file) + + # Repackage + pack_base(log_dir, restored_dir, output_tar) + + # Return relative paths + return { + empty_file[len(restored_dir) + 1:] + for empty_file in empty_files + } + + +def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths): + with tempfile.TemporaryDirectory() as restored_dir: + # Unpack the base tar + subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir]) + + # Touch files that don't exist + for path in paths: + absolute_path = os.path.join(restored_dir, path) + exists = os.path.exists(absolute_path) + if not exists: + Path(absolute_path).touch() + + # Repackage + pack_base(log_dir, restored_dir, output_tar) def test_complete(pg_bin): # Specify directories + # TODO make this a standalone script, with these as inputs work_dir = "/home/bojan/src/neondatabase/neon/test_output/test_import_from_pageserver/" base_tar = os.path.join(work_dir, "psql_2.stdout") output_tar = os.path.join(work_dir, "psql_2-completed.stdout") - # Unpack the base tar - restored_dir = os.path.join(work_dir, "restored") - shutil.rmtree(restored_dir, ignore_errors=True) - os.mkdir(restored_dir, 0o750) - subprocess_capture(work_dir, ["tar", "-xf", base_tar, "-C", restored_dir]) + # Create new base tar with missing empty files + corrupt_tar = os.path.join(work_dir, "psql_2-corrupted.stdout") + deleted_files = corrupt(work_dir, base_tar, corrupt_tar) + assert len(set(get_files_in_tar(work_dir, base_tar)) - + set(get_files_in_tar(work_dir, corrupt_tar))) > 0 - # Find empty files - empty_files = [] - for root, dirs, files in os.walk(restored_dir): - for name in files: - file_path = os.path.join(root, name) - file_size = os.path.getsize(file_path) - if file_size == 0: - empty_files.append(file_path) - - # Delete empty files (just to see if they get recreated) - for empty_file in empty_files: - os.remove(empty_file) - - # Get the nodes - paths = list(get_rel_paths(pg_bin, restored_dir)) - - # Touch files that don't exist - for path in paths: - absolute_path = os.path.join(restored_dir, path) - exists = os.path.exists(absolute_path) - if not exists: - print("Touching file ", absolute_path) - Path(absolute_path).touch() - - # Check that all deleted files are back - unimportant_files = { + # Reconstruct paths from the corrupted tar, assert it covers everything important + reconstructed_paths = set(get_rel_paths(work_dir, pg_bin, corrupt_tar)) + paths_missed = deleted_files - reconstructed_paths + assert paths_missed.issubset({ "postgresql.auto.conf", "pg_ident.conf", - } - for empty_file in empty_files: - file_name = os.path.basename(empty_file) - if file_name in unimportant_files: - continue + }) - exists = os.path.exists(empty_file) - if not exists: - print(f"Deleted empty file {empty_file} was not recreated") - assert(exists) - - # Pack completed tar, being careful to preserve relative file names - tmp_tar_name = "tmp.tar" - tmp_tar_path = os.path.join(restored_dir, tmp_tar_name) - cmd = ["tar", "-cf", tmp_tar_name] + os.listdir(restored_dir) - subprocess_capture(work_dir, cmd, cwd=restored_dir) - shutil.move(tmp_tar_path, output_tar) + # Recreate the correct tar by touching files, compare with original tar + touch_missing_rels(work_dir, corrupt_tar, output_tar, reconstructed_paths) + paths_missed = (set(get_files_in_tar(work_dir, base_tar)) - + set(get_files_in_tar(work_dir, output_tar))) + assert paths_missed.issubset({ + "postgresql.auto.conf", + "pg_ident.conf", + })