Compare commits

...

10 Commits

Author SHA1 Message Date
Bojan Serafimov
98bf3e7136 Clean up 2022-07-04 20:11:08 -04:00
Bojan Serafimov
c222d96bae Clean up 2022-07-04 19:21:32 -04:00
Bojan Serafimov
b86e89d659 use template0copy, should work now 2022-07-04 16:41:52 -04:00
Bojan Serafimov
20694a9285 hardcode template0 files 2022-07-04 13:21:25 -04:00
Bojan Serafimov
5587b3a27c Check all dbs 2022-07-04 13:09:50 -04:00
Bojan Serafimov
36d8b3b640 Add assertion 2022-07-04 12:01:53 -04:00
Bojan Serafimov
3581759e11 Use pg_relation_filepath 2022-07-04 11:45:22 -04:00
Bojan Serafimov
7e9bcaca54 WIP 2022-07-01 16:29:31 -04:00
Bojan Serafimov
dd5e6436b5 WIP 2022-07-01 15:45:51 -04:00
Bojan Serafimov
a1c6dabd35 WIP 2022-07-01 14:45:47 -04:00

View File

@@ -0,0 +1,168 @@
from fixtures.neon_fixtures import VanillaPostgres
from fixtures.utils import subprocess_capture
import os
import shutil
from pathlib import Path
import tempfile
def get_rel_paths(log_dir, pg_bin, base_tar, port="55439"):
    """Yield the relation file paths postgres reports for a base tar.

    The tar is unpacked into a temporary directory, a vanilla postgres is
    started on top of it, and every connectable database is asked for
    ``pg_relation_filepath(oid)`` of each ``pg_class`` row.

    template0 does not accept connections, so a copy of it (``template0copy``)
    is created and queried instead; the paths found under the copy's
    ``base/<oid>/`` directory are rewritten to template0's oid.

    Args:
        log_dir: Directory passed to ``subprocess_capture`` for the tar call.
        pg_bin: Passed through to ``VanillaPostgres``.
        base_tar: Path of the base tar to inspect.
        port: Port the temporary postgres listens on.  Defaults to the
            previously hard-coded value; nothing checks that it is free.

    Yields:
        The file paths returned by ``pg_relation_filepath`` (rows where it is
        NULL are skipped), with template0copy paths mapped to template0.

    Raises:
        AssertionError: If a template0copy path is neither under the copy's
            own ``base/<oid>/`` directory nor starts with ``global``.
        IndexError: If ``pg_database`` has no ``template0`` row.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])

        with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
            vanilla_pg.configure([f"port={port}"])
            vanilla_pg.start()

            # Create database based on template0 because we can't connect to template0
            vanilla_pg.safe_psql(
                "create database template0copy template template0",
                user="cloud_admin",
            )
            vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")

            # Get all databases
            oid_dbname_pairs = vanilla_pg.safe_psql(
                "select oid, datname from pg_database",
                user="cloud_admin",
            )
            template0_oid = [
                oid
                for (oid, database) in oid_dbname_pairs
                if database == "template0"
            ][0]

            # Get rel paths for each database
            rel_query = "select relname, pg_relation_filepath(oid) from pg_class"
            for oid, database in oid_dbname_pairs:
                if database == "template0":
                    # We can't connect to template0
                    continue

                result = vanilla_pg.safe_psql(rel_query, user="cloud_admin", dbname=database)
                for relname, filepath in result:
                    if filepath is None:
                        continue

                    if database != "template0copy":
                        yield filepath
                        continue

                    # Add all template0copy paths to template0
                    prefix = f"base/{oid}/"
                    if filepath.startswith(prefix):
                        suffix = filepath[len(prefix):]
                        yield f"base/{template0_oid}/{suffix}"
                    elif filepath.startswith("global"):
                        print(f"skipping {database} global file {filepath}")
                    else:
                        raise AssertionError(
                            f"unexpected path {filepath!r} for relation "
                            f"{relname!r} in database {database}"
                        )
def pack_base(log_dir, restored_dir, output_tar):
    """Tar up the contents of ``restored_dir`` and move the archive to ``output_tar``.

    The archive is first written as ``tmp.tar`` inside ``restored_dir`` (tar is
    run with that directory as its cwd and given the directory's entries by
    name), then moved to its final location.
    """
    scratch_name = "tmp.tar"
    # Listing happens before tar runs, so the scratch archive is not among
    # the members it is asked to pack.
    members = os.listdir(restored_dir)
    subprocess_capture(
        log_dir,
        ["tar", "-cf", scratch_name, *members],
        cwd=restored_dir,
    )
    shutil.move(os.path.join(restored_dir, scratch_name), output_tar)
def get_files_in_tar(log_dir, tar):
    """Yield the path of every file in ``tar``, relative to the archive root.

    The archive is unpacked into a temporary directory which is then walked;
    directories themselves are not reported, only the files inside them.

    Args:
        log_dir: Directory passed to ``subprocess_capture`` for the tar call.
        tar: Path of the tar archive to list.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the tar
        subprocess_capture(log_dir, ["tar", "-xf", tar, "-C", restored_dir])

        # Report every file relative to the unpack directory
        for root, _dirs, files in os.walk(restored_dir):
            for name in files:
                yield os.path.relpath(os.path.join(root, name), restored_dir)
def corrupt(log_dir, base_tar, output_tar):
    """Write a copy of ``base_tar`` to ``output_tar`` with all empty files dropped.

    Returns the set of removed files as paths relative to the archive root.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])

        # Collect every zero-length file in the unpacked tree
        candidates = (
            os.path.join(root, name)
            for root, _dirs, files in os.walk(restored_dir)
            for name in files
        )
        zero_length = [path for path in candidates if os.path.getsize(path) == 0]

        # Delete them (just to see if they get recreated)
        for path in zero_length:
            os.remove(path)

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)

        # Strip "<restored_dir>/" so callers get archive-relative paths
        skip = len(restored_dir) + 1
        return {path[skip:] for path in zero_length}
def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
    """Create a copy of ``corrupt_tar`` in which every path in ``paths`` exists.

    The archive is unpacked into a temporary directory, each listed path that
    is absent is created as an empty file, and the result is repackaged into
    ``output_tar``.  ``corrupt_tar`` itself is only read.

    Args:
        log_dir: Directory passed to ``subprocess_capture`` / ``pack_base``.
        corrupt_tar: Path of the input tar that may lack files.
        output_tar: Path the completed tar is written to.
        paths: Iterable of paths, relative to the archive root, to ensure exist.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])

        # Touch files that don't exist
        for path in paths:
            absolute_path = os.path.join(restored_dir, path)
            if not os.path.exists(absolute_path):
                # f-string so the actual path is printed, not the placeholder
                print(f"File {absolute_path} didn't exist. Creating..")
                Path(absolute_path).touch()

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)
def test_complete(test_output_dir, pg_bin):
    """Round-trip check: drop empty files from a base tar, then restore them.

    Verifies that the paths inferred by ``get_rel_paths`` cover every deleted
    file except a small allow-list, and that the re-completed tar contains
    every file of the original except that same allow-list.
    """
    # Specify directories
    # TODO make a basebackup instead of using one from another test
    work_dir = "/home/bojan/src/neondatabase/neon/test_output/test_import_from_pageserver/"
    base_tar = os.path.join(work_dir, "psql_2.stdout")
    output_tar = os.path.join(work_dir, "psql_2-completed.stdout")
    corrupt_tar = os.path.join(test_output_dir, "psql_2-corrupted.stdout")

    # Files the reconstruction is allowed to miss
    tolerated = {
        "postgresql.auto.conf",
        "pg_ident.conf",
    }

    # Create new base tar with missing empty files; it must differ from the original
    deleted_files = corrupt(test_output_dir, base_tar, corrupt_tar)
    files_before = set(get_files_in_tar(test_output_dir, base_tar))
    files_after = set(get_files_in_tar(test_output_dir, corrupt_tar))
    assert len(files_before - files_after) > 0

    # Reconstruct paths from the corrupted tar, assert it covers everything important
    reconstructed_paths = set(get_rel_paths(test_output_dir, pg_bin, corrupt_tar))
    assert (deleted_files - reconstructed_paths) <= tolerated

    # Recreate the correct tar by touching files, compare with original tar
    touch_missing_rels(test_output_dir, corrupt_tar, output_tar, reconstructed_paths)
    files_expected = set(get_files_in_tar(test_output_dir, base_tar))
    files_completed = set(get_files_in_tar(test_output_dir, output_tar))
    assert (files_expected - files_completed) <= tolerated
# HACK: this script relies on the test fixtures, so it is packaged as a test.
# Run it with `poetry run pytest -k test_main_hack`, passing the inputs via the
# INPUT_BASE_TAR and OUTPUT_BASE_TAR environment variables.
#
# The script takes a base tar, infers which empty rel files might be missing,
# and creates a new base tar with those files included. It does not modify
# the original file.
def test_main_hack(test_output_dir, pg_bin):
    """Complete the tar named by INPUT_BASE_TAR and write it to OUTPUT_BASE_TAR.

    Both environment variables are required; a missing one raises KeyError.
    """
    source_tar = os.environ["INPUT_BASE_TAR"]
    target_tar = os.environ["OUTPUT_BASE_TAR"]

    # Paths postgres expects to exist, inferred from the input tar itself
    expected_paths = set(get_rel_paths(test_output_dir, pg_bin, source_tar))
    touch_missing_rels(test_output_dir, source_tar, target_tar, expected_paths)