From f7efbb2d426f0fca8b26b17e0b8c21eb5898d038 Mon Sep 17 00:00:00 2001 From: Bojan Serafimov Date: Tue, 5 Jul 2022 17:12:16 -0400 Subject: [PATCH] WIP relocation test --- scripts/export_import_betwen_pageservers.py | 9 ++-- .../batch_others/test_tenant_relocation.py | 45 +++++++++++++------ 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/scripts/export_import_betwen_pageservers.py b/scripts/export_import_betwen_pageservers.py index 62b7ff9f4c..153a8e07d3 100755 --- a/scripts/export_import_betwen_pageservers.py +++ b/scripts/export_import_betwen_pageservers.py @@ -110,21 +110,20 @@ def main(args: argparse.Namespace): if args.only_import is False: query = f"fullbackup {timeline['tenant_id']} {timeline['timeline_id']} {timeline['local']['last_record_lsn']}" - cmd = ["psql", "--no-psqlrc", old_pageserver_connstr, "-c", query] + cmd = [args.psql_path, "--no-psqlrc", old_pageserver_connstr, "-c", query] print(f"Running: {cmd}") tar_filename = path.join(basepath, f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar") - incomplete_tar_filename = tar_filename + ".incomplete" stderr_filename = path.join( basepath, f"{timeline['tenant_id']}_{timeline['timeline_id']}.stderr") - with open(incomplete_tar_filename, 'w') as stdout_f: + with open(tar_filename, 'w') as stdout_f: with open(stderr_filename, 'w') as stderr_f: - print(f"(capturing output to {incomplete_filename})") + print(f"(capturing output to {tar_filename})") subprocess.run(cmd, stdout=stdout_f, stderr=stderr_f, env=psql_env) - add_missing_emtpy_rels(incomplete_tar_filename, tar_filename) + # add_missing_emtpy_rels(incomplete_tar_filename, tar_filename) print(f"Done export: {tar_filename}") diff --git a/test_runner/batch_others/test_tenant_relocation.py b/test_runner/batch_others/test_tenant_relocation.py index 0239b17494..de2823f86a 100644 --- a/test_runner/batch_others/test_tenant_relocation.py +++ b/test_runner/batch_others/test_tenant_relocation.py @@ -11,7 +11,7 @@ import signal import pytest 
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir -from fixtures.utils import lsn_from_hex +from fixtures.utils import lsn_from_hex, subprocess_capture def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float): @@ -184,19 +184,38 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, new_pageserver_http_port, neon_env_builder.broker): - # call to attach timeline to new pageserver - new_pageserver_http.timeline_attach(tenant, timeline) - # new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there was no new writes since checkpoint - new_timeline_detail = wait_until( - number_of_iterations=5, - interval=1, - func=lambda: assert_local(new_pageserver_http, tenant, timeline)) + # Migrate either by attaching from s3 or import/export basebackup + relocation_method = "import" + if relocation_method == "import": + scripts_dir = "/home/bojan/src/neondatabase/neon/scripts/" + cmd = [ + "python", + os.path.join(scripts_dir, "export_import_betwen_pageservers.py"), + "--tenant-id", tenant.hex, + "--from-host", "localhost", + "--from-http-port", str(pageserver_http.port), + "--from-pg-port", str(env.pageserver.service_port.pg), + "--to-host", "localhost", + "--to-http-port", str(new_pageserver_http_port), + "--to-pg-port", str(new_pageserver_pg_port), + "--psql-path", os.path.join(pg_distrib_dir, "bin", "psql"), + ] + subprocess_capture(env.repo_dir, cmd, check=True) + elif relocation_method == "attach": + # call to attach timeline to new pageserver + new_pageserver_http.timeline_attach(tenant, timeline) - # when load is active these checks can break because lsns are not static - # so lets check with some margin - assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']), 
lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']), - 0.03) + # new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there was no new writes since checkpoint + new_timeline_detail = wait_until( + number_of_iterations=5, + interval=1, + func=lambda: assert_local(new_pageserver_http, tenant, timeline)) + + # when load is active these checks can break because lsns are not static + # so lets check with some margin + assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']), + lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']), + 0.03) tenant_pg.stop()