WIP relocation test

This commit is contained in:
Bojan Serafimov
2022-07-05 17:12:16 -04:00
parent 0aff7c9ee9
commit f7efbb2d42
2 changed files with 36 additions and 18 deletions

View File

@@ -110,21 +110,20 @@ def main(args: argparse.Namespace):
if args.only_import is False:
query = f"fullbackup {timeline['tenant_id']} {timeline['timeline_id']} {timeline['local']['last_record_lsn']}"
cmd = ["psql", "--no-psqlrc", old_pageserver_connstr, "-c", query]
cmd = [args.psql_path, "--no-psqlrc", old_pageserver_connstr, "-c", query]
print(f"Running: {cmd}")
tar_filename = path.join(basepath,
f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")
incomplete_tar_filename = tar_filename + ".incomplete"
stderr_filename = path.join(
basepath, f"{timeline['tenant_id']}_{timeline['timeline_id']}.stderr")
with open(incomplete_tar_filename, 'w') as stdout_f:
with open(tar_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
print(f"(capturing output to {incomplete_filename})")
print(f"(capturing output to {tar_filename})")
subprocess.run(cmd, stdout=stdout_f, stderr=stderr_f, env=psql_env)
add_missing_emtpy_rels(incomplete_tar_filename, tar_filename)
# add_missing_emtpy_rels(incomplete_tar_filename, tar_filename)
print(f"Done export: {tar_filename}")

View File

@@ -11,7 +11,7 @@ import signal
import pytest
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir
from fixtures.utils import lsn_from_hex
from fixtures.utils import lsn_from_hex, subprocess_capture
def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):
@@ -184,19 +184,38 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
new_pageserver_http_port,
neon_env_builder.broker):
# call to attach timeline to new pageserver
new_pageserver_http.timeline_attach(tenant, timeline)
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there was no new writes since checkpoint
new_timeline_detail = wait_until(
number_of_iterations=5,
interval=1,
func=lambda: assert_local(new_pageserver_http, tenant, timeline))
# Migrate either by attacking from s3 or import/export basebackup
relocation_method = "import"
if relocation_method == "import":
scripts_dir = "/home/bojan/src/neondatabase/neon/scripts/"
cmd = [
"python",
os.path.join(scripts_dir, "export_import_betwen_pageservers.py"),
"--tenant-id", tenant.hex,
"--from-host", "localhost",
"--from-http-port", str(pageserver_http.port),
"--from-pg-port", str(env.pageserver.service_port.pg),
"--to-host", "localhost",
"--to-http-port", str(new_pageserver_http_port),
"--to-pg-port", str(new_pageserver_pg_port),
"--psql-path", os.path.join(pg_distrib_dir, "bin", "psql"),
]
subprocess_capture(env.repo_dir, cmd, check=True)
elif relocation_method == "attach":
# call to attach timeline to new pageserver
new_pageserver_http.timeline_attach(tenant, timeline)
# when load is active these checks can break because lsns are not static
# so lets check with some margin
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
0.03)
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there was no new writes since checkpoint
new_timeline_detail = wait_until(
number_of_iterations=5,
interval=1,
func=lambda: assert_local(new_pageserver_http, tenant, timeline))
# when load is active these checks can break because lsns are not static
# so lets check with some margin
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
0.03)
tenant_pg.stop()