tests: revise perf test that interfered with local disk state (#5682)

This benchmark started failing after #5580 merged.

The test manually deleted some local content on a pageserver and
expected the pageserver to "forget" about the timeline on startup
as a result. That is no longer our behavior: pageservers now use
remote storage as the source of truth.

Rather than having the test go manually delete things at all, we can
just delete the whole tenant via the pageserver API, and thereby start
from a clean situation.
This commit is contained in:
John Spray
2023-10-27 09:23:49 +01:00
committed by GitHub
parent 71611f4ab3
commit 83567f9e4e

View File

@@ -1,7 +1,7 @@
import shutil
from contextlib import closing
from fixtures.compare_fixtures import NeonCompare, PgCompare
from fixtures.pageserver.utils import wait_tenant_status_404
from fixtures.pg_version import PgVersion
@@ -42,17 +42,14 @@ def measure_recovery_time(env: NeonCompare):
client = env.env.pageserver.http_client()
pg_version = PgVersion(client.timeline_detail(env.tenant, env.timeline)["pg_version"])
# Stop pageserver and remove tenant data
env.env.pageserver.stop()
timeline_dir = env.env.pageserver.timeline_dir(env.tenant, env.timeline)
shutil.rmtree(timeline_dir)
# Start pageserver
env.env.pageserver.start()
# Delete the Tenant in the pageserver: this will drop local and remote layers, such that
# when we "create" the Tenant again, we will replay the WAL from the beginning.
client.tenant_delete(env.tenant)
wait_tenant_status_404(client, env.tenant, iterations=60, interval=0.5)
client.tenant_create(new_tenant_id=env.tenant)
# Measure recovery time
with env.record_duration("wal_recovery"):
# Create the timeline, which will start walingest
client.timeline_create(pg_version, env.tenant, env.timeline)
# Flush, which will also wait for lsn to catch up