mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-24 00:20:37 +00:00
114 lines
4.1 KiB
Python
114 lines
4.1 KiB
Python
import concurrent.futures
|
|
import os
|
|
from typing import List, Tuple
|
|
|
|
import pytest
|
|
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
|
|
from fixtures.utils import query_scalar
|
|
|
|
|
|
# Test that after a pageserver restart an intact timeline is still served,
# while timelines whose on-disk files were corrupted or removed fail to load.
|
|
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
    """Check that damaged timelines fail to load without affecting a healthy one.

    Four tenants are created, each with a 100-row table, and their compute
    nodes are stopped. With the pageserver stopped, the on-disk timeline data
    of three tenants is damaged in different ways (garbage metadata file,
    removed layer files, garbage layer files). After the pageserver is started
    again, the untouched tenant must still return its rows, while starting
    compute on each damaged tenant must fail with "Cannot load local timeline".
    """
    # NOTE(review): the comment originally here said one safekeeper is enough
    # for this test, but the code configures three. The value is left
    # unchanged -- TODO confirm which one is intended.
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    tenant_timelines: List[Tuple[str, str, Postgres]] = []

    for _ in range(4):
        tenant_id_uuid, _timeline_id_uuid = env.neon_cli.create_tenant()
        tenant_id = tenant_id_uuid.hex

        pg = env.postgres.create_start("main", tenant_id=tenant_id_uuid)
        with pg.cursor() as cur:
            cur.execute("CREATE TABLE t(key int primary key, value text)")
            cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'")

            # Use the timeline id reported by the compute node itself
            timeline_id = query_scalar(cur, "SHOW neon.timeline_id")
        pg.stop()
        tenant_timelines.append((tenant_id, timeline_id, pg))

    # Stop the pageserver
    env.pageserver.stop()

    # Leave the first timeline alone, but corrupt the others in different ways
    (_, _, pg0) = tenant_timelines[0]

    # Corrupt metadata file on timeline 1
    (tenant1, timeline1, _) = tenant_timelines[1]
    metadata_path = f"{env.repo_dir}/tenants/{tenant1}/timelines/{timeline1}/metadata"
    log.info(f"overwriting metadata file at {metadata_path}")
    _overwrite_with_garbage(metadata_path)

    # Remove the layer files of timeline 2. (This would actually work
    # if we had Cloud Storage enabled in this test.)
    (tenant2, timeline2, _) = tenant_timelines[2]
    timeline_path = f"{env.repo_dir}/tenants/{tenant2}/timelines/{timeline2}"
    for layer_path in _layer_file_paths(timeline_path):
        os.remove(layer_path)

    # Corrupt the layer files of timeline 3
    (tenant3, timeline3, _) = tenant_timelines[3]
    timeline_path = f"{env.repo_dir}/tenants/{tenant3}/timelines/{timeline3}"
    for layer_path in _layer_file_paths(timeline_path):
        _overwrite_with_garbage(layer_path)

    env.pageserver.start()

    # Tenant 0 should still work
    pg0.start()
    assert pg0.safe_psql("SELECT COUNT(*) FROM t")[0][0] == 100

    # But all others are broken
    for _tenant, _timeline, pg in tenant_timelines[1:]:
        with pytest.raises(Exception, match="Cannot load local timeline") as err:
            pg.start()
        # Logged after the raises-block: a statement following pg.start()
        # inside the block would never run once the expected exception fires.
        log.info(f"compute startup failed as expected: {err}")


def _overwrite_with_garbage(path: str) -> None:
    """Replace the contents of the file at *path* with a fixed garbage string."""
    # The context manager closes the handle even if the write fails.
    with open(path, "w") as f:
        f.write("overwritten with garbage!")


def _layer_file_paths(timeline_path: str) -> List[str]:
    """Return the paths of entries in *timeline_path* named like layer files.

    The test treats every directory entry whose name starts with "00000" as a
    layer file.
    """
    return [
        os.path.join(timeline_path, filename)
        for filename in os.listdir(timeline_path)
        if filename.startswith("00000")
    ]
|
|
|
|
|
|
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
    """Issue four `create_timeline` calls for one tenant from a thread pool.

    Every call is submitted before any result is collected; `result()` then
    re-raises whatever exception a worker hit, which fails the test.
    """
    env = neon_simple_env

    tenant_id, _ = env.neon_cli.create_tenant()

    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
        pending = []
        for i in range(4):
            branch_name = f"test-create-multiple-timelines-{i}"
            pending.append(pool.submit(env.neon_cli.create_timeline, branch_name, tenant_id))

        # Collect in submission order
        for task in pending:
            task.result()
|
|
|
|
|
|
def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv):
    """Check that a tenant with a failed timeline creation is not loaded after restart.

    A failpoint makes timeline creation raise; the pageserver is then
    restarted, and listing the tenant's timelines is expected to fail with
    "Failed to get repo for tenant <tenant id>".
    """
    env = neon_simple_env
    cli = env.neon_cli

    tenant_id, _ = cli.create_tenant()

    # Arm the failpoint so that creating a new timeline raises
    env.pageserver.safe_psql("failpoints before-checkpoint-new-timeline=return")
    with pytest.raises(Exception, match="before-checkpoint-new-timeline"):
        cli.create_timeline("test_fix_broken_timelines", tenant_id)

    # Restart the page server: immediate stop, then start
    cli.pageserver_stop(immediate=True)
    cli.pageserver_start()

    # The tenant with the "broken" timeline must not have been loaded
    expected_error = f"Failed to get repo for tenant {tenant_id.hex}"
    with pytest.raises(Exception, match=expected_error):
        cli.list_timelines(tenant_id)
|