mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-13 08:22:55 +00:00
Add test for replacement of the failed safekeeper (#1179)
* Add test to replace failed safekeeper * Restart safekeepers in test_replace_safekeeper * Update vendor/postgres
This commit is contained in:
committed by
GitHub
parent
49d1d1ddf9
commit
cedde559b8
@@ -12,7 +12,7 @@ from contextlib import closing
|
||||
from dataclasses import dataclass, field
|
||||
from multiprocessing import Process, Value
|
||||
from pathlib import Path
|
||||
from fixtures.zenith_fixtures import PgBin, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
|
||||
from fixtures.zenith_fixtures import PgBin, Postgres, Safekeeper, ZenithEnv, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
|
||||
from fixtures.utils import lsn_to_hex, mkdir_if_needed
|
||||
from fixtures.log_helper import log
|
||||
from typing import List, Optional, Any
|
||||
@@ -603,3 +603,92 @@ def test_safekeeper_without_pageserver(test_output_dir: str,
|
||||
env.postgres.safe_psql("insert into t select generate_series(1, 100)")
|
||||
res = env.postgres.safe_psql("select sum(i) from t")[0][0]
|
||||
assert res == 5050
|
||||
|
||||
|
||||
def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
|
||||
def safekeepers_guc(env: ZenithEnv, sk_names: List[str]) -> str:
|
||||
return ','.join(
|
||||
[f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.name in sk_names])
|
||||
|
||||
def execute_payload(pg: Postgres):
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
# we rely upon autocommit after each statement
|
||||
# as waiting for acceptors happens there
|
||||
cur.execute('CREATE TABLE IF NOT EXISTS t(key int, value text)')
|
||||
cur.execute("INSERT INTO t VALUES (0, 'something')")
|
||||
cur.execute('SELECT SUM(key) FROM t')
|
||||
sum_before = cur.fetchone()[0]
|
||||
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
cur.execute('SELECT SUM(key) FROM t')
|
||||
sum_after = cur.fetchone()[0]
|
||||
assert sum_after == sum_before + 5000050000
|
||||
|
||||
def show_statuses(safekeepers: List[Safekeeper], tenant_id: str, timeline_id: str):
|
||||
for sk in safekeepers:
|
||||
http_cli = sk.http_client()
|
||||
try:
|
||||
status = http_cli.timeline_status(tenant_id, timeline_id)
|
||||
log.info(f"Safekeeper {sk.name} status: {status}")
|
||||
except Exception as e:
|
||||
log.info(f"Safekeeper {sk.name} status error: {e}")
|
||||
|
||||
zenith_env_builder.num_safekeepers = 4
|
||||
env = zenith_env_builder.init()
|
||||
env.zenith_cli(["branch", "test_replace_safekeeper", "main"])
|
||||
|
||||
log.info("Use only first 3 safekeepers")
|
||||
env.safekeepers[3].stop()
|
||||
active_safekeepers = ['sk1', 'sk2', 'sk3']
|
||||
pg = env.postgres.create('test_replace_safekeeper')
|
||||
pg.adjust_for_wal_acceptors(safekeepers_guc(env, active_safekeepers))
|
||||
pg.start()
|
||||
|
||||
# learn zenith timeline from compute
|
||||
tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
|
||||
timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]
|
||||
|
||||
execute_payload(pg)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Restart all safekeepers to flush everything")
|
||||
env.safekeepers[0].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
env.safekeepers[0].start()
|
||||
env.safekeepers[1].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
env.safekeepers[1].start()
|
||||
env.safekeepers[2].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
env.safekeepers[2].start()
|
||||
|
||||
env.safekeepers[0].stop(immediate=True)
|
||||
env.safekeepers[1].stop(immediate=True)
|
||||
env.safekeepers[2].stop(immediate=True)
|
||||
env.safekeepers[0].start()
|
||||
env.safekeepers[1].start()
|
||||
env.safekeepers[2].start()
|
||||
|
||||
execute_payload(pg)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Stop sk1 (simulate failure) and use only quorum of sk2 and sk3")
|
||||
env.safekeepers[0].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Recreate postgres to replace failed sk1 with new sk4")
|
||||
pg.stop_and_destroy().create('test_replace_safekeeper')
|
||||
active_safekeepers = ['sk2', 'sk3', 'sk4']
|
||||
env.safekeepers[3].start()
|
||||
pg.adjust_for_wal_acceptors(safekeepers_guc(env, active_safekeepers))
|
||||
pg.start()
|
||||
|
||||
execute_payload(pg)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
log.info("Stop sk2 to require quorum of sk3 and sk4 for normal work")
|
||||
env.safekeepers[1].stop(immediate=True)
|
||||
execute_payload(pg)
|
||||
show_statuses(env.safekeepers, tenant_id, timeline_id)
|
||||
|
||||
2
vendor/postgres
vendored
2
vendor/postgres
vendored
Submodule vendor/postgres updated: ca5e7beaf8...99b91512bd
Reference in New Issue
Block a user