Make this test look like 'test_compute_restart.sh' by @ololobus, which was surprisingly good at checking safekeeper behavior. This test adds an intermediate compute node start with a bulk select that causes a lot of FPIs; the select itself doesn't wait for all that WAL to be replicated. So if we kill the compute node right after it, we end up with lagging safekeepers where VCL != flush_lsn, and starting a new node from that state takes special care. Also, run and print `pg_controldata` output after each compute node start to eyeball the LSN/checkpoint info of the basebackup. This commit only adds the test without fixing the problem.
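
The `pg_controldata` step mentioned above does not appear in the test below yet. A minimal sketch of what it might look like, assuming `pg_controldata` is on PATH and that the compute's data directory path is available to the test (the `pgdatadir` argument and the helper itself are hypothetical, not part of the fixtures):

import subprocess

def print_pg_controldata(pgdatadir: str):
    # Hypothetical helper: dump control-file state (latest checkpoint,
    # REDO LSN, etc.) for the given data directory so the basebackup's
    # LSN/checkpoint info can be eyeballed in the test log.
    out = subprocess.run(['pg_controldata', pgdatadir],
                         capture_output=True, text=True, check=True)
    print(out.stdout)

Calling something like this right after each compute start would surface how the basebackup's checkpoint relates to the WAL the safekeepers have flushed.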
import pytest

from contextlib import closing
from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory

pytest_plugins = ("fixtures.zenith_fixtures")

#
# Test restarting and recreating a postgres instance
#
# XXX: with_wal_acceptors=True fails now, would be fixed with
# `postgres --sync-walkeepers` patches.
#
@pytest.mark.parametrize('with_wal_acceptors', [False])
def test_restart_compute(
    zenith_cli,
    pageserver: ZenithPageserver,
    postgres: PostgresFactory,
    pg_bin,
    wa_factory,
    with_wal_acceptors: bool,
):
    wal_acceptor_connstrs = None
    zenith_cli.run(["branch", "test_restart_compute", "empty"])

    if with_wal_acceptors:
        wa_factory.start_n_new(3)
        wal_acceptor_connstrs = wa_factory.get_connstrs()
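
    # create_start() brings up a compute node for the branch; per the commit
    # message, the compute starts from a pageserver basebackup (and talks to
    # the wal acceptors when they are configured).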
    pg = postgres.create_start('test_restart_compute',
                               wal_acceptors=wal_acceptor_connstrs)
    print("postgres is running on 'test_restart_compute' branch")
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            r = cur.fetchone()
            assert r == (5000050000, )
            print("res = ", r)

    # Remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute',
                                       wal_acceptors=wal_acceptor_connstrs)
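
    # The local data directory is gone after stop_and_destroy(), so
    # everything we read back here had to be reconstructed from the
    # pageserver basebackup (and, with wal_acceptors, from WAL that only
    # the safekeepers still have).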
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the row
            cur.execute('SELECT sum(key) FROM t')
            r = cur.fetchone()
            assert r == (5000050000, )
            print("res = ", r)

            # Insert another row
            cur.execute("INSERT INTO t VALUES (100001, 'payload2')")
            cur.execute('SELECT count(*) FROM t')

            r = cur.fetchone()
            assert r == (100001, )
            print("res = ", r)

    # Again remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute',
                                       wal_acceptors=wal_acceptor_connstrs)
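
    # Per the commit message: if the compute node is killed right after an
    # FPI-heavy read like the one below, the safekeepers can be left lagging
    # with VCL != flush_lsn, which is the tricky state for the next restart.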
    # That select causes lots of FPIs and increases the probability of
    # walkeepers lagging behind after query completion
    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the rows
            cur.execute('SELECT count(*) FROM t')

            r = cur.fetchone()
            assert r == (100001, )
            print("res = ", r)

    # And again remove data directory and restart
    pg.stop_and_destroy().create_start('test_restart_compute',
                                       wal_acceptors=wal_acceptor_connstrs)

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # We can still see the rows
            cur.execute('SELECT count(*) FROM t')

            r = cur.fetchone()
            assert r == (100001, )
            print("res = ", r)