Refactor test_unavailability (#2134)

test_unavailability now uses asyncio instead of Process. The test was refactored to fix a possible race condition.
2026-01-06 21:12:55 +00:00 · 2022-07-20 22:13:05 +03:00
parent cc680dd81c
commit b445cf7665
2 changed files with 52 additions and 55 deletions
--- a/test_runner/batch_others/test_wal_acceptor.py
+++ b/test_runner/batch_others/test_wal_acceptor.py
@@ -203,61 +203,6 @@ def test_restarts(neon_env_builder: NeonEnvBuilder):
    assert cur.fetchone() == (500500, )


-start_delay_sec = 2
-
-
-def delayed_safekeeper_start(wa):
-    time.sleep(start_delay_sec)
-    wa.start()
-
-
-# When majority of acceptors is offline, commits are expected to be frozen
-def test_unavailability(neon_env_builder: NeonEnvBuilder):
-    neon_env_builder.num_safekeepers = 2
-    env = neon_env_builder.init_start()
-
-    env.neon_cli.create_branch('test_safekeepers_unavailability')
-    pg = env.postgres.create_start('test_safekeepers_unavailability')
-
-    # we rely upon autocommit after each statement
-    # as waiting for acceptors happens there
-    pg_conn = pg.connect()
-    cur = pg_conn.cursor()
-
-    # check basic work with table
-    cur.execute('CREATE TABLE t(key int primary key, value text)')
-    cur.execute("INSERT INTO t values (1, 'payload')")
-
-    # shutdown one of two acceptors, that is, majority
-    env.safekeepers[0].stop()
-
-    proc = Process(target=delayed_safekeeper_start, args=(env.safekeepers[0], ))
-    proc.start()
-
-    start = time.time()
-    cur.execute("INSERT INTO t values (2, 'payload')")
-    # ensure that the query above was hanging while acceptor was down
-    assert (time.time() - start) >= start_delay_sec
-    proc.join()
-
-    # for the world's balance, do the same with second acceptor
-    env.safekeepers[1].stop()
-
-    proc = Process(target=delayed_safekeeper_start, args=(env.safekeepers[1], ))
-    proc.start()
-
-    start = time.time()
-    cur.execute("INSERT INTO t values (3, 'payload')")
-    # ensure that the query above was hanging while acceptor was down
-    assert (time.time() - start) >= start_delay_sec
-    proc.join()
-
-    cur.execute("INSERT INTO t values (4, 'payload')")
-
-    cur.execute('SELECT sum(key) FROM t')
-    assert cur.fetchone() == (10, )
-
-
 # shut down random subset of acceptors, sleep, wake them up, rinse, repeat
 def xmas_garland(acceptors, stop):
    while not bool(stop.value):
--- a/test_runner/batch_others/test_wal_acceptor_async.py
+++ b/test_runner/batch_others/test_wal_acceptor_async.py
@@ -404,3 +404,55 @@ def test_concurrent_computes(neon_env_builder: NeonEnvBuilder):

    env.neon_cli.create_branch('test_concurrent_computes')
    asyncio.run(run_concurrent_computes(env))
+
+
+# Stop safekeeper `sk` and check that a query (an INSERT of one row with the given `key`
+# into table t) is still pending after `start_delay_sec`; then start `sk` and await it.
+async def check_unavailability(sk: Safekeeper,
+                               conn: asyncpg.Connection,
+                               key: int,
+                               start_delay_sec: int = 2):
+    # shut down one of two safekeepers, so that a majority is no longer available
+    sk.stop()
+
+    bg_query = asyncio.create_task(conn.execute(f"INSERT INTO t values ({key}, 'payload')"))
+
+    await asyncio.sleep(start_delay_sec)
+    # the query must still be pending while the safekeeper is down
+    assert not bg_query.done()
+
+    # start the safekeeper again and wait for the pending query to finish
+    sk.start()
+    await bg_query
+    assert bg_query.done()  # holds once the await above has returned
+
+
+async def run_unavailability(env: NeonEnv, pg: Postgres):
+    conn = await pg.connect_async()  # NOTE(review): never closed in this function
+
+    # baseline before any safekeeper is stopped: create the table and insert row 1
+    await conn.execute('CREATE TABLE t(key int primary key, value text)')
+    await conn.execute("INSERT INTO t values (1, 'payload')")
+
+    # stop safekeeper 0 and check that the INSERT of row 2 is blocked while it is down
+    await check_unavailability(env.safekeepers[0], conn, 2)
+
+    # repeat the same check with safekeeper 1, inserting row 3
+    await check_unavailability(env.safekeepers[1], conn, 3)
+
+    # check that we can execute queries after both safekeepers have been restarted
+    await conn.execute("INSERT INTO t values (4, 'payload')")
+
+    result_sum = await conn.fetchval('SELECT sum(key) FROM t')
+    assert result_sum == 10  # 1 + 2 + 3 + 4: all four inserted keys are present
+
+
+# With 2 safekeepers, stopping either one leaves no majority; commits are expected to be frozen
+def test_unavailability(neon_env_builder: NeonEnvBuilder):
+    neon_env_builder.num_safekeepers = 2
+    env = neon_env_builder.init_start()
+
+    env.neon_cli.create_branch('test_safekeepers_unavailability')
+    pg = env.postgres.create_start('test_safekeepers_unavailability')
+
+    asyncio.run(run_unavailability(env, pg))  # runs the async scenario to completion