neon/test_runner/batch_others/test_wal_acceptor_async.py

import asyncio
import uuid
import asyncpg
import random
import time

from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
from fixtures.log_helper import getLogger
from fixtures.utils import lsn_from_hex, lsn_to_hex
from typing import List

log = getLogger('root.safekeeper_async')


class BankClient(object):
    def __init__(self, conn: asyncpg.Connection, n_accounts, init_amount):
        self.conn: asyncpg.Connection = conn
        self.n_accounts = n_accounts
        self.init_amount = init_amount

    async def initdb(self):
        await self.conn.execute('DROP TABLE IF EXISTS bank_accs')
        await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)')
        await self.conn.execute(
            '''
            INSERT INTO bank_accs
            SELECT *, $1 FROM generate_series(0, $2)
        ''',
            self.init_amount,
            self.n_accounts - 1)
        await self.conn.execute('DROP TABLE IF EXISTS bank_log')
        await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)')

    async def check_invariant(self):
        row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs')
        assert row['sum'] == self.n_accounts * self.init_amount


async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
    # avoid deadlocks by sorting uids
    if from_uid > to_uid:
        from_uid, to_uid, amount = to_uid, from_uid, -amount

    async with conn.transaction():
        await conn.execute(
            'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2',
            amount,
            to_uid,
        )
        await conn.execute(
            'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2',
            amount,
            from_uid,
        )
        await conn.execute(
            'INSERT INTO bank_log VALUES ($1, $2, $3)',
            from_uid,
            to_uid,
            amount,
        )


class WorkerStats(object):
    def __init__(self, n_workers):
        self.counters = [0] * n_workers
        self.running = True

    def reset(self):
        self.counters = [0] * len(self.counters)

    def inc_progress(self, worker_id):
        self.counters[worker_id] += 1

    def check_progress(self):
        log.debug("Workers progress: {}".format(self.counters))

        # every worker should finish at least one tx
        assert all(cnt > 0 for cnt in self.counters)

        progress = sum(self.counters)
        log.info('All workers made {} transactions'.format(progress))


async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer):
    pg_conn = await pg.connect_async()
    log.debug('Started worker {}'.format(worker_id))

    while stats.running:
        from_uid = random.randint(0, n_accounts - 1)
        to_uid = (from_uid + random.randint(1, n_accounts - 1)) % n_accounts
        amount = random.randint(1, max_transfer)

        await bank_transfer(pg_conn, from_uid, to_uid, amount)
        stats.inc_progress(worker_id)

        log.debug('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid))

    log.debug('Finished worker {}'.format(worker_id))

    await pg_conn.close()


async def wait_for_lsn(safekeeper: Safekeeper,
                       tenant_id: str,
                       timeline_id: str,
                       wait_lsn: str,
                       polling_interval=1,
                       timeout=60):
    """
    Poll flush_lsn from safekeeper until it's greater or equal than
    provided wait_lsn. To do that, timeline_status is fetched from
    safekeeper every polling_interval seconds.
    """

    started_at = time.time()
    client = safekeeper.http_client()

    flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
    log.info(
        f'Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}'
    )

    while lsn_from_hex(wait_lsn) > lsn_from_hex(flush_lsn):
        elapsed = time.time() - started_at
        if elapsed > timeout:
            raise RuntimeError(
                f"timed out waiting for safekeeper at port {safekeeper.port.pg} to reach {wait_lsn}, current lsn is {flush_lsn}"
            )

        await asyncio.sleep(polling_interval)
        flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
        log.debug(f'safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}')


# This test will run several iterations and check progress in each of them.
# On each iteration 1 acceptor is stopped, and 2 others should allow
# background workers execute transactions. In the end, state should remain
# consistent.
async def run_restarts_under_load(env: NeonEnv,
                                  pg: Postgres,
                                  acceptors: List[Safekeeper],
                                  n_workers=10,
                                  n_accounts=100,
                                  init_amount=100000,
                                  max_transfer=100,
                                  period_time=4,
                                  iterations=10):
    # Set timeout for this test at 5 minutes. It should be enough for test to complete
    # and less than CircleCI's no_output_timeout, taking into account that this timeout
    # is checked only at the beginning of every iteration.
    test_timeout_at = time.monotonic() + 5 * 60

    pg_conn = await pg.connect_async()
    tenant_id = await pg_conn.fetchval("show neon.tenant_id")
    timeline_id = await pg_conn.fetchval("show neon.timeline_id")

    bank = BankClient(pg_conn, n_accounts=n_accounts, init_amount=init_amount)
    # create tables and initial balances
    await bank.initdb()

    stats = WorkerStats(n_workers)
    workers = []
    for worker_id in range(n_workers):
        worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer)
        workers.append(asyncio.create_task(worker))

    for it in range(iterations):
        assert time.monotonic() < test_timeout_at, 'test timed out'

        victim_idx = it % len(acceptors)
        victim = acceptors[victim_idx]
        victim.stop()

        flush_lsn = await pg_conn.fetchval('SELECT pg_current_wal_flush_lsn()')
        flush_lsn = lsn_to_hex(flush_lsn)
        log.info(f'Postgres flush_lsn {flush_lsn}')

        pageserver_lsn = env.pageserver.http_client().timeline_detail(
            uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"]
        sk_ps_lag = lsn_from_hex(flush_lsn) - lsn_from_hex(pageserver_lsn)
        log.info(f'Pageserver last_record_lsn={pageserver_lsn} lag={sk_ps_lag / 1024}kb')

        # Wait until alive safekeepers catch up with postgres
        for idx, safekeeper in enumerate(acceptors):
            if idx != victim_idx:
                await wait_for_lsn(safekeeper, tenant_id, timeline_id, flush_lsn)

        stats.reset()
        await asyncio.sleep(period_time)
        # assert that at least one transaction has completed in every worker
        stats.check_progress()

        victim.start()

    log.info('Iterations are finished, exiting coroutines...')
    stats.running = False
    # await all workers
    await asyncio.gather(*workers)
    # assert balances sum hasn't changed
    await bank.check_invariant()
    await pg_conn.close()


# Restart acceptors one by one, while executing and validating bank transactions
def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    env.neon_cli.create_branch('test_safekeepers_restarts_under_load')
    # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
    pg = env.postgres.create_start('test_safekeepers_restarts_under_load',
                                   config_lines=['max_replication_write_lag=1MB'])

    asyncio.run(run_restarts_under_load(env, pg, env.safekeepers))


# Restart acceptors one by one and test that everything is working as expected
# when checkpoins are triggered frequently by max_wal_size=32MB. Because we have
# wal_keep_size=0, there will be aggressive WAL segments recycling.
def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    env.neon_cli.create_branch('test_restarts_frequent_checkpoints')
    # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
    pg = env.postgres.create_start('test_restarts_frequent_checkpoints',
                                   config_lines=[
                                       'max_replication_write_lag=1MB',
                                       'min_wal_size=32MB',
                                       'max_wal_size=32MB',
                                       'log_checkpoints=on'
                                   ])

    # we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments
    # are not removed before broadcasted to all safekeepers, with the help of replication slot
    asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5))