neon/test_runner/regress/test_lfc_resize.py
Heikki Linnakangas af5c54ed14 test: Make test_lfc_resize more robust (#9117)
1. Increase statement_timeout. It defaults to 120 s, which is not quite
enough on slow or busy systems with a debug build. On my laptop, the index
creation takes about 100 s. On the buildfarm, we've seen failures, e.g.:
https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9084/10997888708/index.html#suites/821f97908a487f1d7d3a2a4dd1571e99/db1834bddfe8c5b9/

2. Keep twiddling the LFC size throughout the whole test. Before, we would
do it for the first 10 seconds, but that only covers a small part of the
pgbench initialization phase. Change the loop so that the pgbench run
time determines how long the test runs, and we keep changing the LFC size
for the whole time.

In passing, also fix the bogus test description, which was copy-pasted from
a completely unrelated test.
2024-09-24 23:38:16 +03:00


import os
import random
import re
import subprocess
import threading
import time

import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgBin


@pytest.mark.timeout(600)
def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
    """
    Test resizing the Local File Cache
    """
    env = neon_simple_env
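
    # The LFC is backed by the file 'file.cache' in the data directory. Both
    # cache size settings start at 512 MB; neon.file_cache_size_limit is the
    # one this test changes at runtime via ALTER SYSTEM + pg_reload_conf().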
    endpoint = env.endpoints.create_start(
        "main",
        config_lines=[
            "neon.file_cache_path='file.cache'",
            "neon.max_file_cache_size=512MB",
            "neon.file_cache_size_limit=512MB",
        ],
    )

    n_resize = 10
    scale = 100

    def run_pgbench(connstr: str):
        log.info(f"Start a pgbench workload on pg {connstr}")
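        # "pgbench -i" populates the standard pgbench tables at scale factor 100
        # (about 10 million rows in pgbench_accounts). The second command runs a
        # 10-client, select-only benchmark with prepared statements for n_resize
        # seconds.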
        pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
        pg_bin.run_capture(["pgbench", "-c10", f"-T{n_resize}", "-Mprepared", "-S", connstr])

    # Initializing the pgbench database can be very slow, especially on debug builds.
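    # The default statement_timeout of 120 s is not always enough for that, so
    # bump it to 300 s.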
    connstr = endpoint.connstr(options="-cstatement_timeout=300s")

    thread = threading.Thread(target=run_pgbench, args=(connstr,), daemon=True)
    thread.start()

    conn = endpoint.connect()
    cur = conn.cursor()

    # For as long as pgbench is running, twiddle the LFC size once a second.
    # Note that we launch this immediately, already while the "pgbench -i"
    # initialization step is still running. That's quite a different workload
    # than the actual pgbench benchmark run, so this gives us coverage of both.
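    #
    # ALTER SYSTEM writes the new value to postgresql.auto.conf, and
    # pg_reload_conf() makes the running server apply it without a restart.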
    while thread.is_alive():
        size = random.randint(1, 512)
        cur.execute(f"alter system set neon.file_cache_size_limit='{size}MB'")
        cur.execute("select pg_reload_conf()")
        time.sleep(1)
    thread.join()

    # At the end, set it to 100 MB, and perform a final check that the disk usage
    # of the file is in that ballpark.
    #
    # We retry the check a few times, because it might take a while for the
    # system to react to changing the setting and shrinking the file.
    cur.execute("alter system set neon.file_cache_size_limit='100MB'")
    cur.execute("select pg_reload_conf()")
    nretries = 10
    while True:
        lfc_file_path = f"{endpoint.pg_data_dir_path()}/file.cache"
        lfc_file_size = os.path.getsize(lfc_file_path)
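        # os.path.getsize() reports the apparent size of the cache file, while
        # "ls -sk" reports its actual disk usage in 1 KiB blocks; the file may be
        # sparse, so the two can differ once the cache has shrunk.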
        res = subprocess.run(
            ["ls", "-sk", lfc_file_path], check=True, text=True, capture_output=True
        )
        lfc_file_blocks = re.findall("([0-9A-F]+)", res.stdout)[0]
        log.info(f"Size of LFC file {lfc_file_size}, blocks {lfc_file_blocks}")
        assert lfc_file_size <= 512 * 1024 * 1024

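        # 128 * 1024 blocks of 1 KiB is 128 MiB, i.e. the 100 MB limit plus some
        # slack.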
        if int(lfc_file_blocks) <= 128 * 1024 or nretries == 0:
            break

        nretries = nretries - 1
        time.sleep(1)

    assert int(lfc_file_blocks) <= 128 * 1024