mirror of https://github.com/neondatabase/neon.git, synced 2026-01-10 15:02:56 +00:00
Bodobolero/test cum stats persistence (#10995)
## Problem

So far, cumulative statistics have not been persisted when Neon scales to zero (suspends an endpoint). With PR https://github.com/neondatabase/neon/pull/6560 the cumulative statistics should now survive endpoint restarts and correctly trigger auto-vacuum and auto-analyze maintenance. Until now we did not have a test case that validates this improvement in our dev cloud environment with a real project.

## Summary of changes

Introduce the test case `test_cumulative_statistics_persistence` in the benchmarking workflow, running daily. It verifies that cumulative statistics are correctly persisted across restarts. Persisting them matters because they are used to decide when the auto-vacuum and auto-analyze trigger conditions are met.

The test performs the following steps:
- seed a new project using pgbench
- insert tuples that by themselves are not enough to trigger auto-vacuum
- suspend the endpoint
- resume the endpoint
- insert additional tuples that by themselves are not enough to trigger auto-vacuum, but in combination with the previous tuples are
- verify that autovacuum is triggered by the combination of tuples inserted before and after the endpoint suspension

## Test run

https://github.com/neondatabase/neon/actions/runs/13546879714/job/37860609089#step:6:282
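At its core, the persistence check can be sketched in a few lines of Python: read the cumulative counters from `pg_stat_user_tables`, suspend and resume the endpoint, and read them again. A minimal sketch, assuming a `connstr` for the project's endpoint and a suspend/resume step provided by the surrounding harness:

```python
# Minimal sketch: verify that cumulative statistics survive an endpoint restart.
# `connstr` and the suspend/resume step are assumed to come from the test harness.
import psycopg2


def account_counters(connstr: str):
    # n_tup_ins, vacuum_count and analyze_count are cumulative statistics;
    # they only keep their meaning across restarts if the stats file is persisted
    with psycopg2.connect(connstr) as conn, conn.cursor() as cur:
        cur.execute(
            "SELECT n_tup_ins, vacuum_count, analyze_count "
            "FROM pg_stat_user_tables WHERE relname = 'pgbench_accounts'"
        )
        return cur.fetchall()[0]


# before = account_counters(connstr)
# ... suspend and resume the endpoint via the Neon API ...
# assert account_counters(connstr) == before  # counters were persisted
```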
@@ -0,0 +1,221 @@
import math
import time
import traceback
from pathlib import Path

import psycopg2
import psycopg2.extras
import pytest
from fixtures.benchmark_fixture import NeonBenchmarker
from fixtures.log_helper import log
from fixtures.neon_api import NeonAPI, connection_parameters_to_env
from fixtures.neon_fixtures import PgBin
from fixtures.pg_version import PgVersion

vacuum_times_sql = """
SELECT
    relname AS table_name,
    last_autovacuum,
    last_autoanalyze
FROM
    pg_stat_user_tables
WHERE
    relname = 'pgbench_accounts'
ORDER BY
    last_autovacuum DESC, last_autoanalyze DESC
"""


def insert_first_chunk_and_verify_autovacuum_is_not_running(
    cur, rows_to_insert, autovacuum_naptime
):
    cur.execute(f"""
        INSERT INTO pgbench_accounts (aid, bid, abalance, filler)
        SELECT
            aid,
            (random() * 10)::int + 1 AS bid,
            (random() * 10000)::int AS abalance,
            'filler text' AS filler
        FROM generate_series(6800001, {6800001 + rows_to_insert - 1}) AS aid;
    """)
    assert cur.rowcount == rows_to_insert
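    # poll for a few autovacuum_naptime cycles and confirm that autovacuum never
    # picks up the table: this chunk alone stays below the insert threshold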
    for _ in range(5):
        time.sleep(0.5 * autovacuum_naptime)
        cur.execute(vacuum_times_sql)
        row = cur.fetchall()[0]
        log.info(f"last_autovacuum: {row[1]}, last_autoanalyze: {row[2]}")
        assert row[1] is None


def insert_second_chunk_and_verify_autovacuum_is_now_running(
    cur, rows_to_insert, autovacuum_naptime
):
    cur.execute(f"""
        INSERT INTO pgbench_accounts (aid, bid, abalance, filler)
        SELECT
            aid,
            (random() * 10)::int + 1 AS bid,
            (random() * 10000)::int AS abalance,
            'filler text' AS filler
        FROM generate_series({6800001 + rows_to_insert}, {6800001 + rows_to_insert * 2 - 1}) AS aid;
    """)
    assert cur.rowcount == rows_to_insert
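    # poll again: this time autovacuum is expected to pick up the table, because
    # the persisted pre-suspend inserts plus this chunk exceed the insert threshold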
    for _ in range(5):
        time.sleep(0.5 * autovacuum_naptime)
        cur.execute(vacuum_times_sql)
        row = cur.fetchall()[0]
        log.info(f"last_autovacuum: {row[1]}, last_autoanalyze: {row[2]}")
        if row[1] is not None:
            break
    assert row[1] is not None


@pytest.mark.remote_cluster
@pytest.mark.timeout(60 * 60)
def test_cumulative_statistics_persistence(
    pg_bin: PgBin,
    test_output_dir: Path,
    neon_api: NeonAPI,
    pg_version: PgVersion,
    zenbenchmark: NeonBenchmarker,
):
    """
    Verifies that cumulative statistics are correctly persisted across restarts.

    Cumulative statistics must survive restarts because they are used to decide
    when the auto-vacuum and auto-analyze trigger conditions are met.

    The test performs the following steps:
    - seed a new project using pgbench
    - insert tuples that by themselves are not enough to trigger auto-vacuum
    - suspend the endpoint
    - resume the endpoint
    - insert additional tuples that by themselves are not enough to trigger
      auto-vacuum, but in combination with the previous tuples are
    - verify that autovacuum is triggered by the combination of tuples inserted
      before and after the endpoint suspension
    """
    project = neon_api.create_project(pg_version)
    project_id = project["project"]["id"]
    neon_api.wait_for_operation_to_finish(project_id)
    endpoint_id = project["endpoints"][0]["id"]
    region_id = project["project"]["region_id"]
    log.info(f"Created project {project_id} with endpoint {endpoint_id} in region {region_id}")
    error_occurred = False
    try:
        connstr = project["connection_uris"][0]["connection_uri"]
        env = connection_parameters_to_env(project["connection_uris"][0]["connection_parameters"])
        # seed about 1 GiB of data into pgbench_accounts
        pg_bin.run_capture(["pgbench", "-i", "-s68"], env=env)

        conn = psycopg2.connect(connstr)
        conn.autocommit = True
        with conn.cursor() as cur:
            # assert pgbench_accounts contains exactly 6800000 rows (scale factor 68)
            cur.execute("select count(*) from pgbench_accounts")
            row_count = cur.fetchall()[0][0]
            assert row_count == 6800000

            # verify n_tup_ins, vacuum_count, analyze_count (manual vacuum and analyze)
            cur.execute(
                "select n_tup_ins, vacuum_count, analyze_count from pg_stat_user_tables where relname = 'pgbench_accounts'"
            )
            row = cur.fetchall()[0]
            assert row[0] == 6800000  # n_tup_ins
            assert row[1] == 1  # vacuum_count
            assert row[2] == 1  # analyze_count
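            # baseline: pgbench -i vacuums and analyzes the tables after loading,
            # which is where vacuum_count == 1 and analyze_count == 1 come from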

            # retrieve some GUCs (postgres settings) relevant to autovacuum
            cur.execute(
                "SELECT setting::int AS autovacuum_naptime FROM pg_settings WHERE name = 'autovacuum_naptime'"
            )
            autovacuum_naptime = cur.fetchall()[0][0]
            assert 0 < autovacuum_naptime < 300
            cur.execute(
                "SELECT setting::float AS autovacuum_vacuum_insert_scale_factor FROM pg_settings WHERE name = 'autovacuum_vacuum_insert_scale_factor'"
            )
            autovacuum_vacuum_insert_scale_factor = cur.fetchall()[0][0]
            assert 0.05 < autovacuum_vacuum_insert_scale_factor < 1.0
            cur.execute(
                "SELECT setting::int AS autovacuum_vacuum_insert_threshold FROM pg_settings WHERE name = 'autovacuum_vacuum_insert_threshold'"
            )
            autovacuum_vacuum_insert_threshold = cur.fetchall()[0][0]
            cur.execute(
                "SELECT setting::int AS pgstat_file_size_limit FROM pg_settings WHERE name = 'neon.pgstat_file_size_limit'"
            )
            pgstat_file_size_limit = cur.fetchall()[0][0]
            assert pgstat_file_size_limit > 10 * 1024  # at least 10 MB
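            # neon.pgstat_file_size_limit presumably caps the size of the statistics
            # file that Neon persists on suspend; the setting is read in KB here, so
            # 10 * 1024 corresponds to 10 MB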

            # insert rows that by themselves are not enough to trigger auto-vacuum;
            # vacuum insert threshold = vacuum base insert threshold
            #                         + vacuum insert scale factor * number of tuples
            # https://www.postgresql.org/docs/17/routine-vacuuming.html
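            # worked example, assuming the PostgreSQL defaults
            # autovacuum_vacuum_insert_threshold = 1000 and
            # autovacuum_vacuum_insert_scale_factor = 0.2: with row_count = 6800000,
            # autovacuum fires after 1000 + 0.2 * 6800000 = 1361000 inserted tuples;
            # a single chunk of ceil(1000 / 2 + 6800000 * 0.2 * 0.6) = 816500 rows
            # stays below that, while two chunks (1633000 rows) exceed it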
            rows_to_insert = int(
                math.ceil(
                    autovacuum_vacuum_insert_threshold / 2
                    + row_count * autovacuum_vacuum_insert_scale_factor * 0.6
                )
            )

            log.info(
                f"autovacuum_vacuum_insert_scale_factor: {autovacuum_vacuum_insert_scale_factor}, "
                f"autovacuum_vacuum_insert_threshold: {autovacuum_vacuum_insert_threshold}, "
                f"row_count: {row_count}"
            )
            log.info(f"Inserting {rows_to_insert} rows, which is below the 'vacuum insert threshold'")

            insert_first_chunk_and_verify_autovacuum_is_not_running(
                cur, rows_to_insert, autovacuum_naptime
            )

        conn.close()

        # suspend the endpoint
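        # (the step under test: since https://github.com/neondatabase/neon/pull/6560,
        # suspending the endpoint persists the cumulative statistics file)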
        log.info(f"Suspending endpoint {endpoint_id}")
        neon_api.suspend_endpoint(project_id, endpoint_id)
        neon_api.wait_for_operation_to_finish(project_id)
        time.sleep(60)  # give some time in between suspend and resume

        # resume the endpoint
        log.info(f"Starting endpoint {endpoint_id}")
        neon_api.start_endpoint(project_id, endpoint_id)
        neon_api.wait_for_operation_to_finish(project_id)

        conn = psycopg2.connect(connstr)
        conn.autocommit = True
        with conn.cursor() as cur:
            # insert additional rows that by themselves are not enough to trigger
            # auto-vacuum, but in combination with the rows inserted before the
            # suspension are
            log.info(f"Inserting another {rows_to_insert} rows, which is below the 'vacuum insert threshold'")
            insert_second_chunk_and_verify_autovacuum_is_now_running(
                cur, rows_to_insert, autovacuum_naptime
            )

            # verify the estimated number of tuples in pgbench_accounts is within
            # 6800000 + inserted rows, plus or minus 2 %
            cur.execute(
                "select reltuples::bigint from pg_class where relkind = 'r' and relname = 'pgbench_accounts'"
            )
            reltuples = cur.fetchall()[0][0]
            assert reltuples > (6800000 + rows_to_insert * 2) * 0.98
            assert reltuples < (6800000 + rows_to_insert * 2) * 1.02

            # verify the exact number of pgbench_accounts rows
            cur.execute("select count(*) from pgbench_accounts")
            row_count = cur.fetchall()[0][0]
            assert row_count == 6800000 + rows_to_insert * 2

            # verify n_tup_ins, vacuum_count, analyze_count (manual vacuum and analyze)
            cur.execute(
                "select n_tup_ins, vacuum_count, analyze_count from pg_stat_user_tables where relname = 'pgbench_accounts'"
            )
            row = cur.fetchall()[0]
            assert row[0] == 6800000 + rows_to_insert * 2  # n_tup_ins
            assert row[1] == 1  # vacuum_count
            assert row[2] == 1  # analyze_count
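            # vacuum_count and analyze_count track manual VACUUM/ANALYZE only
            # (autovacuum increments autovacuum_count instead), so both are still 1:
            # the values recorded before the suspension survived the restart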

        conn.close()

    except Exception as e:
        error_occurred = True
        log.error(f"Caught exception: {e}")
        log.error(traceback.format_exc())
    finally:
        assert not error_occurred  # fail the test if an error occurred
        neon_api.delete_project(project_id)