Files
neon/test_runner/regress/test_large_schema.py
Heikki Linnakangas 3aca717f3d Reorganize python tests.
Merge batch_others and batch_pg_regress. The original idea was to
split all the python tests into multiple "batches" and run each batch
in parallel as a separate CI job. However, the batch_pg_regress batch
was pretty short compared to all the tests in batch_others. We could
split batch_others into multiple batches, but it actually seems better
to just treat them as one big pool of tests and let pytest handle
the parallelism on its own. If we need to split them across multiple
nodes in the future, we could use pytest-shard or something else,
instead of managing the batches ourselves.

Merge test_neon_regress.py, test_pg_regress.py and test_isolation.py
into one file, test_pg_regress.py. It seems clearer to group all
pg_regress-based tests into one file, now that they all live in
the same directory.
2022-08-30 18:25:38 +03:00
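
The "one big pool" approach described above might look like the following
sketch, assuming the pytest-xdist plugin (which provides the -n flag) in
addition to the pytest-shard plugin the message mentions:

import pytest

# Run the whole regress pool, letting pytest-xdist spread it over 4 workers.
pytest.main(["test_runner/regress", "-n", "4"])

# Hypothetical invocation: if the pool later needs splitting across CI nodes,
# pytest-shard can partition it without hand-maintained batches.
pytest.main(["test_runner/regress", "--shard-id=0", "--num-shards=2"])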

import os
import time

from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder


# This test creates a large number of tables, which results in a large catalog.
# Right now Neon serializes the relation directory as a single key-value
# storage entry, so it produces layers filled mostly by one key.
# The original Neon implementation of checkpoint and compaction is not able to
# split a key, which leads to large (several gigabytes) layer files (both
# ephemeral and delta layers).
# That may cause problems with uploading to S3 and also degrade performance
# because of ephemeral file swapping.
#
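
# A toy illustration of the problem described above (an assumption for
# clarity, not Neon's real serialization format): the whole relation
# directory lives under a single key, so every CREATE TABLE rewrites one
# ever-growing value, and compaction cannot split that key across layer
# files.
def _directory_value_size(ntables: int) -> int:
    import pickle

    directory = {f"t_{i}": 16384 + i for i in range(ntables)}  # relname -> oid
    return len(pickle.dumps(directory))  # size of the single stored value
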
def test_large_schema(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()
    pg = env.postgres.create_start("main")

    conn = pg.connect()
    cur = conn.cursor()

    tables = 2  # 10 is too much for a debug build
    partitions = 1000
    for i in range(1, tables + 1):
        print(f"iteration {i} / {tables}")

        # Restart the compute node. The restart is not strictly needed; it is
        # done mostly because this test was originally written to model a
        # problem reported by Ketteq.
        pg.stop()
        # Kill and restart the pageserver.
        # env.pageserver.stop(immediate=True)
        # env.pageserver.start()
        pg.start()

        retry_sleep = 0.5
        max_retries = 200
        retries = 0
        while True:
            try:
                conn = pg.connect()
                cur = conn.cursor()
                cur.execute(f"CREATE TABLE if not exists t_{i}(pk integer) partition by range (pk)")
                for j in range(1, partitions + 1):
                    cur.execute(
                        f"create table if not exists p_{i}_{j} partition of t_{i} for values from ({j}) to ({j + 1})"
                    )
                cur.execute(f"insert into t_{i} values (generate_series(1,{partitions}))")
                cur.execute("vacuum full")
                conn.close()
            except Exception as error:
                # It's normal that it takes some time for the pageserver to
                # restart, and for the connection to fail until it does. It
                # should eventually recover, so retry until it succeeds.
                print(f"failed: {error}")
                if retries < max_retries:
                    retries += 1
                    print(f"retry {retries} / {max_retries}")
                    time.sleep(retry_sleep)
                    continue
                else:
                    raise
            break
    conn = pg.connect()
    cur = conn.cursor()
    for i in range(1, tables + 1):
        cur.execute(f"SELECT count(*) FROM t_{i}")
        assert cur.fetchone() == (partitions,)

    # Disabling sorts presumably forces the ORDER BY below to be satisfied by
    # an index scan over pg_depend, which is large after creating all the
    # partitions, so this exercises index access on a big catalog.
    cur.execute("set enable_sort=off")
    cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid")

    # Check layer file sizes.
    tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
    timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
    timeline_path = f"{env.repo_dir}/tenants/{tenant_id}/timelines/{timeline_id}/"
    for filename in os.listdir(timeline_path):
        if filename.startswith("00000"):
            log.info(f"layer {filename} size is {os.path.getsize(timeline_path + filename)}")
            assert os.path.getsize(timeline_path + filename) < 512_000_000