neon/test_runner/regress/test_large_schema.py
Heikki Linnakangas 53f438a8a8 Rename "Postgres nodes" in control_plane to endpoints.
We use the term "endpoint" for compute Postgres nodes in the web UI
and user-facing documentation now. Adjust the nomenclature in the code.

This changes the name of the "neon_local pg" command to "neon_local
endpoint". Also adjust names of classes, variables etc. in the python
tests accordingly.

This also changes the directory structure so that endpoints are now
stored in:

    .neon/endpoints/<endpoint id>

instead of:

    .neon/pgdatadirs/tenants/<tenant_id>/<endpoint (node) name>

The tenant ID is no longer part of the path. That means that you
cannot have two endpoints with the same name/ID in two different
tenants anymore. That's consistent with how we treat endpoints in the
real control plane and proxy: the endpoint ID must be globally unique.
2023-04-13 14:34:29 +03:00
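For illustration, a minimal sketch of how the new layout can be resolved (the helper below is hypothetical and not part of this commit; only the ".neon/endpoints/<endpoint id>" layout and the global-uniqueness rule come from the message above):

    from pathlib import Path

    def endpoint_path(repo_dir: Path, endpoint_id: str) -> Path:
        # New layout: endpoints live directly under <repo_dir>/endpoints/, keyed only
        # by the globally unique endpoint ID; the tenant ID is no longer part of the path.
        return repo_dir / "endpoints" / endpoint_id

    # e.g. endpoint_path(Path(".neon"), "main") == Path(".neon/endpoints/main")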


import os
import time

from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder


# This test creates a large number of tables, which results in a large catalog.
# Right now Neon serializes a directory as a single key-value storage entry, so
# the layers it produces are filled mostly by one key.
# The original Neon implementation of checkpoint and compaction was not able to split
# a key, which led to large (several gigabytes) layer files (both ephemeral and delta
# layers). That may cause problems with uploading to S3 and also degrade performance
# because of ephemeral file swapping.
#
def test_large_schema(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()

    endpoint = env.endpoints.create_start("main")

    conn = endpoint.connect()
    cur = conn.cursor()

    tables = 2  # 10 is too much for a debug build
    partitions = 1000

    for i in range(1, tables + 1):
        print(f"iteration {i} / {tables}")

        # Restart the compute. The restart is not strictly needed; it is done mostly
        # because this test originally tried to model the problem reported by Ketteq.
        endpoint.stop()
        # Kill and restart the pageserver.
        # env.pageserver.stop(immediate=True)
        # env.pageserver.start()
        endpoint.start()
        retry_sleep = 0.5
        max_retries = 200
        retries = 0
        while True:
            try:
                conn = endpoint.connect()
                cur = conn.cursor()

                cur.execute(f"CREATE TABLE if not exists t_{i}(pk integer) partition by range (pk)")
                for j in range(1, partitions + 1):
                    cur.execute(
                        f"create table if not exists p_{i}_{j} partition of t_{i} for values from ({j}) to ({j + 1})"
                    )
                cur.execute(f"insert into t_{i} values (generate_series(1,{partitions}))")
                cur.execute("vacuum full")
                conn.close()
            except Exception as error:
                # It's normal that it takes some time for the pageserver to
                # restart, and for the connection to fail until it does. It
                # should eventually recover, so retry until it succeeds.
                print(f"failed: {error}")

                if retries < max_retries:
                    retries += 1
                    print(f"retry {retries} / {max_retries}")
                    time.sleep(retry_sleep)
                    continue
                else:
                    raise

            break
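    # Verify that all the data inserted above is still readable after the restarts.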
    conn = endpoint.connect()
    cur = conn.cursor()

    for i in range(1, tables + 1):
        cur.execute(f"SELECT count(*) FROM t_{i}")
        assert cur.fetchone() == (partitions,)
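    # pg_depend has grown large because of all the partitions. With sorting disabled,
    # the ORDER BY (which matches pg_depend_reference_index) presumably steers the
    # planner towards an ordered index scan over the catalog.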
cur.execute("set enable_sort=off")
cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid")
    # Check layer file sizes
    tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
    timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
    timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id)
    for filename in os.listdir(timeline_path):
        if filename.startswith("00000"):
            log.info(f"layer {filename} size is {os.path.getsize(timeline_path + filename)}")
            assert os.path.getsize(timeline_path + filename) < 512_000_000