Files
neon/test_runner/regress/test_pg_regress.py
Alex Chi Z. a155914c1c fix(neon): disable create tablespace stmt (#8657)
part of https://github.com/neondatabase/neon/issues/8653

Disable create tablespace stmt. It turns out it requires much less
effort to do the regress test mode flag than patching the test cases,
and given that we might need to support tablespaces in the future, I
decided to add a new flag `regress_test_mode` to change the behavior of
create tablespace.

Tested manually that without setting regress_test_mode, create
tablespace will be rejected.



---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
2024-08-09 09:18:55 +01:00

327 lines
12 KiB
Python

#
# This file runs pg_regress-based tests.
#
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING, cast
import pytest
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
check_restored_datadir_content,
tenant_get_shards,
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import s3_storage
if TYPE_CHECKING:
from typing import Optional
from fixtures.neon_fixtures import PgBin
from pytest import CaptureFixture
TENANT_CONF = {
# Scaled down thresholds so that we are exercising the pageserver beyond just writing
# ephemeral/L0 layers, and because debug-mode code is slow to read from full sized ephemeral layer files.
"pitr_interval": "60s",
"checkpoint_distance": f"{8 * 1024 * 1024}",
"compaction_target_size": f"{8 * 1024 * 1024}",
}
# # Ensure that compaction works, on a timeline containing all the diversity that postgres regression tests create.
# # There should have been compactions mid-test as well, this final check is in addition those.
# for (shard, pageserver) in tenant_get_shards(env, env.initial_tenant):
# pageserver.http_client().timeline_checkpoint(env.initial_tenant, env.initial_timeline, force_repartition=True, force_image_layer_creation=True)
def post_checks(env: NeonEnv, test_output_dir: Path, db_name: str, endpoint: Endpoint):
"""
After running some opaque tests that create interesting content in a timeline, run
some generic integrity checks that the storage stack is able to reproduce the written
data properly.
"""
ignored_files: Optional[list[str]] = None
# Neon handles unlogged relations in a special manner. During a
# basebackup, we ship the init fork as the main fork. This presents a
# problem in that the endpoint's data directory and the basebackup will
# have differences and will fail the eventual file comparison.
#
# Unlogged tables were introduced in version 9.1. ALTER TABLE grew
# support for setting the persistence of a table in 9.5. The reason that
# this doesn't affect versions < 15 (but probably would between 9.1 and
# 9.5) is that all the regression tests that deal with unlogged tables
# up until that point dropped the unlogged tables or set them to logged
# at some point during the test.
#
# In version 15, Postgres grew support for unlogged sequences, and with
# that came a few more regression tests. These tests did not all drop
# the unlogged tables/sequences prior to finishing.
#
# But unlogged sequences came with a bug in that, sequences didn't
# inherit the persistence of their "parent" tables if they had one. This
# was fixed and backported to 15, thus exacerbating our problem a bit.
#
# So what we can do is just ignore file differences between the data
# directory and basebackup for unlogged relations.
results = cast(
"list[tuple[str, str]]",
endpoint.safe_psql(
"""
SELECT
relkind,
pg_relation_filepath(
pg_filenode_relation(reltablespace, relfilenode)
) AS unlogged_relation_paths
FROM pg_class
WHERE relpersistence = 'u'
""",
dbname=db_name,
),
)
unlogged_relation_files: list[str] = []
for r in results:
unlogged_relation_files.append(r[1])
# This is related to the following Postgres commit:
#
# commit ccadf73163ca88bdaa74b8223d4dde05d17f550b
# Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
# Date: 2023-08-23 09:21:31 -0500
#
# Use the buffer cache when initializing an unlogged index.
#
# This patch was backpatched to 16. Without it, the LSN in the
# page header would be 0/0 in the data directory, which wouldn't
# match the LSN generated during the basebackup, thus creating
# a difference.
if env.pg_version <= PgVersion.V15 and r[0] == "i":
unlogged_relation_files.append(f"{r[1]}_init")
ignored_files = unlogged_relation_files
check_restored_datadir_content(test_output_dir, env, endpoint, ignored_files=ignored_files)
# Ensure that compaction works, on a timeline containing all the diversity that postgres regression tests create.
# There should have been compactions mid-test as well, this final check is in addition those.
for shard, pageserver in tenant_get_shards(env, env.initial_tenant):
pageserver.http_client().timeline_checkpoint(
shard, env.initial_timeline, force_repartition=True, force_image_layer_creation=True
)
# Run the main PostgreSQL regression tests, in src/test/regress.
#
@pytest.mark.timeout(900) # Contains many sub-tests, is slow in debug builds
@pytest.mark.parametrize("shard_count", [None, 4])
def test_pg_regress(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_bin: PgBin,
capsys: CaptureFixture[str],
base_dir: Path,
pg_distrib_dir: Path,
shard_count: Optional[int],
):
DBNAME = "regression"
"""
:param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this
many shards.
"""
if shard_count is not None:
neon_env_builder.num_pageservers = shard_count
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF,
initial_tenant_shard_count=shard_count,
)
# Connect to postgres and create a database called "regression".
endpoint = env.endpoints.create_start(
"main",
config_lines=[
# Enable the test mode, so that we don't need to patch the test cases.
"neon.regress_test_mode = true",
],
)
endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / "regress"
(runpath / "testtablespace").mkdir(parents=True)
# Compute all the file locations that pg_regress will need.
build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
schedule = src_path / "parallel_schedule"
pg_regress = build_path / "pg_regress"
pg_regress_command = [
str(pg_regress),
'--bindir=""',
"--use-existing",
f"--bindir={bindir}",
f"--dlpath={build_path}",
f"--schedule={schedule}",
f"--inputdir={src_path}",
]
env_vars = {
"PGPORT": str(endpoint.default_options["port"]),
"PGUSER": endpoint.default_options["user"],
"PGHOST": endpoint.default_options["host"],
}
# Run the command.
# We don't capture the output. It's not too chatty, and it always
# logs the exact same data to `regression.out` anyway.
with capsys.disabled():
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
post_checks(env, test_output_dir, DBNAME, endpoint)
# Run the PostgreSQL "isolation" tests, in src/test/isolation.
#
@pytest.mark.timeout(600) # Contains many sub-tests, is slow in debug builds
@pytest.mark.parametrize("shard_count", [None, 4])
def test_isolation(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_bin: PgBin,
capsys: CaptureFixture[str],
base_dir: Path,
pg_distrib_dir: Path,
shard_count: Optional[int],
):
DBNAME = "isolation_regression"
if shard_count is not None:
neon_env_builder.num_pageservers = shard_count
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
)
# Connect to postgres and create a database called "regression".
# isolation tests use prepared transactions, so enable them
endpoint = env.endpoints.create_start(
"main",
config_lines=[
"max_prepared_transactions=100",
# Enable the test mode, so that we don't need to patch the test cases.
"neon.regress_test_mode = true",
],
)
endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
# Create some local directories for pg_isolation_regress to run in.
runpath = test_output_dir / "regress"
(runpath / "testtablespace").mkdir(parents=True)
# Compute all the file locations that pg_isolation_regress will need.
build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
schedule = src_path / "isolation_schedule"
pg_isolation_regress = build_path / "pg_isolation_regress"
pg_isolation_regress_command = [
str(pg_isolation_regress),
"--use-existing",
f"--bindir={bindir}",
f"--dlpath={build_path}",
f"--inputdir={src_path}",
f"--schedule={schedule}",
]
env_vars = {
"PGPORT": str(endpoint.default_options["port"]),
"PGUSER": endpoint.default_options["user"],
"PGHOST": endpoint.default_options["host"],
}
# Run the command.
# We don't capture the output. It's not too chatty, and it always
# logs the exact same data to `regression.out` anyway.
with capsys.disabled():
pg_bin.run(pg_isolation_regress_command, env=env_vars, cwd=runpath)
# This fails with a mismatch on `pg_multixact/offsets/0000`
# post_checks(env, test_output_dir, DBNAME, endpoint)
# Run extra Neon-specific pg_regress-based tests. The tests and their
# schedule file are in the sql_regress/ directory.
@pytest.mark.parametrize("shard_count", [None, 4])
def test_sql_regress(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
pg_bin: PgBin,
capsys: CaptureFixture[str],
base_dir: Path,
pg_distrib_dir: Path,
shard_count: Optional[int],
):
DBNAME = "regression"
if shard_count is not None:
neon_env_builder.num_pageservers = shard_count
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
)
# Connect to postgres and create a database called "regression".
endpoint = env.endpoints.create_start(
"main",
config_lines=[
# Enable the test mode, so that we don't need to patch the test cases.
"neon.regress_test_mode = true",
],
)
endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / "regress"
(runpath / "testtablespace").mkdir(parents=True)
# Compute all the file locations that pg_regress will need.
# This test runs neon specific tests
build_path = pg_distrib_dir / f"build/v{env.pg_version}/src/test/regress"
src_path = base_dir / "test_runner/sql_regress"
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
schedule = src_path / "parallel_schedule"
pg_regress = build_path / "pg_regress"
pg_regress_command = [
str(pg_regress),
"--use-existing",
f"--bindir={bindir}",
f"--dlpath={build_path}",
f"--schedule={schedule}",
f"--inputdir={src_path}",
]
env_vars = {
"PGPORT": str(endpoint.default_options["port"]),
"PGUSER": endpoint.default_options["user"],
"PGHOST": endpoint.default_options["host"],
}
# Run the command.
# We don't capture the output. It's not too chatty, and it always
# logs the exact same data to `regression.out` anyway.
with capsys.disabled():
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
post_checks(env, test_output_dir, DBNAME, endpoint)