mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-16 01:42:55 +00:00
Previously we worked around file comparison issues by dropping unlogged relations in the pg_regress tests, but this would lead to an unnecessary diff when compared to upstream in our Postgres fork. Instead, we can precompute the files that we know will be different, and ignore them.
266 lines
9.8 KiB
Python
#
|
|
# This file runs pg_regress-based tests.
|
|
#
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, cast
|
|
|
|
import pytest
|
|
from fixtures.neon_fixtures import (
|
|
NeonEnvBuilder,
|
|
check_restored_datadir_content,
|
|
)
|
|
from fixtures.pg_version import PgVersion
|
|
from fixtures.remote_storage import s3_storage
|
|
|
|
if TYPE_CHECKING:
|
|
from typing import Optional
|
|
|
|
from fixtures.neon_fixtures import PgBin
|
|
from pytest import CaptureFixture
|
|
|
|
|
|
# Run the main PostgreSQL regression tests, in src/test/regress.
#
@pytest.mark.parametrize("shard_count", [None, 4])
def test_pg_regress(
    neon_env_builder: NeonEnvBuilder,
    test_output_dir: Path,
    build_type: str,
    pg_bin: PgBin,
    capsys: CaptureFixture[str],
    base_dir: Path,
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    """
    Run the main PostgreSQL regression tests (src/test/regress) against a Neon endpoint,
    then verify that a restored data directory matches the endpoint's data directory.

    :param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this
        many shards.
    """
    # NOTE(review): the docstring must be the first statement of the function body.
    # Previously DBNAME was assigned before it, which silently demoted the
    # docstring to a no-op string expression.
    DBNAME = "regression"

    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count

    if build_type == "debug":
        # Disable vectored read path cross validation since it makes the test time out.
        neon_env_builder.pageserver_config_override = "validate_vectored_get=false"

    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")

    # Create some local directories for pg_regress to run in.
    runpath = test_output_dir / "regress"
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
    pg_regress = build_path / "pg_regress"

    # NOTE(review): the command previously contained both '--bindir=""' and
    # f"--bindir={bindir}". pg_regress takes the last occurrence of a repeated
    # option, so the empty first value was always overridden; it has been removed.
    pg_regress_command = [
        str(pg_regress),
        "--use-existing",
        f"--bindir={bindir}",
        f"--dlpath={build_path}",
        f"--schedule={schedule}",
        f"--inputdir={src_path}",
    ]

    # Point libpq (used by pg_regress) at the endpoint we just started.
    env_vars = {
        "PGPORT": str(endpoint.default_options["port"]),
        "PGUSER": endpoint.default_options["user"],
        "PGHOST": endpoint.default_options["host"],
    }

    # Run the command.
    # We don't capture the output. It's not too chatty, and it always
    # logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

    ignored_files: Optional[list[str]] = None

    # Neon handles unlogged relations in a special manner. During a
    # basebackup, we ship the init fork as the main fork. This presents a
    # problem in that the endpoint's data directory and the basebackup will
    # have differences and will fail the eventual file comparison.
    #
    # Unlogged tables were introduced in version 9.1. ALTER TABLE grew
    # support for setting the persistence of a table in 9.5. The reason that
    # this doesn't affect versions < 15 (but probably would between 9.1 and
    # 9.5) is that all the regression tests that deal with unlogged tables
    # up until that point dropped the unlogged tables or set them to logged
    # at some point during the test.
    #
    # In version 15, Postgres grew support for unlogged sequences, and with
    # that came a few more regression tests. These tests did not all drop
    # the unlogged tables/sequences prior to finishing.
    #
    # But unlogged sequences came with a bug in that, sequences didn't
    # inherit the persistence of their "parent" tables if they had one. This
    # was fixed and backported to 15, thus exacerbating our problem a bit.
    #
    # So what we can do is just ignore file differences between the data
    # directory and basebackup for unlogged relations.
    results = cast(
        "list[tuple[str, str]]",
        endpoint.safe_psql(
            """
            SELECT
                relkind,
                pg_relation_filepath(
                    pg_filenode_relation(reltablespace, relfilenode)
                ) AS unlogged_relation_paths
            FROM pg_class
            WHERE relpersistence = 'u'
            """,
            dbname=DBNAME,
        ),
    )

    unlogged_relation_files: list[str] = []
    for r in results:
        unlogged_relation_files.append(r[1])
        # This is related to the following Postgres commit:
        #
        #   commit ccadf73163ca88bdaa74b8223d4dde05d17f550b
        #   Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
        #   Date:   2023-08-23 09:21:31 -0500
        #
        #   Use the buffer cache when initializing an unlogged index.
        #
        # This patch was backpatched to 16. Without it, the LSN in the
        # page header would be 0/0 in the data directory, which wouldn't
        # match the LSN generated during the basebackup, thus creating
        # a difference. So for versions <= 15, also ignore the init fork
        # of unlogged indexes.
        if env.pg_version <= PgVersion.V15 and r[0] == "i":
            unlogged_relation_files.append(f"{r[1]}_init")

    ignored_files = unlogged_relation_files

    check_restored_datadir_content(test_output_dir, env, endpoint, ignored_files=ignored_files)
|
# Run the PostgreSQL "isolation" tests, in src/test/isolation.
#
@pytest.mark.parametrize("shard_count", [None, 4])
def test_isolation(
    neon_env_builder: NeonEnvBuilder,
    test_output_dir: Path,
    pg_bin: PgBin,
    capsys: CaptureFixture[str],
    base_dir: Path,
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    """
    Run the PostgreSQL isolation test suite (src/test/isolation) against a Neon endpoint.

    :param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this
        many shards.
    """
    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    # The isolation tests exercise prepared transactions, which are off by
    # default, so start the endpoint with a non-zero limit.
    endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=100"])
    endpoint.safe_psql("CREATE DATABASE isolation_regression")

    # Scratch directory for pg_isolation_regress to run in.
    runpath = test_output_dir / "regress"
    (runpath / "testtablespace").mkdir(parents=True)

    # Locations of the test driver, its inputs, and the schedule file.
    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"

    cmd = [
        str(build_path / "pg_isolation_regress"),
        "--use-existing",
        f"--bindir={bindir}",
        f"--dlpath={build_path}",
        f"--inputdir={src_path}",
        f"--schedule={src_path / 'isolation_schedule'}",
    ]

    # Point libpq (used by pg_isolation_regress) at the endpoint we started.
    conn = endpoint.default_options
    env_vars = {
        "PGPORT": str(conn["port"]),
        "PGUSER": conn["user"],
        "PGHOST": conn["host"],
    }

    # Run the suite. Output is deliberately not captured: it is not too chatty,
    # and it always logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(cmd, env=env_vars, cwd=runpath)
|
# Run extra Neon-specific pg_regress-based tests. The tests and their
# schedule file are in the sql_regress/ directory.
@pytest.mark.parametrize("shard_count", [None, 4])
def test_sql_regress(
    neon_env_builder: NeonEnvBuilder,
    test_output_dir: Path,
    pg_bin: PgBin,
    capsys: CaptureFixture[str],
    base_dir: Path,
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    """
    Run the Neon-specific pg_regress tests from test_runner/sql_regress/ and
    verify that a restored data directory matches the endpoint's data directory.

    :param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this
        many shards.
    """
    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql("CREATE DATABASE regression")

    # Create some local directories for pg_regress to run in.
    runpath = test_output_dir / "regress"
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
    # This test runs neon specific tests.
    # NOTE(review): use `v_prefixed` for the build-tree path component, matching
    # how test_pg_regress and test_isolation compute their build_path.
    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / "test_runner/sql_regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
    pg_regress = build_path / "pg_regress"

    pg_regress_command = [
        str(pg_regress),
        "--use-existing",
        f"--bindir={bindir}",
        f"--dlpath={build_path}",
        f"--schedule={schedule}",
        f"--inputdir={src_path}",
    ]

    # Point libpq (used by pg_regress) at the endpoint we just started.
    env_vars = {
        "PGPORT": str(endpoint.default_options["port"]),
        "PGUSER": endpoint.default_options["user"],
        "PGHOST": endpoint.default_options["host"],
    }

    # Run the command.
    # We don't capture the output. It's not too chatty, and it always
    # logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

    check_restored_datadir_content(test_output_dir, env, endpoint)