mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-16 01:42:55 +00:00
Previously we worked around file comparison issues by dropping unlogged relations in the pg_regress tests, but this would lead to an unnecessary diff when compared to upstream in our Postgres fork. Instead, we can precompute the files that we know will be different, and ignore them.
266 lines
9.8 KiB
Python
#
|
|
# This file runs pg_regress-based tests.
|
|
#
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, cast
|
|
|
|
import pytest
|
|
from fixtures.neon_fixtures import (
|
|
NeonEnvBuilder,
|
|
check_restored_datadir_content,
|
|
)
|
|
from fixtures.pg_version import PgVersion
|
|
from fixtures.remote_storage import s3_storage
|
|
|
|
if TYPE_CHECKING:
|
|
from typing import Optional
|
|
|
|
from fixtures.neon_fixtures import PgBin
|
|
from pytest import CaptureFixture
|
|
|
|
|
|
# Run the main PostgreSQL regression tests, in src/test/regress.
#
@pytest.mark.parametrize("shard_count", [None, 4])
def test_pg_regress(
    neon_env_builder: NeonEnvBuilder,
    test_output_dir: Path,
    build_type: str,
    pg_bin: PgBin,
    capsys: CaptureFixture[str],
    base_dir: Path,
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    """
    Run the main PostgreSQL regression tests (src/test/regress) against a Neon endpoint,
    then verify that a restored data directory matches the endpoint's data directory.

    :param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this
        many shards.
    """
    # NOTE(review): the docstring must be the first statement of the function body.
    # Previously DBNAME was assigned before it, which silently demoted the
    # docstring to a no-op string expression.
    DBNAME = "regression"

    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count

    if build_type == "debug":
        # Disable vectored read path cross validation since it makes the test time out.
        neon_env_builder.pageserver_config_override = "validate_vectored_get=false"

    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")

    # Create some local directories for pg_regress to run in.
    runpath = test_output_dir / "regress"
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
    pg_regress = build_path / "pg_regress"

    # NOTE(review): the command previously contained both '--bindir=""' and
    # f"--bindir={bindir}". pg_regress takes the last occurrence of a repeated
    # option, so the empty first value was always overridden; it has been removed.
    pg_regress_command = [
        str(pg_regress),
        "--use-existing",
        f"--bindir={bindir}",
        f"--dlpath={build_path}",
        f"--schedule={schedule}",
        f"--inputdir={src_path}",
    ]

    # Point libpq (used by pg_regress) at the endpoint we just started.
    env_vars = {
        "PGPORT": str(endpoint.default_options["port"]),
        "PGUSER": endpoint.default_options["user"],
        "PGHOST": endpoint.default_options["host"],
    }

    # Run the command.
    # We don't capture the output. It's not too chatty, and it always
    # logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

    ignored_files: Optional[list[str]] = None

    # Neon handles unlogged relations in a special manner. During a
    # basebackup, we ship the init fork as the main fork. This presents a
    # problem in that the endpoint's data directory and the basebackup will
    # have differences and will fail the eventual file comparison.
    #
    # Unlogged tables were introduced in version 9.1. ALTER TABLE grew
    # support for setting the persistence of a table in 9.5. The reason that
    # this doesn't affect versions < 15 (but probably would between 9.1 and
    # 9.5) is that all the regression tests that deal with unlogged tables
    # up until that point dropped the unlogged tables or set them to logged
    # at some point during the test.
    #
    # In version 15, Postgres grew support for unlogged sequences, and with
    # that came a few more regression tests. These tests did not all drop
    # the unlogged tables/sequences prior to finishing.
    #
    # But unlogged sequences came with a bug in that, sequences didn't
    # inherit the persistence of their "parent" tables if they had one. This
    # was fixed and backported to 15, thus exacerbating our problem a bit.
    #
    # So what we can do is just ignore file differences between the data
    # directory and basebackup for unlogged relations.
    results = cast(
        "list[tuple[str, str]]",
        endpoint.safe_psql(
            """
            SELECT
                relkind,
                pg_relation_filepath(
                    pg_filenode_relation(reltablespace, relfilenode)
                ) AS unlogged_relation_paths
            FROM pg_class
            WHERE relpersistence = 'u'
            """,
            dbname=DBNAME,
        ),
    )

    unlogged_relation_files: list[str] = []
    for r in results:
        unlogged_relation_files.append(r[1])
        # This is related to the following Postgres commit:
        #
        #   commit ccadf73163ca88bdaa74b8223d4dde05d17f550b
        #   Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
        #   Date:   2023-08-23 09:21:31 -0500
        #
        #   Use the buffer cache when initializing an unlogged index.
        #
        # This patch was backpatched to 16. Without it, the LSN in the
        # page header would be 0/0 in the data directory, which wouldn't
        # match the LSN generated during the basebackup, thus creating
        # a difference. So for versions <= 15, also ignore the init fork
        # of unlogged indexes.
        if env.pg_version <= PgVersion.V15 and r[0] == "i":
            unlogged_relation_files.append(f"{r[1]}_init")

    ignored_files = unlogged_relation_files

    check_restored_datadir_content(test_output_dir, env, endpoint, ignored_files=ignored_files)
|
# Run the PostgreSQL "isolation" tests, in src/test/isolation.
#
@pytest.mark.parametrize("shard_count", [None, 4])
def test_isolation(
    neon_env_builder: NeonEnvBuilder,
    test_output_dir: Path,
    pg_bin: PgBin,
    capsys: CaptureFixture[str],
    base_dir: Path,
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    """
    Run the PostgreSQL isolation test suite (src/test/isolation) against a Neon endpoint.

    :param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this
        many shards.
    """
    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    # The isolation tests exercise prepared transactions, which are off by
    # default, so start the endpoint with a non-zero limit.
    endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=100"])
    endpoint.safe_psql("CREATE DATABASE isolation_regression")

    # Scratch directory for pg_isolation_regress to run in.
    runpath = test_output_dir / "regress"
    (runpath / "testtablespace").mkdir(parents=True)

    # Locations of the test driver, its inputs, and the schedule file.
    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"

    cmd = [
        str(build_path / "pg_isolation_regress"),
        "--use-existing",
        f"--bindir={bindir}",
        f"--dlpath={build_path}",
        f"--inputdir={src_path}",
        f"--schedule={src_path / 'isolation_schedule'}",
    ]

    # Point libpq (used by pg_isolation_regress) at the endpoint we started.
    conn = endpoint.default_options
    env_vars = {
        "PGPORT": str(conn["port"]),
        "PGUSER": conn["user"],
        "PGHOST": conn["host"],
    }

    # Run the suite. Output is deliberately not captured: it is not too chatty,
    # and it always logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(cmd, env=env_vars, cwd=runpath)
|
# Run extra Neon-specific pg_regress-based tests. The tests and their
# schedule file are in the sql_regress/ directory.
@pytest.mark.parametrize("shard_count", [None, 4])
def test_sql_regress(
    neon_env_builder: NeonEnvBuilder,
    test_output_dir: Path,
    pg_bin: PgBin,
    capsys: CaptureFixture[str],
    base_dir: Path,
    pg_distrib_dir: Path,
    shard_count: Optional[int],
):
    """
    Run the Neon-specific pg_regress tests from test_runner/sql_regress/ and
    verify that a restored data directory matches the endpoint's data directory.

    :param shard_count: if None, create an unsharded tenant. Otherwise create a tenant with this
        many shards.
    """
    if shard_count is not None:
        neon_env_builder.num_pageservers = shard_count
    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
    neon_env_builder.enable_scrub_on_exit()
    env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)

    # Connect to postgres and create a database called "regression".
    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql("CREATE DATABASE regression")

    # Create some local directories for pg_regress to run in.
    runpath = test_output_dir / "regress"
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
    # This test runs neon specific tests.
    # NOTE(review): use `v_prefixed` for the build-tree path component, matching
    # how test_pg_regress and test_isolation compute their build_path.
    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / "test_runner/sql_regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
    pg_regress = build_path / "pg_regress"

    pg_regress_command = [
        str(pg_regress),
        "--use-existing",
        f"--bindir={bindir}",
        f"--dlpath={build_path}",
        f"--schedule={schedule}",
        f"--inputdir={src_path}",
    ]

    # Point libpq (used by pg_regress) at the endpoint we just started.
    env_vars = {
        "PGPORT": str(endpoint.default_options["port"]),
        "PGUSER": endpoint.default_options["user"],
        "PGHOST": endpoint.default_options["host"],
    }

    # Run the command.
    # We don't capture the output. It's not too chatty, and it always
    # logs the exact same data to `regression.out` anyway.
    with capsys.disabled():
        pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

    check_restored_datadir_content(test_output_dir, env, endpoint)