Merge branch 'main' into amasterov/regress-arm

This commit is contained in:
a-masterov
2024-09-10 14:29:04 +02:00
committed by GitHub
149 changed files with 2062 additions and 1191 deletions

View File

@@ -18,8 +18,7 @@ Prerequisites:
Regression tests are in the 'regress' directory. They can be run in
parallel to minimize total runtime. Most regression test sets up their
environment with its own pageservers and safekeepers (but see
`TEST_SHARED_FIXTURES`).
environment with its own pageservers and safekeepers.
'pg_clients' contains tests for connecting with various client
libraries. Each client test uses a Dockerfile that pulls an image that
@@ -74,7 +73,6 @@ This is used to construct full path to the postgres binaries.
Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16`
`TEST_OUTPUT`: Set the directory where test state and test output files
should go.
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
`RUST_LOG`: logging configuration to pass into Neon CLI
Useful parameters and commands:
@@ -259,11 +257,8 @@ compute Postgres nodes. The connections between them can be configured to use JW
authentication tokens, and some other configuration options can be tweaked too.
The easiest way to get access to a Neon Environment is by using the `neon_simple_env`
fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
or make other destructive changes in that environment. Also don't assume that
there are no tenants or branches or data in the cluster. For convenience, there is a
branch called `empty`, though. The convention is to create a test-specific branch of
that and load any test data there, instead of the 'main' branch.
fixture. For convenience, there is a branch called `main` in environments created with
'neon_simple_env', ready to be used in the test.
For more complicated cases, you can build a custom Neon Environment, with the `neon_env`
fixture:

View File

@@ -140,6 +140,14 @@ class TenantId(Id):
return self.id.hex()
class NodeId(Id):
def __repr__(self) -> str:
return f'`NodeId("{self.id.hex()}")'
def __str__(self) -> str:
return self.id.hex()
class TimelineId(Id):
def __repr__(self) -> str:
return f'TimelineId("{self.id.hex()}")'

View File

@@ -57,12 +57,11 @@ from _pytest.fixtures import FixtureRequest
from psycopg2.extensions import connection as PgConnection
from psycopg2.extensions import cursor as PgCursor
from psycopg2.extensions import make_dsn, parse_dsn
from typing_extensions import Literal
from urllib3.util.retry import Retry
from fixtures import overlayfs
from fixtures.broker import NeonBroker
from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId
from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, TimelineId
from fixtures.endpoint.http import EndpointHttpClient
from fixtures.log_helper import log
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
@@ -221,33 +220,6 @@ def neon_api(neon_api_key: str, neon_api_base_url: str) -> NeonAPI:
return NeonAPI(neon_api_key, neon_api_base_url)
def shareable_scope(fixture_name: str, config: Config) -> Literal["session", "function"]:
"""Return either session of function scope, depending on TEST_SHARED_FIXTURES envvar.
This function can be used as a scope like this:
@pytest.fixture(scope=shareable_scope)
def myfixture(...)
...
"""
scope: Literal["session", "function"]
if os.environ.get("TEST_SHARED_FIXTURES") is None:
# Create the environment in the per-test output directory
scope = "function"
elif (
os.environ.get("BUILD_TYPE") is not None
and os.environ.get("DEFAULT_PG_VERSION") is not None
):
scope = "session"
else:
pytest.fail(
"Shared environment(TEST_SHARED_FIXTURES) requires BUILD_TYPE and DEFAULT_PG_VERSION to be set",
pytrace=False,
)
return scope
@pytest.fixture(scope="session")
def worker_port_num():
return (32768 - BASE_PORT) // int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "1"))
@@ -758,6 +730,9 @@ class NeonEnvBuilder:
patch_script = ""
for ps in self.env.pageservers:
patch_script += f"UPDATE nodes SET listen_http_port={ps.service_port.http}, listen_pg_port={ps.service_port.pg} WHERE node_id = '{ps.id}';"
# This is a temporary to get the backward compat test happy
# since the compat snapshot was generated with an older version of neon local
patch_script += f"UPDATE nodes SET availability_zone_id='{ps.az_id}' WHERE node_id = '{ps.id}' AND availability_zone_id IS NULL;"
patch_script_path.write_text(patch_script)
# Update the config with info about tenants and timelines
@@ -1428,8 +1403,8 @@ class NeonEnv:
return "ep-" + str(self.endpoint_counter)
@pytest.fixture(scope=shareable_scope)
def _shared_simple_env(
@pytest.fixture(scope="function")
def neon_simple_env(
request: FixtureRequest,
pytestconfig: Config,
port_distributor: PortDistributor,
@@ -1447,19 +1422,13 @@ def _shared_simple_env(
pageserver_io_buffer_alignment: Optional[int],
) -> Iterator[NeonEnv]:
"""
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `neon_simple_env`.
Simple Neon environment, with no authentication and no safekeepers.
This fixture will use RemoteStorageKind.LOCAL_FS with pageserver.
"""
if os.environ.get("TEST_SHARED_FIXTURES") is None:
# Create the environment in the per-test output directory
repo_dir = get_test_repo_dir(request, top_output_dir)
else:
# We're running shared fixtures. Share a single directory.
repo_dir = top_output_dir / "shared_repo"
shutil.rmtree(repo_dir, ignore_errors=True)
# Create the environment in the per-test output directory
repo_dir = get_test_repo_dir(request, top_output_dir)
with NeonEnvBuilder(
top_output_dir=top_output_dir,
@@ -1481,27 +1450,9 @@ def _shared_simple_env(
) as builder:
env = builder.init_start()
# For convenience in tests, create a branch from the freshly-initialized cluster.
env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
yield env
@pytest.fixture(scope="function")
def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
"""
Simple Neon environment, with no authentication and no safekeepers.
If TEST_SHARED_FIXTURES environment variable is set, we reuse the same
environment for all tests that use 'neon_simple_env', keeping the
page server and safekeepers running. Any compute nodes are stopped after
each the test, however.
"""
yield _shared_simple_env
_shared_simple_env.endpoints.stop_all()
@pytest.fixture(scope="function")
def neon_env_builder(
pytestconfig: Config,
@@ -2557,7 +2508,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
def tenant_describe(self, tenant_id: TenantId):
"""
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int}
:return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int, preferred_az_id: str}
"""
response = self.request(
"GET",
@@ -2567,6 +2518,30 @@ class NeonStorageController(MetricsGetter, LogUtils):
response.raise_for_status()
return response.json()
def nodes(self):
"""
:return: list of {"id": ""}
"""
response = self.request(
"GET",
f"{self.api}/control/v1/node",
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
return response.json()
def node_shards(self, node_id: NodeId):
"""
:return: list of {"shard_id": "", "is_secondary": bool}
"""
response = self.request(
"GET",
f"{self.api}/control/v1/node/{node_id}/shards",
headers=self.headers(TokenScope.ADMIN),
)
response.raise_for_status()
return response.json()
def tenant_shard_split(
self, tenant_id: TenantId, shard_count: int, shard_stripe_size: Optional[int] = None
) -> list[TenantShardId]:
@@ -2883,6 +2858,17 @@ class NeonStorageController(MetricsGetter, LogUtils):
return None
raise e
def set_preferred_azs(self, preferred_azs: dict[TenantShardId, str]) -> list[TenantShardId]:
response = self.request(
"PUT",
f"{self.api}/control/v1/preferred_azs",
headers=self.headers(TokenScope.ADMIN),
json={str(tid): az for tid, az in preferred_azs.items()},
)
response.raise_for_status()
return [TenantShardId.parse(tid) for tid in response.json()["updated"]]
def __enter__(self) -> "NeonStorageController":
return self
@@ -4860,14 +4846,7 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
# This is autouse, so the test output directory always gets created, even
# if a test doesn't put anything there. It also solves a problem with the
# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
# creates the repo in the test output directory. But it cannot depend on
# 'test_output_dir' fixture, because when TEST_SHARED_FIXTURES is not set,
# it has 'session' scope and cannot access fixtures with 'function'
# scope. So it uses the get_test_output_dir() function to get the path, and
# this fixture ensures that the directory exists. That works because
# 'autouse' fixtures are run before other fixtures.
# if a test doesn't put anything there.
#
# NB: we request the overlay dir fixture so the fixture does its cleanups
@pytest.fixture(scope="function", autouse=True)

View File

@@ -50,6 +50,19 @@ class SafekeeperMetrics(Metrics):
).value
@dataclass
class TermBumpResponse:
previous_term: int
current_term: int
@classmethod
def from_json(cls, d: Dict[str, Any]) -> "TermBumpResponse":
return TermBumpResponse(
previous_term=d["previous_term"],
current_term=d["current_term"],
)
class SafekeeperHttpClient(requests.Session, MetricsGetter):
HTTPError = requests.HTTPError
@@ -252,6 +265,22 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter):
res.raise_for_status()
return res.json()
def term_bump(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
term: Optional[int],
) -> TermBumpResponse:
body = {}
if term is not None:
body["term"] = term
res = self.post(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/term_bump",
json=body,
)
res.raise_for_status()
return TermBumpResponse.from_json(res.json())
def record_safekeeper_info(self, tenant_id: TenantId, timeline_id: TimelineId, body):
res = self.post(
f"http://localhost:{self.port}/v1/record_safekeeper_info/{tenant_id}/{timeline_id}",

View File

@@ -22,10 +22,8 @@ if TYPE_CHECKING:
def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg):
env = neon_simple_env
env.neon_cli.create_branch("test_logical_replication", "empty")
endpoint = env.endpoints.create_start("test_logical_replication")
endpoint = env.endpoints.create_start("main")
log.info("postgres is running on 'test_logical_replication' branch")
pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])
endpoint.safe_psql("create publication pub1 for table pgbench_accounts, pgbench_history")

View File

@@ -8,11 +8,10 @@ from fixtures.neon_fixtures import NeonEnv
#
def test_basebackup_error(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_basebackup_error", "empty")
pageserver_http = env.pageserver.http_client()
# Introduce failpoint
pageserver_http.configure_failpoints(("basebackup-before-control-file", "return"))
with pytest.raises(Exception, match="basebackup-before-control-file"):
env.endpoints.create_start("test_basebackup_error")
env.endpoints.create_start("main")

View File

@@ -11,7 +11,6 @@ from fixtures.utils import query_scalar
#
def test_clog_truncate(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_clog_truncate", "empty")
# set aggressive autovacuum to make sure that truncation will happen
config = [
@@ -24,7 +23,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
"autovacuum_freeze_max_age=100000",
]
endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config)
endpoint = env.endpoints.create_start("main", config_lines=config)
# Install extension containing function needed for test
endpoint.safe_psql("CREATE EXTENSION neon_test_utils")
@@ -58,7 +57,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
env.neon_cli.create_branch(
"test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation
"test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
)
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")

View File

@@ -4,9 +4,8 @@ from fixtures.neon_fixtures import NeonEnv
def test_compute_catalog(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_config", "empty")
endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])
client = endpoint.http_client()
objects = client.dbs_and_roles()

View File

@@ -9,10 +9,9 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
#
def test_config(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_config", "empty")
# change config
endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:

View File

@@ -17,9 +17,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
if env.pg_version == PgVersion.V14 and strategy == "wal_log":
pytest.skip("wal_log strategy not supported on PostgreSQL 14")
env.neon_cli.create_branch("test_createdb", "empty")
endpoint = env.endpoints.create_start("test_createdb")
endpoint = env.endpoints.create_start("main")
with endpoint.cursor() as cur:
# Cause a 'relmapper' change in the original branch
@@ -33,7 +31,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn)
env.neon_cli.create_branch("test_createdb2", "main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createdb2")
# Test that you can connect to the new database on both branches
@@ -62,8 +60,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
#
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_dropdb", "empty")
endpoint = env.endpoints.create_start("test_dropdb")
endpoint = env.endpoints.create_start("main")
with endpoint.cursor() as cur:
cur.execute("CREATE DATABASE foodb")
@@ -80,14 +77,10 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create two branches before and after database drop.
env.neon_cli.create_branch(
"test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop
)
env.neon_cli.create_branch("test_before_dropdb", "main", ancestor_start_lsn=lsn_before_drop)
endpoint_before = env.endpoints.create_start("test_before_dropdb")
env.neon_cli.create_branch(
"test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop
)
env.neon_cli.create_branch("test_after_dropdb", "main", ancestor_start_lsn=lsn_after_drop)
endpoint_after = env.endpoints.create_start("test_after_dropdb")
# Test that database exists on the branch before drop

View File

@@ -7,8 +7,7 @@ from fixtures.utils import query_scalar
#
def test_createuser(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_createuser", "empty")
endpoint = env.endpoints.create_start("test_createuser")
endpoint = env.endpoints.create_start("main")
with endpoint.cursor() as cur:
# Cause a 'relmapper' change in the original branch
@@ -19,7 +18,7 @@ def test_createuser(neon_simple_env: NeonEnv):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn)
env.neon_cli.create_branch("test_createuser2", "main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createuser2")
# Test that you can connect to new branch as a new user

View File

@@ -290,9 +290,8 @@ def assert_db_connlimit(endpoint: Any, db_name: str, connlimit: int, msg: str):
# Here we test the latter. The first one is tested in test_ddl_forwarding
def test_ddl_forwarding_invalid_db(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_ddl_forwarding_invalid_db", "empty")
endpoint = env.endpoints.create_start(
"test_ddl_forwarding_invalid_db",
"main",
# Some non-existent url
config_lines=["neon.console_url=http://localhost:9999/unknown/api/v0/roles_and_databases"],
)

View File

@@ -10,11 +10,9 @@ def test_explain_with_lfc_stats(neon_simple_env: NeonEnv):
cache_dir = Path(env.repo_dir) / "file_cache"
cache_dir.mkdir(exist_ok=True)
branchname = "test_explain_with_lfc_stats"
env.neon_cli.create_branch(branchname, "empty")
log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
endpoint = env.endpoints.create_start(
branchname,
"main",
config_lines=[
"shared_buffers='1MB'",
f"neon.file_cache_path='{cache_dir}/file.cache'",

View File

@@ -16,9 +16,8 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
@pytest.mark.timeout(600)
def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_lfc_resize", "empty")
endpoint = env.endpoints.create_start(
"test_lfc_resize",
"main",
config_lines=[
"neon.file_cache_path='file.cache'",
"neon.max_file_cache_size=512MB",

View File

@@ -12,11 +12,9 @@ def test_lfc_working_set_approximation(neon_simple_env: NeonEnv):
cache_dir = Path(env.repo_dir) / "file_cache"
cache_dir.mkdir(exist_ok=True)
branchname = "test_approximate_working_set_size"
env.neon_cli.create_branch(branchname, "empty")
log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
endpoint = env.endpoints.create_start(
branchname,
"main",
config_lines=[
"shared_buffers='1MB'",
f"neon.file_cache_path='{cache_dir}/file.cache'",

View File

@@ -5,7 +5,7 @@ import threading
import time
from typing import List
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnvBuilder
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.utils import query_scalar
@@ -15,11 +15,8 @@ def test_local_file_cache_unlink(neon_env_builder: NeonEnvBuilder):
cache_dir = os.path.join(env.repo_dir, "file_cache")
os.mkdir(cache_dir)
env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
env.neon_cli.create_branch("test_local_file_cache_unlink", "empty")
endpoint = env.endpoints.create_start(
"test_local_file_cache_unlink",
"main",
config_lines=[
"shared_buffers='1MB'",
f"neon.file_cache_path='{cache_dir}/file.cache'",

View File

@@ -36,10 +36,8 @@ def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_logical_replication", "empty")
endpoint = env.endpoints.create_start(
"test_logical_replication", config_lines=["log_statement=all"]
)
timeline_id = env.initial_timeline
endpoint = env.endpoints.create_start("main", config_lines=["log_statement=all"])
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
@@ -185,10 +183,9 @@ def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.neon_cli.create_branch("test_logical_replication", "empty")
# set low neon.logical_replication_max_snap_files
endpoint = env.endpoints.create_start(
"test_logical_replication",
"main",
config_lines=["log_statement=all", "neon.logical_replication_max_snap_files=1"],
)
@@ -472,7 +469,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
def test_replication_shutdown(neon_simple_env: NeonEnv):
# Ensure Postgres can exit without stuck when a replication job is active + neon extension installed
env = neon_simple_env
env.neon_cli.create_branch("test_replication_shutdown_publisher", "empty")
env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
pub = env.endpoints.create("test_replication_shutdown_publisher")
env.neon_cli.create_branch("test_replication_shutdown_subscriber")

View File

@@ -9,9 +9,8 @@ if TYPE_CHECKING:
def test_migrations(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_migrations", "empty")
endpoint = env.endpoints.create("test_migrations")
endpoint = env.endpoints.create("main")
endpoint.respec(skip_pg_catalog_updates=False)
endpoint.start()

View File

@@ -14,8 +14,7 @@ from fixtures.utils import query_scalar
#
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_multixact", "empty")
endpoint = env.endpoints.create_start("test_multixact")
endpoint = env.endpoints.create_start("main")
cur = endpoint.connect().cursor()
cur.execute(
@@ -73,7 +72,9 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn)
env.neon_cli.create_branch(
"test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn
)
endpoint_new = env.endpoints.create_start("test_multixact_new")
next_multixact_id_new = endpoint_new.safe_psql(

View File

@@ -6,7 +6,7 @@ from fixtures.utils import wait_until
def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
env = neon_simple_env
env.neon_cli.create_branch("test_neon_superuser_publisher", "empty")
env.neon_cli.create_branch("test_neon_superuser_publisher", "main")
pub = env.endpoints.create("test_neon_superuser_publisher")
env.neon_cli.create_branch("test_neon_superuser_subscriber")

View File

@@ -41,8 +41,7 @@ async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int):
# Load data into one table with COPY TO from 5 parallel connections
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
env = neon_simple_env
env.neon_cli.create_branch("test_parallel_copy", "empty")
endpoint = env.endpoints.create_start("test_parallel_copy")
endpoint = env.endpoints.create_start("main")
# Create test table
conn = endpoint.connect()

View File

@@ -42,11 +42,9 @@ def test_cancellations(neon_simple_env: NeonEnv):
ps_http = ps.http_client()
ps_http.is_testing_enabled_or_skip()
env.neon_cli.create_branch("test_config", "empty")
# We don't want to have any racy behaviour with autovacuum IOs
ep = env.endpoints.create_start(
"test_config",
"main",
config_lines=[
"autovacuum = off",
"shared_buffers = 128MB",

View File

@@ -22,8 +22,8 @@ def check_wal_segment(pg_waldump_path: str, segment_path: str, test_output_dir):
def test_pg_waldump(neon_simple_env: NeonEnv, test_output_dir, pg_bin: PgBin):
env = neon_simple_env
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_pg_waldump", "empty")
endpoint = env.endpoints.create_start("test_pg_waldump")
timeline_id = env.initial_timeline
endpoint = env.endpoints.create_start("main")
cur = endpoint.connect().cursor()
cur.execute(

View File

@@ -15,12 +15,8 @@ extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
#
def test_read_validation(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation", "empty")
endpoint = env.endpoints.create_start(
"test_read_validation",
)
endpoint = env.endpoints.create_start("main")
with closing(endpoint.connect()) as con:
with con.cursor() as c:
for e in extensions:
@@ -131,13 +127,9 @@ def test_read_validation(neon_simple_env: NeonEnv):
def test_read_validation_neg(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation_neg", "empty")
env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
endpoint = env.endpoints.create_start(
"test_read_validation_neg",
)
endpoint = env.endpoints.create_start("main")
with closing(endpoint.connect()) as con:
with con.cursor() as c:

View File

@@ -22,8 +22,7 @@ from fixtures.utils import query_scalar
#
def test_readonly_node(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_readonly_node", "empty")
endpoint_main = env.endpoints.create_start("test_readonly_node")
endpoint_main = env.endpoints.create_start("main")
env.pageserver.allowed_errors.extend(
[
@@ -74,12 +73,12 @@ def test_readonly_node(neon_simple_env: NeonEnv):
# Create first read-only node at the point where only 100 rows were inserted
endpoint_hundred = env.endpoints.create_start(
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
branch_name="main", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
)
# And another at the point where 200100 rows were inserted
endpoint_more = env.endpoints.create_start(
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_more", lsn=lsn_b
branch_name="main", endpoint_id="ep-readonly_node_more", lsn=lsn_b
)
# On the 'hundred' node, we should see only 100 rows
@@ -100,7 +99,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
# Check creating a node at segment boundary
endpoint = env.endpoints.create_start(
branch_name="test_readonly_node",
branch_name="main",
endpoint_id="ep-branch_segment_boundary",
lsn=Lsn("0/3000000"),
)
@@ -112,7 +111,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
with pytest.raises(Exception, match="invalid basebackup lsn"):
# compute node startup with invalid LSN should fail
env.endpoints.create_start(
branch_name="test_readonly_node",
branch_name="main",
endpoint_id="ep-readonly_node_preinitdb",
lsn=Lsn("0/42"),
)
@@ -218,14 +217,10 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
# Similar test, but with more data, and we force checkpoints
def test_timetravel(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
env.neon_cli.create_branch("test_timetravel", "empty")
endpoint = env.endpoints.create_start("test_timetravel")
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
client = env.pageserver.http_client()
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
endpoint = env.endpoints.create_start("main")
lsns = []
@@ -249,7 +244,7 @@ def test_timetravel(neon_simple_env: NeonEnv):
wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)
# run checkpoint manually to force a new layer file
pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id)
client.timeline_checkpoint(tenant_id, timeline_id)
##### Restart pageserver
env.endpoints.stop_all()
@@ -258,7 +253,7 @@ def test_timetravel(neon_simple_env: NeonEnv):
for i, lsn in lsns:
endpoint_old = env.endpoints.create_start(
branch_name="test_timetravel", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
branch_name="main", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
)
with endpoint_old.cursor() as cur:
assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000

View File

@@ -1552,6 +1552,12 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
literal_shard_count = 1 if shard_count is None else shard_count
assert len(describe["shards"]) == literal_shard_count
nodes = env.storage_controller.nodes()
assert len(nodes) == 2
describe1 = env.storage_controller.node_shards(nodes[0]["id"])
describe2 = env.storage_controller.node_shards(nodes[1]["id"])
assert len(describe1["shards"]) + len(describe2["shards"]) == literal_shard_count
# Check the data is still there: this implicitly proves that we recovered generation numbers
# properly, for the timeline which was written to after a generation bump.
for timeline, branch, expect_rows in [
@@ -2512,3 +2518,55 @@ def eq_safekeeper_records(a: dict[str, Any], b: dict[str, Any]) -> bool:
del d[key]
return compared[0] == compared[1]
@run_only_on_default_postgres("this is like a 'unit test' against storcon db")
def test_shard_preferred_azs(neon_env_builder: NeonEnvBuilder):
def assign_az(ps_cfg):
az = f"az-{ps_cfg['id']}"
ps_cfg["availability_zone"] = az
neon_env_builder.pageserver_config_override = assign_az
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_configs()
env.start()
tids = [TenantId.generate() for _ in range(0, 3)]
for tid in tids:
env.storage_controller.tenant_create(tid)
shards = env.storage_controller.tenant_describe(tid)["shards"]
assert len(shards) == 1
attached_to = shards[0]["node_attached"]
expected_az = env.get_pageserver(attached_to).az_id
assert shards[0]["preferred_az_id"] == expected_az
updated = env.storage_controller.set_preferred_azs(
{TenantShardId(tid, 0, 0): "foo" for tid in tids}
)
assert set(updated) == set([TenantShardId(tid, 0, 0) for tid in tids])
for tid in tids:
shards = env.storage_controller.tenant_describe(tid)["shards"]
assert len(shards) == 1
assert shards[0]["preferred_az_id"] == "foo"
# Generate a layer to avoid shard split handling on ps from tripping
# up on debug assert.
timeline_id = TimelineId.generate()
env.neon_cli.create_timeline("bar", tids[0], timeline_id)
workload = Workload(env, tids[0], timeline_id, branch_name="bar")
workload.init()
workload.write_rows(256)
workload.validate()
env.storage_controller.tenant_shard_split(tids[0], shard_count=2)
shards = env.storage_controller.tenant_describe(tids[0])["shards"]
assert len(shards) == 2
for shard in shards:
attached_to = shard["node_attached"]
expected_az = env.get_pageserver(attached_to).az_id
assert shard["preferred_az_id"] == expected_az

View File

@@ -9,8 +9,7 @@ from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
# CLOG.
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_subxacts", "empty")
endpoint = env.endpoints.create_start("test_subxacts")
endpoint = env.endpoints.create_start("main")
pg_conn = endpoint.connect()
cur = pg_conn.cursor()

View File

@@ -68,10 +68,13 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
# construct pair of branches to validate that pageserver prohibits
# deletion of ancestor timelines when they have child branches
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
parent_timeline_id = env.neon_cli.create_branch(
new_branch_name="test_ancestor_branch_delete_parent", ancestor_branch_name="main"
)
leaf_timeline_id = env.neon_cli.create_branch(
"test_ancestor_branch_delete_branch1", "test_ancestor_branch_delete_parent"
new_branch_name="test_ancestor_branch_delete_branch1",
ancestor_branch_name="test_ancestor_branch_delete_parent",
)
timeline_path = env.pageserver.timeline_dir(env.initial_tenant, parent_timeline_id)

View File

@@ -1,17 +1,32 @@
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import List, Optional
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
LogCursor,
NeonEnvBuilder,
NeonPageserver,
)
from fixtures.pageserver.utils import wait_timeline_detail_404
def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder):
@pytest.mark.parametrize("sharded", [True, False])
def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool):
neon_env_builder.num_pageservers = 2 if sharded else 1
env = neon_env_builder.init_start(
initial_tenant_conf={"gc_period": "1s", "lsn_lease_length": "0s"}
initial_tenant_conf={"gc_period": "1s", "lsn_lease_length": "0s"},
initial_tenant_shard_count=2 if sharded else None,
)
ps = env.pageserver
http = ps.http_client()
if sharded:
http = env.storage_controller.pageserver_api()
else:
http = env.pageserver.http_client()
pss = ManyPageservers(list(map(lambda ps: ScrollableLog(ps, None), env.pageservers)))
foo_branch = env.neon_cli.create_branch("foo", "main", env.initial_tenant)
@@ -22,9 +37,8 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder):
tenant_before = http.tenant_status(env.initial_tenant)
wait_for_another_gc_round()
_, offset = ps.assert_log_contains(gc_active_line)
assert ps.log_contains(gc_skipped_line, offset) is None
pss.assert_log_contains(gc_active_line)
pss.assert_log_does_not_contain(gc_skipped_line)
http.timeline_block_gc(env.initial_tenant, foo_branch)
@@ -34,34 +48,78 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder):
assert gc_blocking == "BlockingReasons { timelines: 1, reasons: EnumSet(Manual) }"
wait_for_another_gc_round()
_, offset = ps.assert_log_contains(gc_skipped_line, offset)
pss.assert_log_contains(gc_skipped_line)
ps.restart()
ps.quiesce_tenants()
pss.restart()
pss.quiesce_tenants()
_, offset = env.pageserver.assert_log_contains(init_gc_skipped, offset)
pss.assert_log_contains(init_gc_skipped)
wait_for_another_gc_round()
_, offset = ps.assert_log_contains(gc_skipped_line, offset)
pss.assert_log_contains(gc_skipped_line)
# deletion unblocks gc
http.timeline_delete(env.initial_tenant, foo_branch)
wait_timeline_detail_404(http, env.initial_tenant, foo_branch, 10, 1.0)
wait_for_another_gc_round()
_, offset = ps.assert_log_contains(gc_active_line, offset)
pss.assert_log_contains(gc_active_line)
http.timeline_block_gc(env.initial_tenant, env.initial_timeline)
wait_for_another_gc_round()
_, offset = ps.assert_log_contains(gc_skipped_line, offset)
pss.assert_log_contains(gc_skipped_line)
# removing the manual block also unblocks gc
http.timeline_unblock_gc(env.initial_tenant, env.initial_timeline)
wait_for_another_gc_round()
_, offset = ps.assert_log_contains(gc_active_line, offset)
pss.assert_log_contains(gc_active_line)
def wait_for_another_gc_round():
time.sleep(2)
@dataclass
class ScrollableLog:
pageserver: NeonPageserver
offset: Optional[LogCursor]
def assert_log_contains(self, what: str):
msg, offset = self.pageserver.assert_log_contains(what, offset=self.offset)
old = self.offset
self.offset = offset
log.info(f"{old} -> {offset}: {msg}")
def assert_log_does_not_contain(self, what: str):
assert self.pageserver.log_contains(what) is None
@dataclass(frozen=True)
class ManyPageservers:
many: List[ScrollableLog]
def assert_log_contains(self, what: str):
for one in self.many:
one.assert_log_contains(what)
def assert_log_does_not_contain(self, what: str):
for one in self.many:
one.assert_log_does_not_contain(what)
def restart(self):
def do_restart(x: ScrollableLog):
x.pageserver.restart()
with ThreadPoolExecutor(max_workers=len(self.many)) as rt:
rt.map(do_restart, self.many)
rt.shutdown(wait=True)
def quiesce_tenants(self):
def do_quiesce(x: ScrollableLog):
x.pageserver.quiesce_tenants()
with ThreadPoolExecutor(max_workers=len(self.many)) as rt:
rt.map(do_quiesce, self.many)
rt.shutdown(wait=True)

View File

@@ -36,7 +36,7 @@ from fixtures.utils import get_timeline_dir_size, wait_until
def test_timeline_size(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty")
new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "main")
client = env.pageserver.http_client()
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
@@ -68,7 +68,7 @@ def test_timeline_size(neon_simple_env: NeonEnv):
def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "empty")
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "main")
client = env.pageserver.http_client()
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)

View File

@@ -9,10 +9,7 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
#
def test_twophase(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_twophase", "empty")
endpoint = env.endpoints.create_start(
"test_twophase", config_lines=["max_prepared_transactions=5"]
)
endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=5"])
conn = endpoint.connect()
cur = conn.cursor()
@@ -56,7 +53,7 @@ def test_twophase(neon_simple_env: NeonEnv):
assert len(twophase_files) == 2
# Create a branch with the transaction in prepared state
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase")
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "main")
# Start compute on the new branch
endpoint2 = env.endpoints.create_start(

View File

@@ -9,8 +9,7 @@ from fixtures.pg_version import PgVersion
#
def test_unlogged(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_unlogged", "empty")
endpoint = env.endpoints.create_start("test_unlogged")
endpoint = env.endpoints.create_start("main")
conn = endpoint.connect()
cur = conn.cursor()
@@ -22,7 +21,7 @@ def test_unlogged(neon_simple_env: NeonEnv):
cur.execute("INSERT INTO iut (id) values (42);")
# create another compute to fetch inital empty contents from pageserver
fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "test_unlogged")
fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "main")
endpoint2 = env.endpoints.create_start("test_unlogged_basebackup")
conn2 = endpoint2.connect()

View File

@@ -13,8 +13,7 @@ from fixtures.utils import query_scalar
def test_vm_bit_clear(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
endpoint = env.endpoints.create_start("test_vm_bit_clear")
endpoint = env.endpoints.create_start("main")
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
@@ -58,7 +57,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
cur.execute("UPDATE vmtest_cold_update2 SET id = 5000, filler=repeat('x', 200) WHERE id = 1")
# Branch at this point, to test that later
fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "test_vm_bit_clear")
fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "main")
# Clear the buffer cache, to force the VM page to be re-fetched from
# the page server

View File

@@ -1057,6 +1057,24 @@ def test_restart_endpoint(neon_env_builder: NeonEnvBuilder):
endpoint.start()
# Try restarting endpoint immediately after xlog switch.
# https://github.com/neondatabase/neon/issues/8911
def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
endpoint = env.endpoints.create_start("main")
endpoint.safe_psql("create table t (i int)")
endpoint.safe_psql("SELECT pg_switch_wal()")
# we want immediate shutdown to have endpoint restart on xlog switch record,
# so prevent shutdown checkpoint.
endpoint.stop(mode="immediate")
endpoint = env.endpoints.create_start("main")
endpoint.safe_psql("SELECT 'works'")
# Context manager which logs passed time on exit.
class DurationLogger:
def __init__(self, desc):
@@ -2176,6 +2194,43 @@ def test_patch_control_file(neon_env_builder: NeonEnvBuilder):
assert res["timelines"][0]["control_file"]["timeline_start_lsn"] == "0/1"
def test_term_bump(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
endpoint = env.endpoints.create_start("main")
# initialize safekeeper
endpoint.safe_psql("create table t(key int, value text)")
http_cli = env.safekeepers[0].http_client()
# check that bump up to specific term works
curr_term = http_cli.timeline_status(tenant_id, timeline_id).term
bump_to = curr_term + 3
res = http_cli.term_bump(tenant_id, timeline_id, bump_to)
log.info(f"bump to {bump_to} res: {res}")
assert res.current_term >= bump_to
# check that bump to none increments current term
res = http_cli.term_bump(tenant_id, timeline_id, None)
log.info(f"bump to None res: {res}")
assert res.current_term > bump_to
assert res.current_term > res.previous_term
# check that bumping doesn't work downward
res = http_cli.term_bump(tenant_id, timeline_id, 2)
log.info(f"bump to 2 res: {res}")
assert res.current_term > bump_to
assert res.current_term == res.previous_term
# check that this doesn't kill endpoint because last WAL flush was his and
# thus its basebackup is still good
endpoint.safe_psql("insert into t values (1, 'payload')")
# Test disables periodic pushes from safekeeper to the broker and checks that
# pageserver can still discover safekeepers with discovery requests.
def test_broker_discovery(neon_env_builder: NeonEnvBuilder):

View File

@@ -23,8 +23,7 @@ run_broken = pytest.mark.skipif(
def test_broken(neon_simple_env: NeonEnv, pg_bin):
env = neon_simple_env
env.neon_cli.create_branch("test_broken", "empty")
env.endpoints.create_start("test_broken")
env.endpoints.create_start("main")
log.info("postgres is running")
log.info("THIS NEXT COMMAND WILL FAIL:")