mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-20 22:50:38 +00:00
Merge branch 'main' into amasterov/regress-arm
This commit is contained in:
47
test_runner/fixtures/auth_tokens.py
Normal file
47
test_runner/fixtures/auth_tokens.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import jwt
|
||||
|
||||
from fixtures.common_types import TenantId
|
||||
|
||||
|
||||
@dataclass
|
||||
class AuthKeys:
|
||||
priv: str
|
||||
|
||||
def generate_token(self, *, scope: TokenScope, **token_data: Any) -> str:
|
||||
token_data = {key: str(val) for key, val in token_data.items()}
|
||||
token = jwt.encode({"scope": scope, **token_data}, self.priv, algorithm="EdDSA")
|
||||
# cast(Any, self.priv)
|
||||
|
||||
# jwt.encode can return 'bytes' or 'str', depending on Python version or type
|
||||
# hinting or something (not sure what). If it returned 'bytes', convert it to 'str'
|
||||
# explicitly.
|
||||
if isinstance(token, bytes):
|
||||
token = token.decode()
|
||||
|
||||
return token
|
||||
|
||||
def generate_pageserver_token(self) -> str:
|
||||
return self.generate_token(scope=TokenScope.PAGE_SERVER_API)
|
||||
|
||||
def generate_safekeeper_token(self) -> str:
|
||||
return self.generate_token(scope=TokenScope.SAFEKEEPER_DATA)
|
||||
|
||||
# generate token giving access to only one tenant
|
||||
def generate_tenant_token(self, tenant_id: TenantId) -> str:
|
||||
return self.generate_token(scope=TokenScope.TENANT, tenant_id=str(tenant_id))
|
||||
|
||||
|
||||
# TODO: Replace with `StrEnum` when we upgrade to python 3.11
|
||||
class TokenScope(str, Enum):
|
||||
ADMIN = "admin"
|
||||
PAGE_SERVER_API = "pageserverapi"
|
||||
GENERATIONS_API = "generations_api"
|
||||
SAFEKEEPER_DATA = "safekeeperdata"
|
||||
TENANT = "tenant"
|
||||
SCRUBBER = "scrubber"
|
||||
@@ -1,63 +0,0 @@
|
||||
import subprocess
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
@dataclass
|
||||
class NeonBroker:
|
||||
"""An object managing storage_broker instance"""
|
||||
|
||||
logfile: Path
|
||||
port: int
|
||||
neon_binpath: Path
|
||||
handle: Optional[subprocess.Popen[Any]] = None # handle of running daemon
|
||||
|
||||
def listen_addr(self):
|
||||
return f"127.0.0.1:{self.port}"
|
||||
|
||||
def client_url(self):
|
||||
return f"http://{self.listen_addr()}"
|
||||
|
||||
def check_status(self):
|
||||
return True # TODO
|
||||
|
||||
def try_start(self):
|
||||
if self.handle is not None:
|
||||
log.debug(f"storage_broker is already running on port {self.port}")
|
||||
return
|
||||
|
||||
listen_addr = self.listen_addr()
|
||||
log.info(f'starting storage_broker to listen incoming connections at "{listen_addr}"')
|
||||
with open(self.logfile, "wb") as logfile:
|
||||
args = [
|
||||
str(self.neon_binpath / "storage_broker"),
|
||||
f"--listen-addr={listen_addr}",
|
||||
]
|
||||
self.handle = subprocess.Popen(args, stdout=logfile, stderr=logfile)
|
||||
|
||||
# wait for start
|
||||
started_at = time.time()
|
||||
while True:
|
||||
try:
|
||||
self.check_status()
|
||||
except Exception as e:
|
||||
elapsed = time.time() - started_at
|
||||
if elapsed > 5:
|
||||
raise RuntimeError(
|
||||
f"timed out waiting {elapsed:.0f}s for storage_broker start: {e}"
|
||||
) from e
|
||||
time.sleep(0.5)
|
||||
else:
|
||||
break # success
|
||||
|
||||
def stop(self, immediate: bool = False):
|
||||
if self.handle is not None:
|
||||
if immediate:
|
||||
self.handle.kill()
|
||||
else:
|
||||
self.handle.terminate()
|
||||
self.handle.wait()
|
||||
@@ -13,7 +13,7 @@ DEFAULT_WAL_SEG_SIZE = 16 * 1024 * 1024
|
||||
class Lsn:
|
||||
"""
|
||||
Datatype for an LSN. Internally it is a 64-bit integer, but the string
|
||||
representation is like "1/123abcd". See also pg_lsn datatype in Postgres
|
||||
representation is like "1/0123abcd". See also pg_lsn datatype in Postgres
|
||||
"""
|
||||
|
||||
def __init__(self, x: Union[int, str]):
|
||||
|
||||
@@ -4,6 +4,7 @@ from abc import ABC, abstractmethod
|
||||
from contextlib import _GeneratorContextManager, contextmanager
|
||||
|
||||
# Type-related stuff
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterator, List
|
||||
|
||||
import pytest
|
||||
@@ -229,11 +230,11 @@ class VanillaCompare(PgCompare):
|
||||
pass # TODO find something
|
||||
|
||||
def report_size(self):
|
||||
data_size = self.pg.get_subdir_size("base")
|
||||
data_size = self.pg.get_subdir_size(Path("base"))
|
||||
self.zenbenchmark.record(
|
||||
"data_size", data_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
|
||||
)
|
||||
wal_size = self.pg.get_subdir_size("pg_wal")
|
||||
wal_size = self.pg.get_subdir_size(Path("pg_wal"))
|
||||
self.zenbenchmark.record(
|
||||
"wal_size", wal_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
|
||||
)
|
||||
|
||||
@@ -43,7 +43,6 @@ from urllib.parse import quote, urlparse
|
||||
import asyncpg
|
||||
import backoff
|
||||
import httpx
|
||||
import jwt
|
||||
import psycopg2
|
||||
import psycopg2.sql
|
||||
import pytest
|
||||
@@ -60,7 +59,7 @@ from psycopg2.extensions import make_dsn, parse_dsn
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from fixtures import overlayfs
|
||||
from fixtures.broker import NeonBroker
|
||||
from fixtures.auth_tokens import AuthKeys, TokenScope
|
||||
from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, TimelineId
|
||||
from fixtures.endpoint.http import EndpointHttpClient
|
||||
from fixtures.log_helper import log
|
||||
@@ -93,6 +92,7 @@ from fixtures.utils import (
|
||||
allure_add_grafana_links,
|
||||
allure_attach_from_dir,
|
||||
assert_no_errors,
|
||||
get_dir_size,
|
||||
get_self_dir,
|
||||
print_gc_result,
|
||||
subprocess_capture,
|
||||
@@ -158,7 +158,7 @@ def neon_binpath(base_dir: Path, build_type: str) -> Iterator[Path]:
|
||||
yield binpath
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
@pytest.fixture(scope="session")
|
||||
def pg_distrib_dir(base_dir: Path) -> Iterator[Path]:
|
||||
if env_postgres_bin := os.environ.get("POSTGRES_DISTRIB_DIR"):
|
||||
distrib_dir = Path(env_postgres_bin).resolve()
|
||||
@@ -182,25 +182,6 @@ def top_output_dir(base_dir: Path) -> Iterator[Path]:
|
||||
yield output_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]:
|
||||
versioned_dir = pg_distrib_dir / pg_version.v_prefixed
|
||||
|
||||
psql_bin_path = versioned_dir / "bin/psql"
|
||||
postgres_bin_path = versioned_dir / "bin/postgres"
|
||||
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# When testing against a remote server, we only need the client binary.
|
||||
if not psql_bin_path.exists():
|
||||
raise Exception(f"psql not found at '{psql_bin_path}'")
|
||||
else:
|
||||
if not postgres_bin_path.exists():
|
||||
raise Exception(f"postgres not found at '{postgres_bin_path}'")
|
||||
|
||||
log.info(f"versioned_pg_distrib_dir is {versioned_dir}")
|
||||
yield versioned_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_api_key() -> str:
|
||||
api_key = os.getenv("NEON_API_KEY")
|
||||
@@ -243,36 +224,11 @@ def worker_base_port(worker_seq_no: int, worker_port_num: int) -> int:
|
||||
return BASE_PORT + worker_seq_no * worker_port_num
|
||||
|
||||
|
||||
def get_dir_size(path: str) -> int:
|
||||
"""Return size in bytes."""
|
||||
totalbytes = 0
|
||||
for root, _dirs, files in os.walk(path):
|
||||
for name in files:
|
||||
totalbytes += os.path.getsize(os.path.join(root, name))
|
||||
|
||||
return totalbytes
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def port_distributor(worker_base_port: int, worker_port_num: int) -> PortDistributor:
|
||||
return PortDistributor(base_port=worker_base_port, port_number=worker_port_num)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def default_broker(
|
||||
port_distributor: PortDistributor,
|
||||
test_output_dir: Path,
|
||||
neon_binpath: Path,
|
||||
) -> Iterator[NeonBroker]:
|
||||
# multiple pytest sessions could get launched in parallel, get them different ports/datadirs
|
||||
client_port = port_distributor.get_port()
|
||||
broker_logfile = test_output_dir / "repo" / "storage_broker.log"
|
||||
|
||||
broker = NeonBroker(logfile=broker_logfile, port=client_port, neon_binpath=neon_binpath)
|
||||
yield broker
|
||||
broker.stop()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def run_id() -> Iterator[uuid.UUID]:
|
||||
yield uuid.uuid4()
|
||||
@@ -401,44 +357,6 @@ class PgProtocol:
|
||||
return self.safe_psql(query, log_query=log_query)[0][0]
|
||||
|
||||
|
||||
@dataclass
|
||||
class AuthKeys:
|
||||
priv: str
|
||||
|
||||
def generate_token(self, *, scope: TokenScope, **token_data: Any) -> str:
|
||||
token_data = {key: str(val) for key, val in token_data.items()}
|
||||
token = jwt.encode({"scope": scope, **token_data}, self.priv, algorithm="EdDSA")
|
||||
# cast(Any, self.priv)
|
||||
|
||||
# jwt.encode can return 'bytes' or 'str', depending on Python version or type
|
||||
# hinting or something (not sure what). If it returned 'bytes', convert it to 'str'
|
||||
# explicitly.
|
||||
if isinstance(token, bytes):
|
||||
token = token.decode()
|
||||
|
||||
return token
|
||||
|
||||
def generate_pageserver_token(self) -> str:
|
||||
return self.generate_token(scope=TokenScope.PAGE_SERVER_API)
|
||||
|
||||
def generate_safekeeper_token(self) -> str:
|
||||
return self.generate_token(scope=TokenScope.SAFEKEEPER_DATA)
|
||||
|
||||
# generate token giving access to only one tenant
|
||||
def generate_tenant_token(self, tenant_id: TenantId) -> str:
|
||||
return self.generate_token(scope=TokenScope.TENANT, tenant_id=str(tenant_id))
|
||||
|
||||
|
||||
# TODO: Replace with `StrEnum` when we upgrade to python 3.11
|
||||
class TokenScope(str, Enum):
|
||||
ADMIN = "admin"
|
||||
PAGE_SERVER_API = "pageserverapi"
|
||||
GENERATIONS_API = "generations_api"
|
||||
SAFEKEEPER_DATA = "safekeeperdata"
|
||||
TENANT = "tenant"
|
||||
SCRUBBER = "scrubber"
|
||||
|
||||
|
||||
class NeonEnvBuilder:
|
||||
"""
|
||||
Builder object to create a Neon runtime environment
|
||||
@@ -453,7 +371,6 @@ class NeonEnvBuilder:
|
||||
self,
|
||||
repo_dir: Path,
|
||||
port_distributor: PortDistributor,
|
||||
broker: NeonBroker,
|
||||
run_id: uuid.UUID,
|
||||
mock_s3_server: MockS3Server,
|
||||
neon_binpath: Path,
|
||||
@@ -494,7 +411,6 @@ class NeonEnvBuilder:
|
||||
# Safekeepers remote storage
|
||||
self.safekeepers_remote_storage: Optional[RemoteStorage] = None
|
||||
|
||||
self.broker = broker
|
||||
self.run_id = run_id
|
||||
self.mock_s3_server: MockS3Server = mock_s3_server
|
||||
self.pageserver_config_override = pageserver_config_override
|
||||
@@ -553,10 +469,6 @@ class NeonEnvBuilder:
|
||||
self.env = NeonEnv(self)
|
||||
return self.env
|
||||
|
||||
def start(self):
|
||||
assert self.env is not None, "environment is not already initialized, call init() first"
|
||||
self.env.start()
|
||||
|
||||
def init_start(
|
||||
self,
|
||||
initial_tenant_conf: Optional[Dict[str, Any]] = None,
|
||||
@@ -572,7 +484,7 @@ class NeonEnvBuilder:
|
||||
Configuring pageserver with remote storage is now the default. There will be a warning if pageserver is created without one.
|
||||
"""
|
||||
env = self.init_configs(default_remote_storage_if_missing=default_remote_storage_if_missing)
|
||||
self.start()
|
||||
env.start()
|
||||
|
||||
# Prepare the default branch to start the postgres on later.
|
||||
# Pageserver itself does not create tenants and timelines, until started first and asked via HTTP API.
|
||||
@@ -937,8 +849,11 @@ class NeonEnvBuilder:
|
||||
|
||||
for directory_to_clean in reversed(directories_to_clean):
|
||||
if not os.listdir(directory_to_clean):
|
||||
log.debug(f"Removing empty directory {directory_to_clean}")
|
||||
directory_to_clean.rmdir()
|
||||
log.info(f"Removing empty directory {directory_to_clean}")
|
||||
try:
|
||||
directory_to_clean.rmdir()
|
||||
except Exception as e:
|
||||
log.error(f"Error removing empty directory {directory_to_clean}: {e}")
|
||||
|
||||
def cleanup_remote_storage(self):
|
||||
for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
|
||||
@@ -1007,6 +922,8 @@ class NeonEnvBuilder:
|
||||
|
||||
self.env.storage_controller.assert_no_errors()
|
||||
|
||||
self.env.broker.assert_no_errors()
|
||||
|
||||
try:
|
||||
self.overlay_cleanup_teardown()
|
||||
except Exception as e:
|
||||
@@ -1060,7 +977,7 @@ class NeonEnv:
|
||||
self.endpoints = EndpointFactory(self)
|
||||
self.safekeepers: List[Safekeeper] = []
|
||||
self.pageservers: List[NeonPageserver] = []
|
||||
self.broker = config.broker
|
||||
self.broker = NeonBroker(self)
|
||||
self.pageserver_remote_storage = config.pageserver_remote_storage
|
||||
self.safekeepers_remote_storage = config.safekeepers_remote_storage
|
||||
self.pg_version = config.pg_version
|
||||
@@ -1073,9 +990,6 @@ class NeonEnv:
|
||||
self.pg_distrib_dir = config.pg_distrib_dir
|
||||
self.endpoint_counter = 0
|
||||
self.storage_controller_config = config.storage_controller_config
|
||||
|
||||
# generate initial tenant ID here instead of letting 'neon init' generate it,
|
||||
# so that we don't need to dig it out of the config file afterwards.
|
||||
self.initial_tenant = config.initial_tenant
|
||||
self.initial_timeline = config.initial_timeline
|
||||
|
||||
@@ -1238,7 +1152,7 @@ class NeonEnv:
|
||||
max_workers=2 + len(self.pageservers) + len(self.safekeepers)
|
||||
) as executor:
|
||||
futs.append(
|
||||
executor.submit(lambda: self.broker.try_start() or None)
|
||||
executor.submit(lambda: self.broker.start() or None)
|
||||
) # The `or None` is for the linter
|
||||
|
||||
for pageserver in self.pageservers:
|
||||
@@ -1295,7 +1209,7 @@ class NeonEnv:
|
||||
pageserver.stop(immediate=immediate)
|
||||
except RuntimeError:
|
||||
stop_later.append(pageserver)
|
||||
self.broker.stop(immediate=immediate)
|
||||
self.broker.stop()
|
||||
|
||||
# TODO: for nice logging we need python 3.11 ExceptionGroup
|
||||
for ps in stop_later:
|
||||
@@ -1409,7 +1323,6 @@ def neon_simple_env(
|
||||
pytestconfig: Config,
|
||||
port_distributor: PortDistributor,
|
||||
mock_s3_server: MockS3Server,
|
||||
default_broker: NeonBroker,
|
||||
run_id: uuid.UUID,
|
||||
top_output_dir: Path,
|
||||
test_output_dir: Path,
|
||||
@@ -1434,7 +1347,6 @@ def neon_simple_env(
|
||||
top_output_dir=top_output_dir,
|
||||
repo_dir=repo_dir,
|
||||
port_distributor=port_distributor,
|
||||
broker=default_broker,
|
||||
mock_s3_server=mock_s3_server,
|
||||
neon_binpath=neon_binpath,
|
||||
pg_distrib_dir=pg_distrib_dir,
|
||||
@@ -1462,7 +1374,6 @@ def neon_env_builder(
|
||||
neon_binpath: Path,
|
||||
pg_distrib_dir: Path,
|
||||
pg_version: PgVersion,
|
||||
default_broker: NeonBroker,
|
||||
run_id: uuid.UUID,
|
||||
request: FixtureRequest,
|
||||
test_overlay_dir: Path,
|
||||
@@ -1498,7 +1409,6 @@ def neon_env_builder(
|
||||
neon_binpath=neon_binpath,
|
||||
pg_distrib_dir=pg_distrib_dir,
|
||||
pg_version=pg_version,
|
||||
broker=default_broker,
|
||||
run_id=run_id,
|
||||
preserve_database_files=cast(bool, pytestconfig.getoption("--preserve-database-files")),
|
||||
pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine,
|
||||
@@ -1521,14 +1431,6 @@ class PageserverPort:
|
||||
http: int
|
||||
|
||||
|
||||
CREATE_TIMELINE_ID_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
r"^Created timeline '(?P<timeline_id>[^']+)'", re.MULTILINE
|
||||
)
|
||||
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
|
||||
)
|
||||
|
||||
|
||||
class AbstractNeonCli(abc.ABC):
|
||||
"""
|
||||
A typed wrapper around an arbitrary Neon CLI tool.
|
||||
@@ -1757,6 +1659,9 @@ class NeonCli(AbstractNeonCli):
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
timeline_id: Optional[TimelineId] = None,
|
||||
) -> TimelineId:
|
||||
if timeline_id is None:
|
||||
timeline_id = TimelineId.generate()
|
||||
|
||||
cmd = [
|
||||
"timeline",
|
||||
"create",
|
||||
@@ -1764,23 +1669,16 @@ class NeonCli(AbstractNeonCli):
|
||||
new_branch_name,
|
||||
"--tenant-id",
|
||||
str(tenant_id or self.env.initial_tenant),
|
||||
"--timeline-id",
|
||||
str(timeline_id),
|
||||
"--pg-version",
|
||||
self.env.pg_version,
|
||||
]
|
||||
|
||||
if timeline_id is not None:
|
||||
cmd.extend(["--timeline-id", str(timeline_id)])
|
||||
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
matches = CREATE_TIMELINE_ID_EXTRACTOR.search(res.stdout)
|
||||
|
||||
created_timeline_id = None
|
||||
if matches is not None:
|
||||
created_timeline_id = matches.group("timeline_id")
|
||||
|
||||
return TimelineId(str(created_timeline_id))
|
||||
return timeline_id
|
||||
|
||||
def create_branch(
|
||||
self,
|
||||
@@ -1788,12 +1686,17 @@ class NeonCli(AbstractNeonCli):
|
||||
ancestor_branch_name: Optional[str] = None,
|
||||
tenant_id: Optional[TenantId] = None,
|
||||
ancestor_start_lsn: Optional[Lsn] = None,
|
||||
new_timeline_id: Optional[TimelineId] = None,
|
||||
) -> TimelineId:
|
||||
if new_timeline_id is None:
|
||||
new_timeline_id = TimelineId.generate()
|
||||
cmd = [
|
||||
"timeline",
|
||||
"branch",
|
||||
"--branch-name",
|
||||
new_branch_name,
|
||||
"--timeline-id",
|
||||
str(new_timeline_id),
|
||||
"--tenant-id",
|
||||
str(tenant_id or self.env.initial_tenant),
|
||||
]
|
||||
@@ -1805,16 +1708,7 @@ class NeonCli(AbstractNeonCli):
|
||||
res = self.raw_cli(cmd)
|
||||
res.check_returncode()
|
||||
|
||||
matches = CREATE_TIMELINE_ID_EXTRACTOR.search(res.stdout)
|
||||
|
||||
created_timeline_id = None
|
||||
if matches is not None:
|
||||
created_timeline_id = matches.group("timeline_id")
|
||||
|
||||
if created_timeline_id is None:
|
||||
raise Exception("could not find timeline id after `neon timeline create` invocation")
|
||||
else:
|
||||
return TimelineId(str(created_timeline_id))
|
||||
return TimelineId(str(new_timeline_id))
|
||||
|
||||
def list_timelines(self, tenant_id: Optional[TenantId] = None) -> List[Tuple[str, TimelineId]]:
|
||||
"""
|
||||
@@ -1823,6 +1717,9 @@ class NeonCli(AbstractNeonCli):
|
||||
|
||||
# main [b49f7954224a0ad25cc0013ea107b54b]
|
||||
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
|
||||
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
|
||||
)
|
||||
res = self.raw_cli(
|
||||
["timeline", "list", "--tenant-id", str(tenant_id or self.env.initial_tenant)]
|
||||
)
|
||||
@@ -1933,6 +1830,18 @@ class NeonCli(AbstractNeonCli):
|
||||
args.extend(["-m", "immediate"])
|
||||
return self.raw_cli(args)
|
||||
|
||||
def broker_start(
|
||||
self, timeout_in_seconds: Optional[int] = None
|
||||
) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["storage_broker", "start"]
|
||||
if timeout_in_seconds is not None:
|
||||
cmd.append(f"--start-timeout={timeout_in_seconds}s")
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def broker_stop(self) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["storage_broker", "stop"]
|
||||
return self.raw_cli(cmd)
|
||||
|
||||
def endpoint_create(
|
||||
self,
|
||||
branch_name: str,
|
||||
@@ -3355,12 +3264,12 @@ class PgBin:
|
||||
)
|
||||
return base_path
|
||||
|
||||
def get_pg_controldata_checkpoint_lsn(self, pgdata: str) -> Lsn:
|
||||
def get_pg_controldata_checkpoint_lsn(self, pgdata: Path) -> Lsn:
|
||||
"""
|
||||
Run pg_controldata on given datadir and extract checkpoint lsn.
|
||||
"""
|
||||
|
||||
pg_controldata_path = os.path.join(self.pg_bin_path, "pg_controldata")
|
||||
pg_controldata_path = self.pg_bin_path / "pg_controldata"
|
||||
cmd = f"{pg_controldata_path} -D {pgdata}"
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, shell=True)
|
||||
checkpoint_lsn = re.findall(
|
||||
@@ -3443,6 +3352,7 @@ class VanillaPostgres(PgProtocol):
|
||||
assert not self.running
|
||||
with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file:
|
||||
conf_file.write("\n".join(options))
|
||||
conf_file.write("\n")
|
||||
|
||||
def edit_hba(self, hba: List[str]):
|
||||
"""Prepend hba lines into pg_hba.conf file."""
|
||||
@@ -3468,9 +3378,9 @@ class VanillaPostgres(PgProtocol):
|
||||
self.running = False
|
||||
self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"])
|
||||
|
||||
def get_subdir_size(self, subdir) -> int:
|
||||
def get_subdir_size(self, subdir: Path) -> int:
|
||||
"""Return size of pgdatadir subdirectory in bytes."""
|
||||
return get_dir_size(os.path.join(self.pgdatadir, subdir))
|
||||
return get_dir_size(self.pgdatadir / subdir)
|
||||
|
||||
def __enter__(self) -> "VanillaPostgres":
|
||||
return self
|
||||
@@ -3496,6 +3406,7 @@ def vanilla_pg(
|
||||
pg_bin = PgBin(test_output_dir, pg_distrib_dir, pg_version)
|
||||
port = port_distributor.get_port()
|
||||
with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
|
||||
vanilla_pg.configure(["shared_preload_libraries='neon_rmgr'"])
|
||||
yield vanilla_pg
|
||||
|
||||
|
||||
@@ -3996,7 +3907,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
self.env = env
|
||||
self.branch_name: Optional[str] = None # dubious
|
||||
self.endpoint_id: Optional[str] = None # dubious, see asserts below
|
||||
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
|
||||
self.pgdata_dir: Optional[Path] = None # Path to computenode PGDATA
|
||||
self.tenant_id = tenant_id
|
||||
self.pg_port = pg_port
|
||||
self.http_port = http_port
|
||||
@@ -4053,7 +3964,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
allow_multiple=allow_multiple,
|
||||
)
|
||||
path = Path("endpoints") / self.endpoint_id / "pgdata"
|
||||
self.pgdata_dir = os.path.join(self.env.repo_dir, path)
|
||||
self.pgdata_dir = self.env.repo_dir / path
|
||||
self.logfile = self.endpoint_path() / "compute.log"
|
||||
|
||||
config_lines = config_lines or []
|
||||
@@ -4106,21 +4017,21 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
path = Path("endpoints") / self.endpoint_id
|
||||
return self.env.repo_dir / path
|
||||
|
||||
def pg_data_dir_path(self) -> str:
|
||||
def pg_data_dir_path(self) -> Path:
|
||||
"""Path to Postgres data directory"""
|
||||
return os.path.join(self.endpoint_path(), "pgdata")
|
||||
return self.endpoint_path() / "pgdata"
|
||||
|
||||
def pg_xact_dir_path(self) -> str:
|
||||
def pg_xact_dir_path(self) -> Path:
|
||||
"""Path to pg_xact dir"""
|
||||
return os.path.join(self.pg_data_dir_path(), "pg_xact")
|
||||
return self.pg_data_dir_path() / "pg_xact"
|
||||
|
||||
def pg_twophase_dir_path(self) -> str:
|
||||
def pg_twophase_dir_path(self) -> Path:
|
||||
"""Path to pg_twophase dir"""
|
||||
return os.path.join(self.pg_data_dir_path(), "pg_twophase")
|
||||
return self.pg_data_dir_path() / "pg_twophase"
|
||||
|
||||
def config_file_path(self) -> str:
|
||||
def config_file_path(self) -> Path:
|
||||
"""Path to the postgresql.conf in the endpoint directory (not the one in pgdata)"""
|
||||
return os.path.join(self.endpoint_path(), "postgresql.conf")
|
||||
return self.endpoint_path() / "postgresql.conf"
|
||||
|
||||
def config(self, lines: List[str]) -> "Endpoint":
|
||||
"""
|
||||
@@ -4175,7 +4086,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
json.dump(dict(data_dict, **kwargs), file, indent=4)
|
||||
|
||||
# Please note: Migrations only run if pg_skip_catalog_updates is false
|
||||
def wait_for_migrations(self, num_migrations: int = 10):
|
||||
def wait_for_migrations(self, num_migrations: int = 11):
|
||||
with self.cursor() as cur:
|
||||
|
||||
def check_migrations_done():
|
||||
@@ -4285,7 +4196,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
log.info(f'checkpointing at LSN {self.safe_psql("select pg_current_wal_lsn()")[0][0]}')
|
||||
self.safe_psql("checkpoint")
|
||||
assert self.pgdata_dir is not None # please mypy
|
||||
return get_dir_size(os.path.join(self.pgdata_dir, "pg_wal")) / 1024 / 1024
|
||||
return get_dir_size(self.pgdata_dir / "pg_wal") / 1024 / 1024
|
||||
|
||||
def clear_shared_buffers(self, cursor: Optional[Any] = None):
|
||||
"""
|
||||
@@ -4654,6 +4565,40 @@ class Safekeeper(LogUtils):
|
||||
wait_until(20, 0.5, paused)
|
||||
|
||||
|
||||
class NeonBroker(LogUtils):
|
||||
"""An object managing storage_broker instance"""
|
||||
|
||||
def __init__(self, env: NeonEnv):
|
||||
super().__init__(logfile=env.repo_dir / "storage_broker.log")
|
||||
self.env = env
|
||||
self.port: int = self.env.port_distributor.get_port()
|
||||
self.running = False
|
||||
|
||||
def start(
|
||||
self,
|
||||
timeout_in_seconds: Optional[int] = None,
|
||||
):
|
||||
assert not self.running
|
||||
self.env.neon_cli.broker_start(timeout_in_seconds)
|
||||
self.running = True
|
||||
return self
|
||||
|
||||
def stop(self):
|
||||
if self.running:
|
||||
self.env.neon_cli.broker_stop()
|
||||
self.running = False
|
||||
return self
|
||||
|
||||
def listen_addr(self):
|
||||
return f"127.0.0.1:{self.port}"
|
||||
|
||||
def client_url(self):
|
||||
return f"http://{self.listen_addr()}"
|
||||
|
||||
def assert_no_errors(self):
|
||||
assert_no_errors(self.logfile, "storage_controller", [])
|
||||
|
||||
|
||||
# TODO: Replace with `StrEnum` when we upgrade to python 3.11
|
||||
class NodeKind(str, Enum):
|
||||
PAGESERVER = "pageserver"
|
||||
|
||||
@@ -39,7 +39,7 @@ def single_timeline(
|
||||
log.info("detach template tenant form pageserver")
|
||||
env.pageserver.tenant_detach(template_tenant)
|
||||
|
||||
log.info(f"duplicating template tenant {ncopies} times in S3")
|
||||
log.info(f"duplicating template tenant {ncopies} times in remote storage")
|
||||
tenants = fixtures.pageserver.remote_storage.duplicate_tenant(env, template_tenant, ncopies)
|
||||
|
||||
# In theory we could just attach all the tenants, force on-demand downloads via mgmt API, and be done.
|
||||
|
||||
@@ -24,7 +24,7 @@ def build_type() -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def platform() -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ class PgVersion(str, enum.Enum):
|
||||
V14 = "14"
|
||||
V15 = "15"
|
||||
V16 = "16"
|
||||
V17 = "17"
|
||||
# Instead of making version an optional parameter in methods, we can use this fake entry
|
||||
# to explicitly rely on the default server version (could be different from pg_version fixture value)
|
||||
NOT_SET = "<-POSTRGRES VERSION IS NOT SET->"
|
||||
|
||||
@@ -107,7 +107,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
|
||||
env.neon_cli.create_branch("b0")
|
||||
|
||||
endpoint = env.endpoints.create_start("b0")
|
||||
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])
|
||||
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", endpoint.connstr()])
|
||||
|
||||
branch_creation_durations = []
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
|
||||
|
||||
env.neon_cli.create_branch("root")
|
||||
endpoint_root = env.endpoints.create_start("root")
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint_root.connstr(), "-s10"])
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", endpoint_root.connstr(), "-s10"])
|
||||
|
||||
fork_at_current_lsn(env, endpoint_root, "child", "root")
|
||||
|
||||
|
||||
@@ -24,13 +24,13 @@ def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", endpoint.connstr()])
|
||||
|
||||
endpoint.safe_psql("create publication pub1 for table pgbench_accounts, pgbench_history")
|
||||
|
||||
# now start subscriber
|
||||
vanilla_pg.start()
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s10", vanilla_pg.connstr()])
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", vanilla_pg.connstr()])
|
||||
|
||||
vanilla_pg.safe_psql("truncate table pgbench_accounts")
|
||||
vanilla_pg.safe_psql("truncate table pgbench_history")
|
||||
@@ -99,9 +99,9 @@ def test_subscriber_lag(
|
||||
sub_connstr = benchmark_project_sub.connstr
|
||||
|
||||
if benchmark_project_pub.is_new:
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s100"], env=pub_env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=pub_env)
|
||||
if benchmark_project_sub.is_new:
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s100"], env=sub_env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=sub_env)
|
||||
|
||||
pub_conn = psycopg2.connect(pub_connstr)
|
||||
sub_conn = psycopg2.connect(sub_connstr)
|
||||
@@ -193,8 +193,8 @@ def test_publisher_restart(
|
||||
pub_connstr = benchmark_project_pub.connstr
|
||||
sub_connstr = benchmark_project_sub.connstr
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s100"], env=pub_env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s100"], env=sub_env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=pub_env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=sub_env)
|
||||
|
||||
pub_conn = psycopg2.connect(pub_connstr)
|
||||
sub_conn = psycopg2.connect(sub_connstr)
|
||||
@@ -288,7 +288,7 @@ def test_snap_files(
|
||||
is_super = cur.fetchall()[0][0]
|
||||
assert is_super, "This benchmark won't work if we don't have superuser"
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s100"], env=env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=env)
|
||||
|
||||
conn = psycopg2.connect(connstr)
|
||||
conn.autocommit = True
|
||||
|
||||
@@ -85,7 +85,7 @@ def test_ro_replica_lag(
|
||||
endpoint_id=replica["endpoint"]["id"],
|
||||
)["uri"]
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s100"], env=master_env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=master_env)
|
||||
|
||||
master_workload = pg_bin.run_nonblocking(
|
||||
["pgbench", "-c10", pgbench_duration, "-Mprepared"],
|
||||
@@ -212,7 +212,7 @@ def test_replication_start_stop(
|
||||
for i in range(num_replicas):
|
||||
replica_env[i]["PGHOST"] = replicas[i]["endpoint"]["host"]
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s10"], env=master_env)
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10"], env=master_env)
|
||||
|
||||
# Sync replicas
|
||||
with psycopg2.connect(master_connstr) as conn_master:
|
||||
|
||||
@@ -84,7 +84,7 @@ def test_storage_controller_many_tenants(
|
||||
compute_reconfigure_listener.register_on_notify(lambda body: time.sleep(0.01))
|
||||
|
||||
env = neon_env_builder.init_configs()
|
||||
neon_env_builder.start()
|
||||
env.start()
|
||||
|
||||
# We will intentionally stress reconciler concurrrency, which triggers a warning when lots
|
||||
# of shards are hitting the delayed path.
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"public_extensions": [],
|
||||
"library_index": {
|
||||
"TODO": "We still need PG17 extensions"
|
||||
},
|
||||
"extension_data": {}
|
||||
}
|
||||
@@ -52,7 +52,7 @@ def test_branching_with_pgbench(
|
||||
def run_pgbench(connstr: str):
|
||||
log.info(f"Start a pgbench workload on pg {connstr}")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-T15", connstr])
|
||||
|
||||
env.neon_cli.create_branch("b0", tenant_id=tenant)
|
||||
|
||||
@@ -21,7 +21,7 @@ from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import (
|
||||
timeline_delete_wait_completed,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.pg_version import PgVersion, skip_on_postgres
|
||||
from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
|
||||
from fixtures.workload import Workload
|
||||
|
||||
@@ -156,6 +156,9 @@ ingest_lag_log_line = ".*ingesting record with timestamp lagging more than wait_
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
@skip_on_postgres(
|
||||
PgVersion.V17, "There are no snapshots yet"
|
||||
) # TODO: revert this once we have snapshots
|
||||
def test_backward_compatibility(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
test_output_dir: Path,
|
||||
@@ -178,7 +181,7 @@ def test_backward_compatibility(
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
|
||||
env.pageserver.allowed_errors.append(ingest_lag_log_line)
|
||||
neon_env_builder.start()
|
||||
env.start()
|
||||
|
||||
check_neon_works(
|
||||
env,
|
||||
@@ -203,6 +206,9 @@ def test_backward_compatibility(
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
@skip_on_postgres(
|
||||
PgVersion.V17, "There are no snapshots yet"
|
||||
) # TODO: revert this once we have snapshots
|
||||
def test_forward_compatibility(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
test_output_dir: Path,
|
||||
@@ -265,7 +271,7 @@ def test_forward_compatibility(
|
||||
# does not include logs from previous runs
|
||||
assert not env.pageserver.log_contains("git-env:" + prev_pageserver_version)
|
||||
|
||||
neon_env_builder.start()
|
||||
env.start()
|
||||
|
||||
# ensure the specified pageserver is running
|
||||
assert env.pageserver.log_contains("git-env:" + prev_pageserver_version)
|
||||
|
||||
@@ -291,7 +291,7 @@ def pgbench_init_tenant(
|
||||
)
|
||||
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
pg_bin.run(["pgbench", "-i", f"-s{scale}", endpoint.connstr()])
|
||||
pg_bin.run(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", endpoint.connstr()])
|
||||
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
|
||||
|
||||
return (tenant_id, timeline_id)
|
||||
|
||||
@@ -44,6 +44,8 @@ def test_remote_extensions(
|
||||
):
|
||||
if pg_version == PgVersion.V16:
|
||||
pytest.skip("TODO: PG16 extension building")
|
||||
if pg_version == PgVersion.V17:
|
||||
pytest.skip("TODO: PG17 extension building")
|
||||
|
||||
# setup mock http server
|
||||
# that expects request for anon.tar.zst
|
||||
|
||||
@@ -199,7 +199,7 @@ def test_hot_standby_gc(neon_env_builder: NeonEnvBuilder, pause_apply: bool):
|
||||
def run_pgbench(connstr: str, pg_bin: PgBin):
|
||||
log.info(f"Start a pgbench workload on pg {connstr}")
|
||||
# s10 is about 150MB of data. In debug mode init takes about 15s on SSD.
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s10", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", connstr])
|
||||
log.info("pgbench init done")
|
||||
pg_bin.run_capture(["pgbench", "-T60", connstr])
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ def test_migrations(neon_simple_env: NeonEnv):
|
||||
endpoint.respec(skip_pg_catalog_updates=False)
|
||||
endpoint.start()
|
||||
|
||||
num_migrations = 10
|
||||
num_migrations = 11
|
||||
endpoint.wait_for_migrations(num_migrations=num_migrations)
|
||||
|
||||
with endpoint.cursor() as cur:
|
||||
|
||||
@@ -134,6 +134,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
|
||||
env.neon_cli.pageserver_stop(env.pageserver.id)
|
||||
env.neon_cli.safekeeper_stop()
|
||||
env.neon_cli.storage_controller_stop(False)
|
||||
env.neon_cli.broker_stop()
|
||||
|
||||
# Keep NeonEnv state up to date, it usually owns starting/stopping services
|
||||
env.pageserver.running = False
|
||||
@@ -176,6 +177,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
# Stop this to get out of the way of the following `start`
|
||||
env.neon_cli.storage_controller_stop(False)
|
||||
env.neon_cli.broker_stop()
|
||||
|
||||
# Default start
|
||||
res = env.neon_cli.raw_cli(["start"])
|
||||
|
||||
@@ -134,7 +134,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_configs()
|
||||
env.broker.try_start()
|
||||
env.broker.start()
|
||||
for sk in env.safekeepers:
|
||||
sk.start()
|
||||
env.storage_controller.start()
|
||||
|
||||
@@ -74,7 +74,7 @@ def test_metric_collection(
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*metrics endpoint refused the sent metrics*",
|
||||
".*metrics_collection: failed to upload to S3: Failed to upload data of length .* to storage path.*",
|
||||
".*metrics_collection: failed to upload to remote storage: Failed to upload data of length .* to storage path.*",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
|
||||
|
||||
def run_pgbench(connstr: str):
|
||||
log.info(f"Start a pgbench workload on pg {connstr}")
|
||||
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", f"-T{int(n_reconnects*timeout)}", connstr])
|
||||
|
||||
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
|
||||
|
||||
@@ -19,7 +19,7 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB
|
||||
|
||||
def run_pgbench(connstr: str):
|
||||
log.info(f"Start a pgbench workload on pg {connstr}")
|
||||
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
|
||||
pg_bin.run_capture(["pgbench", f"-T{n_restarts}", connstr])
|
||||
|
||||
thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
|
||||
|
||||
@@ -20,16 +20,19 @@ def test_postgres_version(base_dir: Path, pg_bin: PgBin, pg_version: PgVersion):
|
||||
output = f.read().strip()
|
||||
|
||||
# `postgres --version` prints something like "postgres (PostgreSQL) 15.6 (85d809c124a898847a97d66a211f7d5ef4f8e0cb)".
|
||||
pattern = r"postgres \(PostgreSQL\) (?P<version>\d+\.\d+) \((?P<commit>[0-9a-f]{40})\)"
|
||||
# beta- and release candidate releases would use '17beta1' and '18rc2' instead of .-separated numbers.
|
||||
pattern = (
|
||||
r"postgres \(PostgreSQL\) (?P<version>\d+(?:beta|rc|\.)\d+) \((?P<commit>[0-9a-f]{40})\)"
|
||||
)
|
||||
match = re.search(pattern, output, re.IGNORECASE)
|
||||
assert match is not None, f"Can't parse {output} with {pattern}"
|
||||
|
||||
version = match.group("version")
|
||||
commit = match.group("commit")
|
||||
|
||||
assert (
|
||||
pg_version.v_prefixed in expected_revisions
|
||||
), f"Version `{pg_version.v_prefixed}` doesn't exist in `vendor/revisions.json`, please update it if these changes are intentional"
|
||||
|
||||
msg = f"Unexpected Postgres {pg_version} version: `{output}`, please update `vendor/revisions.json` if these changes are intentional"
|
||||
assert [version, commit] == expected_revisions[pg_version.v_prefixed], msg
|
||||
if "." in version:
|
||||
assert (
|
||||
pg_version.v_prefixed in expected_revisions
|
||||
), f"Released PostgreSQL version `{pg_version.v_prefixed}` doesn't exist in `vendor/revisions.json`, please update it if these changes are intentional"
|
||||
msg = f"Unexpected Postgres {pg_version} version: `{output}`, please update `vendor/revisions.json` if these changes are intentional"
|
||||
assert [version, commit] == expected_revisions[pg_version.v_prefixed], msg
|
||||
|
||||
@@ -374,7 +374,7 @@ def test_sharding_split_smoke(
|
||||
non_default_tenant_config = {"gc_horizon": 77 * 1024 * 1024}
|
||||
|
||||
env = neon_env_builder.init_configs(True)
|
||||
neon_env_builder.start()
|
||||
env.start()
|
||||
tenant_id = TenantId.generate()
|
||||
timeline_id = TimelineId.generate()
|
||||
env.neon_cli.create_tenant(
|
||||
@@ -1436,7 +1436,7 @@ def test_sharding_unlogged_relation(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
neon_env_builder.num_pageservers = 2
|
||||
env = neon_env_builder.init_configs()
|
||||
neon_env_builder.start()
|
||||
env.start()
|
||||
|
||||
tenant_id = TenantId.generate()
|
||||
timeline_id = TimelineId.generate()
|
||||
@@ -1475,7 +1475,7 @@ def test_top_tenants(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
|
||||
env = neon_env_builder.init_configs()
|
||||
neon_env_builder.start()
|
||||
env.start()
|
||||
|
||||
tenants = []
|
||||
n_tenants = 8
|
||||
|
||||
@@ -7,6 +7,7 @@ from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
import pytest
|
||||
from fixtures.auth_tokens import TokenScope
|
||||
from fixtures.common_types import TenantId, TenantShardId, TimelineId
|
||||
from fixtures.compute_reconfigure import ComputeReconfigure
|
||||
from fixtures.log_helper import log
|
||||
@@ -18,7 +19,6 @@ from fixtures.neon_fixtures import (
|
||||
PgBin,
|
||||
StorageControllerApiException,
|
||||
StorageControllerLeadershipStatus,
|
||||
TokenScope,
|
||||
last_flush_lsn_upload,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
|
||||
@@ -69,7 +69,7 @@ def test_storage_controller_smoke(
|
||||
env = neon_env_builder.init_configs()
|
||||
|
||||
# Start services by hand so that we can skip a pageserver (this will start + register later)
|
||||
env.broker.try_start()
|
||||
env.broker.start()
|
||||
env.storage_controller.start()
|
||||
env.pageservers[0].start()
|
||||
env.pageservers[1].start()
|
||||
@@ -292,7 +292,7 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up
|
||||
|
||||
# Start services by hand so that we can skip registration on one of the pageservers
|
||||
env = neon_env_builder.init_configs()
|
||||
env.broker.try_start()
|
||||
env.broker.start()
|
||||
env.storage_controller.start()
|
||||
|
||||
# This is the pageserver where we'll initially create the tenant. Run it in emergency
|
||||
@@ -2048,8 +2048,11 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
|
||||
# Make a change to the tenant config to trigger a slow reconcile
|
||||
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
|
||||
virtual_ps_http.patch_tenant_config_client_side(tid, {"compaction_threshold": 5}, None)
|
||||
env.storage_controller.allowed_errors.append(
|
||||
".*Accepted configuration update but reconciliation failed.*"
|
||||
env.storage_controller.allowed_errors.extend(
|
||||
[
|
||||
".*Accepted configuration update but reconciliation failed.*",
|
||||
".*Leader is stepped down instance",
|
||||
]
|
||||
)
|
||||
|
||||
observed_state = env.storage_controller.step_down()
|
||||
@@ -2072,9 +2075,9 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
|
||||
assert "compaction_threshold" in ps_tenant_conf.effective_config
|
||||
assert ps_tenant_conf.effective_config["compaction_threshold"] == 5
|
||||
|
||||
# Validate that the storcon is not replying to the usual requests
|
||||
# once it has stepped down.
|
||||
with pytest.raises(StorageControllerApiException, match="stepped_down"):
|
||||
# Validate that the storcon attempts to forward the request, but stops.
|
||||
# when it realises it is still the current leader.
|
||||
with pytest.raises(StorageControllerApiException, match="Leader is stepped down instance"):
|
||||
env.storage_controller.tenant_list()
|
||||
|
||||
# Validate that we can step down multiple times and the observed state
|
||||
@@ -2123,7 +2126,7 @@ def start_env(env: NeonEnv, storage_controller_port: int):
|
||||
max_workers=2 + len(env.pageservers) + len(env.safekeepers)
|
||||
) as executor:
|
||||
futs.append(
|
||||
executor.submit(lambda: env.broker.try_start() or None)
|
||||
executor.submit(lambda: env.broker.start() or None)
|
||||
) # The `or None` is for the linter
|
||||
|
||||
for pageserver in env.pageservers:
|
||||
@@ -2221,6 +2224,15 @@ def test_storage_controller_leadership_transfer(
|
||||
env.storage_controller.wait_until_ready()
|
||||
env.storage_controller.consistency_check()
|
||||
|
||||
if not step_down_times_out:
|
||||
# Check that the stepped down instance forwards requests
|
||||
# to the new leader while it's still running.
|
||||
storage_controller_proxy.route_to(f"http://127.0.0.1:{storage_controller_1_port}")
|
||||
env.storage_controller.tenant_list()
|
||||
env.storage_controller.node_configure(env.pageservers[0].id, {"scheduling": "Pause"})
|
||||
status = env.storage_controller.node_status(env.pageservers[0].id)
|
||||
assert status["scheduling"] == "Pause"
|
||||
|
||||
if step_down_times_out:
|
||||
env.storage_controller.allowed_errors.extend(
|
||||
[
|
||||
|
||||
@@ -106,7 +106,7 @@ def test_threshold_based_eviction(
|
||||
|
||||
# create a bunch of layers
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as pg:
|
||||
pg_bin.run(["pgbench", "-i", "-s", "3", pg.connstr()])
|
||||
pg_bin.run(["pgbench", "-i", "-I", "dtGvp", "-s", "3", pg.connstr()])
|
||||
last_flush_lsn_upload(env, pg, tenant_id, timeline_id)
|
||||
# wrap up and shutdown safekeepers so that no more layers will be created after the final checkpoint
|
||||
for sk in env.safekeepers:
|
||||
|
||||
@@ -118,6 +118,9 @@ def test_ancestor_detach_branched_from(
|
||||
truncated_layers = 0
|
||||
elif branchpoint == Branchpoint.AFTER_L0:
|
||||
branch_at = Lsn(last_lsn + 8)
|
||||
# make sure the branch point is not on a page header
|
||||
if 0 < (branch_at.lsn_int % 8192) < 40:
|
||||
branch_at += 40
|
||||
rows = 8192
|
||||
# as there is no 8 byte walrecord, nothing should get copied from the straddling layer
|
||||
truncated_layers = 0
|
||||
|
||||
@@ -1,19 +1,32 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from fixtures.common_types import TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnv,
|
||||
PgBin,
|
||||
fork_at_current_lsn,
|
||||
import_timeline_from_vanilla_postgres,
|
||||
)
|
||||
|
||||
|
||||
#
|
||||
# Test branching, when a transaction is in prepared state
|
||||
#
|
||||
def test_twophase(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=5"])
|
||||
def twophase_test_on_timeline(env: NeonEnv):
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_twophase", config_lines=["max_prepared_transactions=5"]
|
||||
)
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
# FIXME: Switch to the next WAL segment, to work around the bug fixed in
|
||||
# https://github.com/neondatabase/neon/pull/8914. When that is merged, this can be
|
||||
# removed.
|
||||
cur.execute("select pg_switch_wal()")
|
||||
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
|
||||
# Prepare a transaction that will insert a row
|
||||
@@ -53,7 +66,7 @@ def test_twophase(neon_simple_env: NeonEnv):
|
||||
assert len(twophase_files) == 2
|
||||
|
||||
# Create a branch with the transaction in prepared state
|
||||
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "main")
|
||||
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase")
|
||||
|
||||
# Start compute on the new branch
|
||||
endpoint2 = env.endpoints.create_start(
|
||||
@@ -80,3 +93,50 @@ def test_twophase(neon_simple_env: NeonEnv):
|
||||
# Only one committed insert is visible on the original branch
|
||||
cur.execute("SELECT * FROM foo")
|
||||
assert cur.fetchall() == [("three",)]
|
||||
|
||||
|
||||
def test_twophase(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
Test branching, when a transaction is in prepared state
|
||||
"""
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_twophase")
|
||||
|
||||
twophase_test_on_timeline(env)
|
||||
|
||||
|
||||
def test_twophase_nonzero_epoch(
|
||||
neon_simple_env: NeonEnv,
|
||||
test_output_dir: Path,
|
||||
pg_bin: PgBin,
|
||||
vanilla_pg,
|
||||
):
|
||||
"""
|
||||
Same as 'test_twophase' test, but with a non-zero XID epoch, i.e. after 4 billion XIDs
|
||||
have been consumed. (This is to ensure that we correctly use the full 64-bit XIDs in
|
||||
pg_twophase filenames with PostgreSQL v17.)
|
||||
"""
|
||||
env = neon_simple_env
|
||||
|
||||
# Reset the vanilla Postgres instance with a higher XID epoch
|
||||
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
|
||||
cmd = [pg_resetwal_path, "--epoch=1000000000", "-D", str(vanilla_pg.pgdatadir)]
|
||||
pg_bin.run_capture(cmd)
|
||||
|
||||
timeline_id = TimelineId.generate()
|
||||
|
||||
# Import the cluster to Neon
|
||||
vanilla_pg.start()
|
||||
vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
|
||||
import_timeline_from_vanilla_postgres(
|
||||
test_output_dir,
|
||||
env,
|
||||
pg_bin,
|
||||
env.initial_tenant,
|
||||
timeline_id,
|
||||
"test_twophase",
|
||||
vanilla_pg.connstr(),
|
||||
)
|
||||
vanilla_pg.stop() # don't need the original server anymore
|
||||
|
||||
twophase_test_on_timeline(env)
|
||||
|
||||
@@ -19,7 +19,6 @@ import psycopg2.errors
|
||||
import psycopg2.extras
|
||||
import pytest
|
||||
import requests
|
||||
from fixtures.broker import NeonBroker
|
||||
from fixtures.common_types import Lsn, TenantId, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import parse_metrics
|
||||
@@ -1439,11 +1438,7 @@ class SafekeeperEnv:
|
||||
):
|
||||
self.repo_dir = repo_dir
|
||||
self.port_distributor = port_distributor
|
||||
self.broker = NeonBroker(
|
||||
logfile=Path(self.repo_dir) / "storage_broker.log",
|
||||
port=self.port_distributor.get_port(),
|
||||
neon_binpath=neon_binpath,
|
||||
)
|
||||
self.fake_broker_endpoint = f"http://127.0.0.1:{port_distributor.get_port()}"
|
||||
self.pg_bin = pg_bin
|
||||
self.num_safekeepers = num_safekeepers
|
||||
self.bin_safekeeper = str(neon_binpath / "safekeeper")
|
||||
@@ -1492,7 +1487,7 @@ class SafekeeperEnv:
|
||||
"--id",
|
||||
str(i),
|
||||
"--broker-endpoint",
|
||||
self.broker.client_url(),
|
||||
self.fake_broker_endpoint,
|
||||
]
|
||||
log.info(f'Running command "{" ".join(cmd)}"')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user