Merge branch 'main' into yuchen/virtual-file-config

This commit is contained in:
Yuchen Liang
2024-10-07 11:49:13 -04:00
committed by GitHub
220 changed files with 3905 additions and 2059 deletions

View File

@@ -340,23 +340,27 @@ def neon_with_baseline(request: FixtureRequest) -> PgCompare:
@pytest.fixture(scope="function", autouse=True)
def sync_after_each_test():
# The fixture calls `sync(2)` after each test if `SYNC_AFTER_EACH_TEST` env var is `true`
def sync_between_tests():
# The fixture calls `sync(2)` after each test if `SYNC_BETWEEN_TESTS` env var is `true`
#
# In CI, `SYNC_AFTER_EACH_TEST` is set to `true` only for benchmarks (`test_runner/performance`)
# In CI, `SYNC_BETWEEN_TESTS` is set to `true` only for benchmarks (`test_runner/performance`)
# that are run on self-hosted runners because some of these tests are pretty write-heavy
# and create issues to start the processes within 10s
key = "SYNC_AFTER_EACH_TEST"
key = "SYNC_BETWEEN_TESTS"
enabled = os.environ.get(key) == "true"
if enabled:
start = time.time()
# we only run benches on unices, the method might not exist on windows
os.sync()
elapsed = time.time() - start
log.info(f"called sync before test {elapsed=}")
yield
if not enabled:
# regress test, or running locally
return
start = time.time()
# we only run benches on unices, the method might not exist on windows
os.sync()
elapsed = time.time() - start
log.info(f"called sync after test {elapsed=}")
if enabled:
start = time.time()
# we only run benches on unices, the method might not exist on windows
os.sync()
elapsed = time.time() - start
log.info(f"called sync after test {elapsed=}")

View File

@@ -0,0 +1,662 @@
from __future__ import annotations
import abc
import json
import os
import re
import subprocess
import tempfile
import textwrap
from itertools import chain, product
from pathlib import Path
from typing import (
Any,
Dict,
List,
Optional,
Tuple,
TypeVar,
cast,
)
import toml
from fixtures.common_types import Lsn, TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.pageserver.common_types import IndexPartDump
from fixtures.pg_version import PgVersion
from fixtures.utils import AuxFileStore
T = TypeVar("T")
class AbstractNeonCli(abc.ABC):
    """
    A typed wrapper around an arbitrary Neon CLI tool.

    Supports a way to run arbitrary command directly via CLI.
    Do not use directly, use specific subclasses instead.
    """

    COMMAND: str = cast(str, None)  # To be overwritten by the derived class.

    def __init__(self, extra_env: Optional[Dict[str, str]], binpath: Path):
        # Environment variables layered on top of os.environ for every invocation.
        self.extra_env = extra_env
        # Directory that contains the COMMAND binary.
        self.binpath = binpath

    @staticmethod
    def _decode_output(captured: Any) -> str:
        """
        Convert output attached to a `subprocess.TimeoutExpired` into text.

        The captured output may be missing (None), raw bytes, or already
        decoded text; all three are accepted so that reporting a timeout can
        never fail with a secondary error and hide the timeout itself.
        """
        if not captured:
            return ""
        if isinstance(captured, bytes):
            return captured.decode(errors="replace")
        return str(captured)

    def raw_cli(
        self,
        arguments: List[str],
        extra_env_vars: Optional[Dict[str, str]] = None,
        check_return_code=True,
        timeout=None,
    ) -> "subprocess.CompletedProcess[str]":
        """
        Run the command with the specified arguments.

        Arguments must be in list form, e.g. ['endpoint', 'create']

        Return both stdout and stderr, which can be accessed as

        >>> result = env.neon_cli.raw_cli(...)
        >>> assert result.stderr == ""
        >>> log.info(result.stdout)

        If `check_return_code`, on non-zero exit code logs failure and raises.

        Raises:
            RuntimeError: the command exited with a non-zero code and
                `check_return_code` is true. The message contains the captured
                stdout/stderr; the cause is a `subprocess.CalledProcessError`.
            subprocess.TimeoutExpired: the command did not finish within
                `timeout`; whatever output was captured is logged first.
        """
        assert isinstance(arguments, list)
        assert isinstance(self.COMMAND, str)

        command_path = str(self.binpath / self.COMMAND)

        args = [command_path] + arguments
        log.info('Running command "{}"'.format(" ".join(args)))

        # Precedence, lowest to highest: process environment, per-instance
        # extras, per-call extras.
        env_vars = os.environ.copy()
        env_vars.update(self.extra_env or {})
        env_vars.update(extra_env_vars or {})

        # Pass through coverage settings
        var = "LLVM_PROFILE_FILE"
        val = os.environ.get(var)
        if val:
            env_vars[var] = val

        # Intercept CalledProcessError and print more info
        try:
            res = subprocess.run(
                args,
                env=env_vars,
                check=False,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            stderr = self._decode_output(e.stderr)
            stdout = self._decode_output(e.stdout)
            log.warning(f"CLI timeout: stderr={stderr}, stdout={stdout}")
            raise

        indent = " "
        if not res.returncode:
            stripped = res.stdout.strip()
            lines = stripped.splitlines()
            if len(lines) < 2:
                log.debug(f"Run {res.args} success: {stripped}")
            else:
                log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
        elif check_return_code:
            # this way command output will be recorded and shown in CI in the failure message
            indent = indent * 2
            msg = textwrap.dedent(
                """\
                Run %s failed:
                stdout:
                %s
                stderr:
                %s
                """
            )
            msg = msg % (
                res.args,
                textwrap.indent(res.stdout.strip(), indent),
                textwrap.indent(res.stderr.strip(), indent),
            )
            log.info(msg)
            raise RuntimeError(msg) from subprocess.CalledProcessError(
                res.returncode, res.args, res.stdout, res.stderr
            )
        return res
class NeonLocalCli(AbstractNeonCli):
    """A typed wrapper around the `neon_local` CLI tool.

    Supports main commands via typed methods and a way to run arbitrary command directly via CLI.

    Note: The methods in this class are supposed to be faithful wrappers of the underlying
    'neon_local' commands. If you're tempted to add any logic here, please consider putting it
    in the caller instead!

    There are a few exceptions where these wrapper methods intentionally differ from the
    underlying commands, however:

    - Many 'neon_local' commands take an optional 'tenant_id' argument and use the default from
      the config file if it's omitted. The corresponding wrappers require an explicit 'tenant_id'
      argument. The idea is that we don't want to rely on the config file's default in tests,
      because NeonEnv has its own 'initial_tenant'. They are currently always the same, but we
      want to rely on the NeonEnv's default instead of the config file default in tests.

    - Similarly, --pg_version argument is always required in the wrappers, even when it's
      optional in the 'neon_local' command. The default in 'neon_local' is a specific
      hardcoded version, but in tests, we never want to accidentally rely on that; we
      always want to use the version from the test fixtures.

    - Wrappers for commands that create a new tenant or timeline ID require the new tenant
      or timeline ID to be passed by the caller, while the 'neon_local' commands will
      generate a random ID if it's not specified. This is because we don't want to have to
      parse the ID from the 'neon_local' output. Making it required ensures that the
      caller has to generate it.
    """

    COMMAND = "neon_local"

    # Extracts "<branch_name> [<timeline_id>]" pairs from `timeline list` output, e.g.
    #   main [b49f7954224a0ad25cc0013ea107b54b]
    #   ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
    # Compiled once at class creation instead of on every timeline_list() call.
    _TIMELINE_DATA_EXTRACTOR = re.compile(
        r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
    )

    def __init__(
        self,
        extra_env: Optional[Dict[str, str]],
        binpath: Path,
        repo_dir: Path,
        pg_distrib_dir: Path,
    ):
        # Copy so that the caller's dict is not mutated by the additions below.
        env_vars = {} if extra_env is None else extra_env.copy()
        env_vars["NEON_REPO_DIR"] = str(repo_dir)
        env_vars["POSTGRES_DISTRIB_DIR"] = str(pg_distrib_dir)

        super().__init__(env_vars, binpath)

    @staticmethod
    def _conf_args(conf: Optional[Dict[str, Any]]) -> List[str]:
        """Expand a config dict into a flat `-c key:value -c key:value ...` argument list."""
        if conf is None:
            return []
        return list(
            chain.from_iterable(
                product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
            )
        )

    def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
        """Run `neon_local` with raw arguments; see `AbstractNeonCli.raw_cli`."""
        return super().raw_cli(*args, **kwargs)

    def tenant_create(
        self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        pg_version: PgVersion,
        conf: Optional[Dict[str, Any]] = None,
        shard_count: Optional[int] = None,
        shard_stripe_size: Optional[int] = None,
        placement_policy: Optional[str] = None,
        set_default: bool = False,
        aux_file_policy: Optional[AuxFileStore] = None,
    ):
        """
        Creates a new tenant with the given id and an initial timeline with the given id.

        Returns nothing: both ids are supplied by the caller.
        """
        args = [
            "tenant",
            "create",
            "--tenant-id",
            str(tenant_id),
            "--timeline-id",
            str(timeline_id),
            "--pg-version",
            pg_version,
        ]
        args.extend(self._conf_args(conf))

        if aux_file_policy is AuxFileStore.V2:
            args.extend(["-c", "switch_aux_file_policy:v2"])
        elif aux_file_policy is AuxFileStore.V1:
            args.extend(["-c", "switch_aux_file_policy:v1"])
        elif aux_file_policy is AuxFileStore.CrossValidation:
            args.extend(["-c", "switch_aux_file_policy:cross-validation"])

        if set_default:
            args.append("--set-default")

        if shard_count is not None:
            args.extend(["--shard-count", str(shard_count)])

        if shard_stripe_size is not None:
            args.extend(["--shard-stripe-size", str(shard_stripe_size)])

        if placement_policy is not None:
            args.extend(["--placement-policy", str(placement_policy)])

        res = self.raw_cli(args)
        res.check_returncode()

    def tenant_import(self, tenant_id: TenantId):
        """Run `neon_local tenant import` for the given tenant."""
        args = ["tenant", "import", "--tenant-id", str(tenant_id)]
        res = self.raw_cli(args)
        res.check_returncode()

    def tenant_set_default(self, tenant_id: TenantId):
        """
        Update default tenant for future operations that require tenant_id.
        """
        res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
        res.check_returncode()

    def tenant_config(self, tenant_id: TenantId, conf: Dict[str, str]):
        """
        Update tenant config.
        """
        args = ["tenant", "config", "--tenant-id", str(tenant_id)]
        args.extend(self._conf_args(conf))

        res = self.raw_cli(args)
        res.check_returncode()

    def tenant_list(self) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local tenant list` and return the completed process."""
        res = self.raw_cli(["tenant", "list"])
        res.check_returncode()
        return res

    def timeline_create(
        self,
        new_branch_name: str,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        pg_version: PgVersion,
    ) -> TimelineId:
        """Run `neon_local timeline create` and return the id of the created timeline."""
        # Kept for callers that still pass None despite the annotation.
        if timeline_id is None:
            timeline_id = TimelineId.generate()

        cmd = [
            "timeline",
            "create",
            "--branch-name",
            new_branch_name,
            "--tenant-id",
            str(tenant_id),
            "--timeline-id",
            str(timeline_id),
            "--pg-version",
            pg_version,
        ]

        res = self.raw_cli(cmd)
        res.check_returncode()

        return timeline_id

    def timeline_branch(
        self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        new_branch_name,
        ancestor_branch_name: Optional[str] = None,
        ancestor_start_lsn: Optional[Lsn] = None,
    ):
        """Run `neon_local timeline branch`; `timeline_id` is the id given to the new branch."""
        cmd = [
            "timeline",
            "branch",
            "--branch-name",
            new_branch_name,
            "--timeline-id",
            str(timeline_id),
            "--tenant-id",
            str(tenant_id),
        ]
        if ancestor_branch_name is not None:
            cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
        if ancestor_start_lsn is not None:
            cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])

        res = self.raw_cli(cmd)
        res.check_returncode()

    def timeline_import(
        self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        new_branch_name: str,
        base_lsn: Lsn,
        base_tarfile: Path,
        pg_version: PgVersion,
        end_lsn: Optional[Lsn] = None,
        wal_tarfile: Optional[Path] = None,
    ):
        """Run `neon_local timeline import` with a base tarball and an optional WAL tarball."""
        cmd = [
            "timeline",
            "import",
            "--tenant-id",
            str(tenant_id),
            "--timeline-id",
            str(timeline_id),
            "--pg-version",
            pg_version,
            "--branch-name",
            new_branch_name,
            "--base-lsn",
            str(base_lsn),
            "--base-tarfile",
            str(base_tarfile),
        ]
        if end_lsn is not None:
            cmd.extend(["--end-lsn", str(end_lsn)])
        if wal_tarfile is not None:
            cmd.extend(["--wal-tarfile", str(wal_tarfile)])

        res = self.raw_cli(cmd)
        res.check_returncode()

    def timeline_list(self, tenant_id: TenantId) -> List[Tuple[str, TimelineId]]:
        """
        Returns a sorted list of (branch_name, timeline_id) tuples parsed out of the
        `neon_local timeline list` CLI output.
        """
        res = self.raw_cli(["timeline", "list", "--tenant-id", str(tenant_id)])
        return sorted(
            (branch_name, TimelineId(timeline_id))
            for branch_name, timeline_id in self._TIMELINE_DATA_EXTRACTOR.findall(res.stdout)
        )

    def init(
        self,
        init_config: Dict[str, Any],
        force: Optional[str] = None,
    ) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local init` with `init_config` serialized to a temporary TOML file."""
        with tempfile.NamedTemporaryFile(mode="w+") as init_config_tmpfile:
            init_config_tmpfile.write(toml.dumps(init_config))
            # Make sure the content is on disk before the subprocess reads it.
            init_config_tmpfile.flush()

            cmd = [
                "init",
                f"--config={init_config_tmpfile.name}",
            ]

            if force is not None:
                cmd.extend(["--force", force])

            # Must run inside the `with`: the file is removed when the context exits.
            res = self.raw_cli(cmd)
            res.check_returncode()
        return res

    def storage_controller_start(
        self,
        timeout_in_seconds: Optional[int] = None,
        instance_id: Optional[int] = None,
        base_port: Optional[int] = None,
    ):
        """Run `neon_local storage_controller start` with the optional settings given."""
        cmd = ["storage_controller", "start"]
        if timeout_in_seconds is not None:
            cmd.append(f"--start-timeout={timeout_in_seconds}s")
        if instance_id is not None:
            cmd.append(f"--instance-id={instance_id}")
        if base_port is not None:
            cmd.append(f"--base-port={base_port}")
        return self.raw_cli(cmd)

    def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
        """Run `neon_local storage_controller stop`, optionally in immediate mode."""
        cmd = ["storage_controller", "stop"]
        if immediate:
            cmd.extend(["-m", "immediate"])
        if instance_id is not None:
            cmd.append(f"--instance-id={instance_id}")
        return self.raw_cli(cmd)

    def pageserver_start(
        self,
        id: int,
        extra_env_vars: Optional[Dict[str, str]] = None,
        timeout_in_seconds: Optional[int] = None,
    ) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local pageserver start` for the pageserver with the given id."""
        start_args = ["pageserver", "start", f"--id={id}"]
        if timeout_in_seconds is not None:
            start_args.append(f"--start-timeout={timeout_in_seconds}s")
        return self.raw_cli(start_args, extra_env_vars=extra_env_vars)

    def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local pageserver stop` for the pageserver with the given id."""
        cmd = ["pageserver", "stop", f"--id={id}"]
        if immediate:
            cmd.extend(["-m", "immediate"])

        log.info(f"Stopping pageserver with {cmd}")
        return self.raw_cli(cmd)

    def safekeeper_start(
        self,
        id: int,
        extra_opts: Optional[List[str]] = None,
        extra_env_vars: Optional[Dict[str, str]] = None,
        timeout_in_seconds: Optional[int] = None,
    ) -> "subprocess.CompletedProcess[str]":
        """
        Run `neon_local safekeeper start`.

        Each entry of `extra_opts` is passed as `-e=<opt>`.
        """
        cli_opts = [f"-e={opt}" for opt in extra_opts] if extra_opts is not None else []
        if timeout_in_seconds is not None:
            cli_opts.append(f"--start-timeout={timeout_in_seconds}s")
        return self.raw_cli(
            ["safekeeper", "start", str(id), *cli_opts], extra_env_vars=extra_env_vars
        )

    def safekeeper_stop(
        self, id: Optional[int] = None, immediate=False
    ) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local safekeeper stop`; the id argument is omitted when `id` is None."""
        args = ["safekeeper", "stop"]
        if id is not None:
            args.append(str(id))
        if immediate:
            args.extend(["-m", "immediate"])
        return self.raw_cli(args)

    def storage_broker_start(
        self, timeout_in_seconds: Optional[int] = None
    ) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local storage_broker start`."""
        cmd = ["storage_broker", "start"]
        if timeout_in_seconds is not None:
            cmd.append(f"--start-timeout={timeout_in_seconds}s")
        return self.raw_cli(cmd)

    def storage_broker_stop(self) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local storage_broker stop`."""
        cmd = ["storage_broker", "stop"]
        return self.raw_cli(cmd)

    def endpoint_create(
        self,
        branch_name: str,
        pg_port: int,
        http_port: int,
        tenant_id: TenantId,
        pg_version: PgVersion,
        endpoint_id: Optional[str] = None,
        hot_standby: bool = False,
        lsn: Optional[Lsn] = None,
        pageserver_id: Optional[int] = None,
        allow_multiple=False,
    ) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local endpoint create` and return the completed process."""
        args = [
            "endpoint",
            "create",
            "--tenant-id",
            str(tenant_id),
            "--branch-name",
            branch_name,
            "--pg-version",
            pg_version,
        ]
        if lsn is not None:
            args.extend(["--lsn", str(lsn)])
        if pg_port is not None:
            args.extend(["--pg-port", str(pg_port)])
        if http_port is not None:
            args.extend(["--http-port", str(http_port)])
        if endpoint_id is not None:
            args.append(endpoint_id)
        if hot_standby:
            args.extend(["--hot-standby", "true"])
        if pageserver_id is not None:
            args.extend(["--pageserver-id", str(pageserver_id)])
        if allow_multiple:
            args.extend(["--allow-multiple"])

        res = self.raw_cli(args)
        res.check_returncode()
        return res

    def endpoint_start(
        self,
        endpoint_id: str,
        safekeepers: Optional[List[int]] = None,
        remote_ext_config: Optional[str] = None,
        pageserver_id: Optional[int] = None,
        allow_multiple=False,
        basebackup_request_tries: Optional[int] = None,
    ) -> "subprocess.CompletedProcess[str]":
        """
        Run `neon_local endpoint start` and return the completed process.

        `basebackup_request_tries`, when given, is passed to the command through the
        NEON_COMPUTE_TESTING_BASEBACKUP_TRIES environment variable rather than as an argument.
        """
        args = [
            "endpoint",
            "start",
        ]
        extra_env_vars = {}
        if basebackup_request_tries is not None:
            extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)
        if remote_ext_config is not None:
            args.extend(["--remote-ext-config", remote_ext_config])

        if safekeepers is not None:
            args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
        if endpoint_id is not None:
            args.append(endpoint_id)
        if pageserver_id is not None:
            args.extend(["--pageserver-id", str(pageserver_id)])
        if allow_multiple:
            args.extend(["--allow-multiple"])

        res = self.raw_cli(args, extra_env_vars)
        res.check_returncode()
        return res

    def endpoint_reconfigure(
        self,
        endpoint_id: str,
        tenant_id: Optional[TenantId] = None,
        pageserver_id: Optional[int] = None,
        safekeepers: Optional[List[int]] = None,
        check_return_code=True,
    ) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local endpoint reconfigure` for the given endpoint."""
        args = ["endpoint", "reconfigure", endpoint_id]
        if tenant_id is not None:
            args.extend(["--tenant-id", str(tenant_id)])
        if pageserver_id is not None:
            args.extend(["--pageserver-id", str(pageserver_id)])
        if safekeepers is not None:
            args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
        return self.raw_cli(args, check_return_code=check_return_code)

    def endpoint_stop(
        self,
        endpoint_id: str,
        destroy=False,
        check_return_code=True,
        mode: Optional[str] = None,
    ) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local endpoint stop`, optionally with `--destroy` and/or `--mode`."""
        args = [
            "endpoint",
            "stop",
        ]
        if destroy:
            args.append("--destroy")
        if mode is not None:
            args.append(f"--mode={mode}")
        if endpoint_id is not None:
            args.append(endpoint_id)

        return self.raw_cli(args, check_return_code=check_return_code)

    def mappings_map_branch(
        self, name: str, tenant_id: TenantId, timeline_id: TimelineId
    ) -> "subprocess.CompletedProcess[str]":
        """
        Map tenant id and timeline id to a neon_local branch name. They do not have to exist.
        Usually needed when creating branches via PageserverHttpClient and not neon_local.

        After creating a name mapping, you can use EndpointFactory.create_start
        with this registered branch name.
        """
        args = [
            "mappings",
            "map",
            "--branch-name",
            name,
            "--tenant-id",
            str(tenant_id),
            "--timeline-id",
            str(timeline_id),
        ]

        return self.raw_cli(args, check_return_code=True)

    def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local start`."""
        return self.raw_cli(["start"], check_return_code=check_return_code)

    def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
        """Run `neon_local stop`."""
        return self.raw_cli(["stop"], check_return_code=check_return_code)
class WalCraft(AbstractNeonCli):
    """
    Typed front-end for the `wal_craft` CLI tool.

    Exposes the main sub-commands as methods; arbitrary invocations remain
    available through `raw_cli`.
    """

    COMMAND = "wal_craft"

    def postgres_config(self) -> List[str]:
        """Run `wal_craft print-postgres-config` and return its stdout split on newlines."""
        completed = self.raw_cli(["print-postgres-config"])
        completed.check_returncode()
        config_lines = completed.stdout.split("\n")
        return config_lines

    def in_existing(self, type: str, connection: str) -> None:
        """Run `wal_craft in-existing <type> <connection>`, raising if it fails."""
        command = ["in-existing", type, connection]
        completed = self.raw_cli(command)
        completed.check_returncode()
class Pagectl(AbstractNeonCli):
    """
    Typed front-end for the `pagectl` utility CLI tool.
    """

    COMMAND = "pagectl"

    def dump_index_part(self, path: Path) -> IndexPartDump:
        """Run `pagectl index-part dump <path>` and parse its JSON stdout into an IndexPartDump."""
        completed = self.raw_cli(["index-part", "dump", str(path)])
        completed.check_returncode()
        return IndexPartDump.from_json(json.loads(completed.stdout))

View File

@@ -9,8 +9,6 @@ import os
import re
import shutil
import subprocess
import tempfile
import textwrap
import threading
import time
import uuid
@@ -20,8 +18,7 @@ from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from fcntl import LOCK_EX, LOCK_UN, flock
from functools import cached_property, partial
from itertools import chain, product
from functools import cached_property
from pathlib import Path
from types import TracebackType
from typing import (
@@ -64,11 +61,12 @@ from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, Timeline
from fixtures.endpoint.http import EndpointHttpClient
from fixtures.log_helper import log
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
from fixtures.neon_cli import NeonLocalCli, Pagectl
from fixtures.pageserver.allowed_errors import (
DEFAULT_PAGESERVER_ALLOWED_ERRORS,
DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS,
)
from fixtures.pageserver.common_types import IndexPartDump, LayerName, parse_layer_file_name
from fixtures.pageserver.common_types import LayerName, parse_layer_file_name
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
wait_for_last_record_lsn,
@@ -86,7 +84,7 @@ from fixtures.remote_storage import (
remote_storage_to_toml_dict,
)
from fixtures.safekeeper.http import SafekeeperHttpClient
from fixtures.safekeeper.utils import are_walreceivers_absent
from fixtures.safekeeper.utils import wait_walreceivers_absent
from fixtures.utils import (
ATTACHMENT_NAME_REGEX,
allure_add_grafana_links,
@@ -493,7 +491,7 @@ class NeonEnvBuilder:
log.debug(
f"Services started, creating initial tenant {env.initial_tenant} and its initial timeline"
)
initial_tenant, initial_timeline = env.neon_cli.create_tenant(
initial_tenant, initial_timeline = env.create_tenant(
tenant_id=env.initial_tenant,
conf=initial_tenant_conf,
timeline_id=env.initial_timeline,
@@ -954,10 +952,16 @@ class NeonEnv:
initial_tenant - tenant ID of the initial tenant created in the repository
neon_cli - can be used to run the 'neon' CLI tool
neon_cli - can be used to run the 'neon_local' CLI tool
create_tenant() - initializes a new tenant in the page server, returns
the tenant id
create_tenant() - initializes a new tenant and an initial empty timeline on it,
returns the tenant and timeline id
create_branch() - branch a new timeline from an existing one, returns
the new timeline id
create_timeline() - initializes a new timeline by running initdb, returns
the new timeline id
"""
BASE_PAGESERVER_ID = 1
@@ -968,8 +972,6 @@ class NeonEnv:
self.rust_log_override = config.rust_log_override
self.port_distributor = config.port_distributor
self.s3_mock_server = config.mock_s3_server
self.neon_cli = NeonCli(env=self)
self.pagectl = Pagectl(env=self)
self.endpoints = EndpointFactory(self)
self.safekeepers: List[Safekeeper] = []
self.pageservers: List[NeonPageserver] = []
@@ -989,6 +991,21 @@ class NeonEnv:
self.initial_tenant = config.initial_tenant
self.initial_timeline = config.initial_timeline
neon_local_env_vars = {}
if self.rust_log_override is not None:
neon_local_env_vars["RUST_LOG"] = self.rust_log_override
self.neon_cli = NeonLocalCli(
extra_env=neon_local_env_vars,
binpath=self.neon_local_binpath,
repo_dir=self.repo_dir,
pg_distrib_dir=self.pg_distrib_dir,
)
pagectl_env_vars = {}
if self.rust_log_override is not None:
pagectl_env_vars["RUST_LOG"] = self.rust_log_override
self.pagectl = Pagectl(extra_env=pagectl_env_vars, binpath=self.neon_binpath)
# The URL for the pageserver to use as its control_plane_api config
if config.storage_controller_port_override is not None:
log.info(
@@ -1316,6 +1333,74 @@ class NeonEnv:
self.endpoint_counter += 1
return "ep-" + str(self.endpoint_counter)
def create_tenant(
    self,
    tenant_id: Optional[TenantId] = None,
    timeline_id: Optional[TimelineId] = None,
    conf: Optional[Dict[str, Any]] = None,
    shard_count: Optional[int] = None,
    shard_stripe_size: Optional[int] = None,
    placement_policy: Optional[str] = None,
    set_default: bool = False,
    aux_file_policy: Optional[AuxFileStore] = None,
) -> Tuple[TenantId, TimelineId]:
    """
    Create a tenant together with its initial timeline via `neon_cli.tenant_create`.

    Ids that are not supplied are generated here. Returns the
    (tenant id, initial timeline id) pair that was used.
    """
    if not tenant_id:
        tenant_id = TenantId.generate()
    if not timeline_id:
        timeline_id = TimelineId.generate()

    # The CLI wrapper needs an explicit pg version; use this environment's.
    self.neon_cli.tenant_create(
        tenant_id=tenant_id,
        timeline_id=timeline_id,
        pg_version=self.pg_version,
        conf=conf,
        shard_count=shard_count,
        shard_stripe_size=shard_stripe_size,
        placement_policy=placement_policy,
        set_default=set_default,
        aux_file_policy=aux_file_policy,
    )
    return (tenant_id, timeline_id)
def config_tenant(self, tenant_id: Optional[TenantId], conf: Dict[str, str]):
    """
    Apply `conf` to a tenant's config via `neon_cli.tenant_config`.

    Falls back to this environment's `initial_tenant` when no tenant id is given.
    """
    target_tenant = tenant_id if tenant_id else self.initial_tenant
    self.neon_cli.tenant_config(target_tenant, conf)
def create_branch(
    self,
    new_branch_name: str = DEFAULT_BRANCH_NAME,
    tenant_id: Optional[TenantId] = None,
    ancestor_branch_name: Optional[str] = None,
    ancestor_start_lsn: Optional[Lsn] = None,
    new_timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
    """
    Branch a new timeline from an existing one via `neon_cli.timeline_branch`.

    A timeline id is generated when none is given, and the tenant defaults to
    this environment's `initial_tenant`. Returns the new timeline's id.
    """
    if not new_timeline_id:
        new_timeline_id = TimelineId.generate()
    if not tenant_id:
        tenant_id = self.initial_tenant

    self.neon_cli.timeline_branch(
        tenant_id,
        new_timeline_id,
        new_branch_name,
        ancestor_branch_name,
        ancestor_start_lsn,
    )
    return new_timeline_id
def create_timeline(
    self,
    new_branch_name: str,
    tenant_id: Optional[TenantId] = None,
    timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
    """
    Create a new timeline via `neon_cli.timeline_create`.

    A timeline id is generated when none is given, and the tenant defaults to
    this environment's `initial_tenant`. Returns the timeline id that was used.
    """
    if not timeline_id:
        timeline_id = TimelineId.generate()
    if not tenant_id:
        tenant_id = self.initial_tenant

    # The CLI wrapper needs an explicit pg version; use this environment's.
    self.neon_cli.timeline_create(
        new_branch_name,
        tenant_id,
        timeline_id,
        self.pg_version,
    )
    return timeline_id
@pytest.fixture(scope="function")
def neon_simple_env(
@@ -1435,597 +1520,6 @@ class PageserverPort:
http: int
class AbstractNeonCli(abc.ABC):
    """
    A typed wrapper around an arbitrary Neon CLI tool.

    Supports a way to run arbitrary command directly via CLI.
    Do not use directly, use specific subclasses instead.
    """

    COMMAND: str = cast(str, None)  # To be overwritten by the derived class.

    def __init__(self, env: NeonEnv):
        # The environment supplies binary paths, repo dir, pg distrib dir and RUST_LOG override.
        self.env = env

    @staticmethod
    def _decode_output(captured: Any) -> str:
        """
        Convert output attached to a `subprocess.TimeoutExpired` into text.

        The captured output may be missing (None), raw bytes, or already
        decoded text; all three are accepted so that reporting a timeout can
        never fail with a secondary error and hide the timeout itself.
        """
        if not captured:
            return ""
        if isinstance(captured, bytes):
            return captured.decode(errors="replace")
        return str(captured)

    def raw_cli(
        self,
        arguments: List[str],
        extra_env_vars: Optional[Dict[str, str]] = None,
        check_return_code=True,
        timeout=None,
        local_binpath=False,
    ) -> "subprocess.CompletedProcess[str]":
        """
        Run the command with the specified arguments.

        Arguments must be in list form, e.g. ['pg', 'create']

        Return both stdout and stderr, which can be accessed as

        >>> result = env.neon_cli.raw_cli(...)
        >>> assert result.stderr == ""
        >>> log.info(result.stdout)

        If `check_return_code`, on non-zero exit code logs failure and raises.
        If `local_binpath` is true, then we are invoking a test utility

        Raises:
            RuntimeError: the command exited with a non-zero code and
                `check_return_code` is true. The message contains the captured
                stdout/stderr; the cause is a `subprocess.CalledProcessError`.
            subprocess.TimeoutExpired: the command did not finish within
                `timeout`; whatever output was captured is logged first.
        """
        assert isinstance(arguments, list)
        assert isinstance(self.COMMAND, str)

        if local_binpath:
            # Test utility
            bin_neon = str(self.env.neon_local_binpath / self.COMMAND)
        else:
            # Normal binary
            bin_neon = str(self.env.neon_binpath / self.COMMAND)

        args = [bin_neon] + arguments
        log.info('Running command "{}"'.format(" ".join(args)))

        env_vars = os.environ.copy()
        env_vars["NEON_REPO_DIR"] = str(self.env.repo_dir)
        env_vars["POSTGRES_DISTRIB_DIR"] = str(self.env.pg_distrib_dir)
        if self.env.rust_log_override is not None:
            env_vars["RUST_LOG"] = self.env.rust_log_override
        # Per-call extras take precedence over everything set above.
        env_vars.update(extra_env_vars or {})

        # Pass coverage settings
        var = "LLVM_PROFILE_FILE"
        val = os.environ.get(var)
        if val:
            env_vars[var] = val

        # Intercept CalledProcessError and print more info
        try:
            res = subprocess.run(
                args,
                env=env_vars,
                check=False,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            stderr = self._decode_output(e.stderr)
            stdout = self._decode_output(e.stdout)
            log.warning(f"CLI timeout: stderr={stderr}, stdout={stdout}")
            raise

        indent = " "
        if not res.returncode:
            stripped = res.stdout.strip()
            lines = stripped.splitlines()
            if len(lines) < 2:
                log.debug(f"Run {res.args} success: {stripped}")
            else:
                log.debug("Run %s success:\n%s" % (res.args, textwrap.indent(stripped, indent)))
        elif check_return_code:
            # this way command output will be recorded and shown in CI in the failure message
            indent = indent * 2
            msg = textwrap.dedent(
                """\
                Run %s failed:
                stdout:
                %s
                stderr:
                %s
                """
            )
            msg = msg % (
                res.args,
                textwrap.indent(res.stdout.strip(), indent),
                textwrap.indent(res.stderr.strip(), indent),
            )
            log.info(msg)
            raise RuntimeError(msg) from subprocess.CalledProcessError(
                res.returncode, res.args, res.stdout, res.stderr
            )
        return res
class NeonCli(AbstractNeonCli):
"""
A typed wrapper around the `neon` CLI tool.
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
"""
COMMAND = "neon_local"
def raw_cli(self, *args, **kwargs) -> subprocess.CompletedProcess[str]:
kwargs["local_binpath"] = True
return super().raw_cli(*args, **kwargs)
def create_tenant(
self,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
conf: Optional[Dict[str, Any]] = None,
shard_count: Optional[int] = None,
shard_stripe_size: Optional[int] = None,
placement_policy: Optional[str] = None,
set_default: bool = False,
aux_file_policy: Optional[AuxFileStore] = None,
) -> Tuple[TenantId, TimelineId]:
"""
Creates a new tenant, returns its id and its initial timeline's id.
"""
tenant_id = tenant_id or TenantId.generate()
timeline_id = timeline_id or TimelineId.generate()
args = [
"tenant",
"create",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--pg-version",
self.env.pg_version,
]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
if aux_file_policy is AuxFileStore.V2:
args.extend(["-c", "switch_aux_file_policy:v2"])
elif aux_file_policy is AuxFileStore.V1:
args.extend(["-c", "switch_aux_file_policy:v1"])
elif aux_file_policy is AuxFileStore.CrossValidation:
args.extend(["-c", "switch_aux_file_policy:cross-validation"])
if set_default:
args.append("--set-default")
if shard_count is not None:
args.extend(["--shard-count", str(shard_count)])
if shard_stripe_size is not None:
args.extend(["--shard-stripe-size", str(shard_stripe_size)])
if placement_policy is not None:
args.extend(["--placement-policy", str(placement_policy)])
res = self.raw_cli(args)
res.check_returncode()
return tenant_id, timeline_id
def import_tenant(self, tenant_id: TenantId):
args = ["tenant", "import", "--tenant-id", str(tenant_id)]
res = self.raw_cli(args)
res.check_returncode()
def set_default(self, tenant_id: TenantId):
"""
Update default tenant for future operations that require tenant_id.
"""
res = self.raw_cli(["tenant", "set-default", "--tenant-id", str(tenant_id)])
res.check_returncode()
def config_tenant(self, tenant_id: TenantId, conf: Dict[str, str]):
"""
Update tenant config.
"""
args = ["tenant", "config", "--tenant-id", str(tenant_id)]
if conf is not None:
args.extend(
chain.from_iterable(
product(["-c"], (f"{key}:{value}" for key, value in conf.items()))
)
)
res = self.raw_cli(args)
res.check_returncode()
def list_tenants(self) -> "subprocess.CompletedProcess[str]":
res = self.raw_cli(["tenant", "list"])
res.check_returncode()
return res
def create_timeline(
self,
new_branch_name: str,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if timeline_id is None:
timeline_id = TimelineId.generate()
cmd = [
"timeline",
"create",
"--branch-name",
new_branch_name,
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
"--timeline-id",
str(timeline_id),
"--pg-version",
self.env.pg_version,
]
res = self.raw_cli(cmd)
res.check_returncode()
return timeline_id
def create_branch(
self,
new_branch_name: str = DEFAULT_BRANCH_NAME,
ancestor_branch_name: Optional[str] = None,
tenant_id: Optional[TenantId] = None,
ancestor_start_lsn: Optional[Lsn] = None,
new_timeline_id: Optional[TimelineId] = None,
) -> TimelineId:
if new_timeline_id is None:
new_timeline_id = TimelineId.generate()
cmd = [
"timeline",
"branch",
"--branch-name",
new_branch_name,
"--timeline-id",
str(new_timeline_id),
"--tenant-id",
str(tenant_id or self.env.initial_tenant),
]
if ancestor_branch_name is not None:
cmd.extend(["--ancestor-branch-name", ancestor_branch_name])
if ancestor_start_lsn is not None:
cmd.extend(["--ancestor-start-lsn", str(ancestor_start_lsn)])
res = self.raw_cli(cmd)
res.check_returncode()
return TimelineId(str(new_timeline_id))
def list_timelines(self, tenant_id: Optional[TenantId] = None) -> List[Tuple[str, TimelineId]]:
"""
Returns a list of (branch_name, timeline_id) tuples out of parsed `neon timeline list` CLI output.
"""
# main [b49f7954224a0ad25cc0013ea107b54b]
# ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540]
TIMELINE_DATA_EXTRACTOR: re.Pattern = re.compile( # type: ignore[type-arg]
r"\s?(?P<branch_name>[^\s]+)\s\[(?P<timeline_id>[^\]]+)\]", re.MULTILINE
)
res = self.raw_cli(
["timeline", "list", "--tenant-id", str(tenant_id or self.env.initial_tenant)]
)
timelines_cli = sorted(
map(
lambda branch_and_id: (branch_and_id[0], TimelineId(branch_and_id[1])),
TIMELINE_DATA_EXTRACTOR.findall(res.stdout),
)
)
return timelines_cli
def init(
    self,
    init_config: Dict[str, Any],
    force: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
    """
    Run `init` with `init_config` serialized as TOML into a temporary file.

    The temporary file only lives for the duration of the call; its path is handed to the CLI
    via `--config=`. `--force <force>` is added only when `force` is not None.
    Returns the result of the CLI call after `check_returncode()` has been invoked on it.
    """
    with tempfile.NamedTemporaryFile(mode="w+") as config_file:
        config_file.write(toml.dumps(init_config))
        # Push the buffered contents out before the file's path is passed to the CLI.
        config_file.flush()

        args = ["init", f"--config={config_file.name}"]
        if force is not None:
            args += ["--force", force]

        completed = self.raw_cli(args)
        completed.check_returncode()
        return completed
def storage_controller_start(
    self,
    timeout_in_seconds: Optional[int] = None,
    instance_id: Optional[int] = None,
    base_port: Optional[int] = None,
):
    """
    Run `storage_controller start` and return whatever `self.raw_cli` returns.

    Each of `--start-timeout`, `--instance-id` and `--base-port` is passed only when the
    corresponding argument is not None (so 0 is still forwarded).
    """
    optional_flags = (
        (timeout_in_seconds, f"--start-timeout={timeout_in_seconds}s"),
        (instance_id, f"--instance-id={instance_id}"),
        (base_port, f"--base-port={base_port}"),
    )
    args = ["storage_controller", "start"]
    args += [flag for value, flag in optional_flags if value is not None]
    return self.raw_cli(args)
def storage_controller_stop(self, immediate: bool, instance_id: Optional[int] = None):
    """
    Run `storage_controller stop` and return whatever `self.raw_cli` returns.

    `-m immediate` is added when `immediate` is truthy; `--instance-id` is added when
    `instance_id` is not None.
    """
    mode_args = ["-m", "immediate"] if immediate else []
    instance_args = [] if instance_id is None else [f"--instance-id={instance_id}"]
    return self.raw_cli(["storage_controller", "stop", *mode_args, *instance_args])
def pageserver_start(
    self,
    id: int,
    extra_env_vars: Optional[Dict[str, str]] = None,
    timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
    """
    Run `pageserver start --id=<id>` and return the result of the CLI call.

    `--start-timeout` is passed only when `timeout_in_seconds` is not None.
    If `self.env.pageserver_remote_storage` is an `S3Storage`, its `access_env_vars()` are
    merged over `extra_env_vars` (the storage's values win on key clashes).
    """
    args = ["pageserver", "start", f"--id={id}"]
    if timeout_in_seconds is not None:
        args += [f"--start-timeout={timeout_in_seconds}s"]

    env_vars = extra_env_vars
    remote_storage = self.env.pageserver_remote_storage
    if isinstance(remote_storage, S3Storage):
        storage_env_vars = remote_storage.access_env_vars()
        env_vars = (extra_env_vars or {}) | storage_env_vars

    return self.raw_cli(args, extra_env_vars=env_vars)
def pageserver_stop(self, id: int, immediate=False) -> "subprocess.CompletedProcess[str]":
    """
    Run `pageserver stop --id=<id>`, logging the command first.

    `-m immediate` is added when `immediate` is truthy.
    """
    mode_args = ["-m", "immediate"] if immediate else []
    cmd = ["pageserver", "stop", f"--id={id}", *mode_args]

    log.info(f"Stopping pageserver with {cmd}")
    return self.raw_cli(cmd)
def safekeeper_start(
    self,
    id: int,
    extra_opts: Optional[List[str]] = None,
    timeout_in_seconds: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
    """
    Run `safekeeper start <id>` and return the result of the CLI call.

    Every entry of `extra_opts` is forwarded as `-e=<opt>`; `--start-timeout` is appended
    after them when `timeout_in_seconds` is not None. If `self.env.safekeepers_remote_storage`
    is an `S3Storage`, its `access_env_vars()` are passed as extra environment variables,
    otherwise None is passed.
    """
    remote_storage = self.env.safekeepers_remote_storage
    storage_env_vars = (
        remote_storage.access_env_vars() if isinstance(remote_storage, S3Storage) else None
    )

    cli_opts = [] if extra_opts is None else [f"-e={opt}" for opt in extra_opts]
    if timeout_in_seconds is not None:
        cli_opts += [f"--start-timeout={timeout_in_seconds}s"]

    return self.raw_cli(
        ["safekeeper", "start", str(id), *cli_opts], extra_env_vars=storage_env_vars
    )
def safekeeper_stop(
    self, id: Optional[int] = None, immediate=False
) -> "subprocess.CompletedProcess[str]":
    """
    Run `safekeeper stop` and return the result of the CLI call.

    The safekeeper id is passed as a positional argument only when `id` is not None;
    `-m immediate` is added when `immediate` is truthy.
    """
    target_args = [] if id is None else [str(id)]
    mode_args = ["-m", "immediate"] if immediate else []
    return self.raw_cli(["safekeeper", "stop", *target_args, *mode_args])
def broker_start(
    self, timeout_in_seconds: Optional[int] = None
) -> "subprocess.CompletedProcess[str]":
    """
    Run `storage_broker start` and return the result of the CLI call.

    `--start-timeout=<n>s` is passed only when `timeout_in_seconds` is not None.
    """
    timeout_args = (
        [] if timeout_in_seconds is None else [f"--start-timeout={timeout_in_seconds}s"]
    )
    return self.raw_cli(["storage_broker", "start", *timeout_args])
def broker_stop(self) -> "subprocess.CompletedProcess[str]":
    """Run `storage_broker stop` and return the result of the CLI call."""
    return self.raw_cli(["storage_broker", "stop"])
def endpoint_create(
    self,
    branch_name: str,
    pg_port: int,
    http_port: int,
    endpoint_id: Optional[str] = None,
    tenant_id: Optional[TenantId] = None,
    hot_standby: bool = False,
    lsn: Optional[Lsn] = None,
    pageserver_id: Optional[int] = None,
    allow_multiple=False,
) -> "subprocess.CompletedProcess[str]":
    """
    Run `endpoint create` for `branch_name` and return the result of the CLI call.

    `self.env.initial_tenant` is used when `tenant_id` is not given (or falsy), and the
    Postgres version is always `self.env.pg_version`. Optional arguments are forwarded only
    when set; `endpoint_id` is passed as a positional argument. `check_returncode()` is
    invoked on the result before it is returned.
    """
    args = ["endpoint", "create"]
    args += ["--tenant-id", str(tenant_id or self.env.initial_tenant)]
    args += ["--branch-name", branch_name]
    args += ["--pg-version", self.env.pg_version]

    # Options that take a value and are forwarded whenever they are not None.
    for flag, value in (("--lsn", lsn), ("--pg-port", pg_port), ("--http-port", http_port)):
        if value is not None:
            args += [flag, str(value)]

    if endpoint_id is not None:
        args += [endpoint_id]
    if hot_standby:
        args += ["--hot-standby", "true"]
    if pageserver_id is not None:
        args += ["--pageserver-id", str(pageserver_id)]
    if allow_multiple:
        args += ["--allow-multiple"]

    completed = self.raw_cli(args)
    completed.check_returncode()
    return completed
def endpoint_start(
    self,
    endpoint_id: str,
    safekeepers: Optional[List[int]] = None,
    remote_ext_config: Optional[str] = None,
    pageserver_id: Optional[int] = None,
    allow_multiple=False,
    basebackup_request_tries: Optional[int] = None,
) -> "subprocess.CompletedProcess[str]":
    """
    Run `endpoint start` and return the result of the CLI call.

    Optional arguments are forwarded only when set; `safekeepers` is passed as a
    comma-separated list of ids. `basebackup_request_tries`, when given, is not a CLI flag:
    it is passed through the `NEON_COMPUTE_TESTING_BASEBACKUP_TRIES` environment variable.
    `check_returncode()` is invoked on the result before it is returned.
    """
    env_overrides = {}
    if basebackup_request_tries is not None:
        env_overrides["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries)

    args = ["endpoint", "start"]
    if remote_ext_config is not None:
        args += ["--remote-ext-config", remote_ext_config]
    if safekeepers is not None:
        args += ["--safekeepers", ",".join(str(sk_id) for sk_id in safekeepers)]
    if endpoint_id is not None:
        args += [endpoint_id]
    if pageserver_id is not None:
        args += ["--pageserver-id", str(pageserver_id)]
    if allow_multiple:
        args += ["--allow-multiple"]

    completed = self.raw_cli(args, env_overrides)
    completed.check_returncode()
    return completed
def endpoint_reconfigure(
    self,
    endpoint_id: str,
    tenant_id: Optional[TenantId] = None,
    pageserver_id: Optional[int] = None,
    safekeepers: Optional[List[int]] = None,
    check_return_code=True,
) -> "subprocess.CompletedProcess[str]":
    """
    Run `endpoint reconfigure <endpoint_id>` and return the result of the CLI call.

    `--tenant-id`, `--pageserver-id` and `--safekeepers` (comma-separated ids) are passed
    only when the matching argument is not None. `check_return_code` is forwarded to
    `self.raw_cli` unchanged.
    """
    args = ["endpoint", "reconfigure", endpoint_id]

    for flag, value in (("--tenant-id", tenant_id), ("--pageserver-id", pageserver_id)):
        if value is not None:
            args += [flag, str(value)]

    if safekeepers is not None:
        args += ["--safekeepers", ",".join(str(sk_id) for sk_id in safekeepers)]

    return self.raw_cli(args, check_return_code=check_return_code)
def endpoint_stop(
    self,
    endpoint_id: str,
    destroy=False,
    check_return_code=True,
    mode: Optional[str] = None,
) -> "subprocess.CompletedProcess[str]":
    """
    Run `endpoint stop` and return the result of the CLI call.

    `--destroy` is added when `destroy` is truthy and `--mode=<mode>` when `mode` is not
    None; the endpoint id follows as a positional argument. `check_return_code` is forwarded
    to `self.raw_cli` unchanged.
    """
    destroy_args = ["--destroy"] if destroy else []
    mode_args = [] if mode is None else [f"--mode={mode}"]
    target_args = [] if endpoint_id is None else [endpoint_id]

    return self.raw_cli(
        ["endpoint", "stop", *destroy_args, *mode_args, *target_args],
        check_return_code=check_return_code,
    )
def map_branch(
    self, name: str, tenant_id: TenantId, timeline_id: TimelineId
) -> "subprocess.CompletedProcess[str]":
    """
    Register a neon_local branch name for the given tenant id and timeline id.

    The tenant and timeline do not have to exist. This is usually needed when branches are
    created via PageserverHttpClient rather than neon_local; once the mapping exists,
    EndpointFactory.create_start can be used with the registered branch name.
    """
    mapping_args = ["mappings", "map"]
    mapping_args += ["--branch-name", name]
    mapping_args += ["--tenant-id", str(tenant_id)]
    mapping_args += ["--timeline-id", str(timeline_id)]
    return self.raw_cli(mapping_args, check_return_code=True)
def start(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
    """Run the bare `start` subcommand, forwarding `check_return_code` to `self.raw_cli`."""
    start_args = ["start"]
    return self.raw_cli(start_args, check_return_code=check_return_code)
def stop(self, check_return_code=True) -> "subprocess.CompletedProcess[str]":
    """Run the bare `stop` subcommand, forwarding `check_return_code` to `self.raw_cli`."""
    stop_args = ["stop"]
    return self.raw_cli(stop_args, check_return_code=check_return_code)
class WalCraft(AbstractNeonCli):
    """
    Typed front-end for the `wal_craft` CLI tool.

    The main commands are exposed as typed methods; anything else can be run directly
    through the CLI.
    """

    COMMAND = "wal_craft"

    def postgres_config(self) -> List[str]:
        """Return the stdout of `print-postgres-config`, split on newlines."""
        completed = self.raw_cli(["print-postgres-config"])
        completed.check_returncode()
        return completed.stdout.split("\n")

    def in_existing(self, type: str, connection: str) -> None:
        """Run `in-existing <type> <connection>` and call `check_returncode()` on the result."""
        self.raw_cli(["in-existing", type, connection]).check_returncode()
class ComputeCtl(AbstractNeonCli):
    """
    Typed front-end for the `compute_ctl` CLI tool.

    It defines no methods of its own; only the command name is set here.
    """

    COMMAND = "compute_ctl"
class Pagectl(AbstractNeonCli):
    """
    Typed front-end for the `pagectl` utility CLI tool.
    """

    COMMAND = "pagectl"

    def dump_index_part(self, path: Path) -> IndexPartDump:
        """
        Run `index-part dump <path>` and build an `IndexPartDump` from its JSON stdout.
        """
        completed = self.raw_cli(["index-part", "dump", str(path)])
        completed.check_returncode()
        return IndexPartDump.from_json(json.loads(completed.stdout))
class LogUtils:
"""
A mixin class which provides utilities for inspecting the logs of a service.
@@ -2943,6 +2437,10 @@ class NeonPageserver(PgProtocol, LogUtils):
"""
assert self.running is False
storage = self.env.pageserver_remote_storage
if isinstance(storage, S3Storage):
s3_env_vars = storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
self.env.neon_cli.pageserver_start(
self.id, extra_env_vars=extra_env_vars, timeout_in_seconds=timeout_in_seconds
)
@@ -3963,6 +3461,7 @@ class Endpoint(PgProtocol, LogUtils):
hot_standby=hot_standby,
pg_port=self.pg_port,
http_port=self.http_port,
pg_version=self.env.pg_version,
pageserver_id=pageserver_id,
allow_multiple=allow_multiple,
)
@@ -4110,12 +3609,26 @@ class Endpoint(PgProtocol, LogUtils):
with open(remote_extensions_spec_path, "w") as file:
json.dump(spec, file, indent=4)
def stop(self, mode: str = "fast") -> "Endpoint":
def stop(
self,
mode: str = "fast",
sks_wait_walreceiver_gone: Optional[tuple[List[Safekeeper], TimelineId]] = None,
) -> "Endpoint":
"""
Stop the Postgres instance if it's running.
Because test teardown might try and stop an endpoint concurrently with test code
stopping the endpoint, this method is thread safe
Because test teardown might try and stop an endpoint concurrently with
test code stopping the endpoint, this method is thread safe
If sks_wait_walreceiver_gone is not None, wait for the safekeepers in
this list to have no walreceivers, i.e. compute endpoint connection be
gone. When endpoint is stopped in immediate mode and started again this
avoids race of old connection delivering some data after
sync-safekeepers check, which makes basebackup unusable. TimelineId is
needed because endpoint doesn't know it.
A better solution would be to bump term when sync-safekeepers is skipped on
start, see #9079.
Returns self.
"""
@@ -4127,6 +3640,11 @@ class Endpoint(PgProtocol, LogUtils):
self.endpoint_id, check_return_code=self.check_stop_result, mode=mode
)
if sks_wait_walreceiver_gone is not None:
for sk in sks_wait_walreceiver_gone[0]:
cli = sk.http_client()
wait_walreceivers_absent(cli, self.tenant_id, sks_wait_walreceiver_gone[1])
return self
def stop_and_destroy(self, mode: str = "immediate") -> "Endpoint":
@@ -4386,8 +3904,16 @@ class Safekeeper(LogUtils):
extra_opts = self.extra_opts
assert self.running is False
s3_env_vars = None
if isinstance(self.env.safekeepers_remote_storage, S3Storage):
s3_env_vars = self.env.safekeepers_remote_storage.access_env_vars()
self.env.neon_cli.safekeeper_start(
self.id, extra_opts=extra_opts, timeout_in_seconds=timeout_in_seconds
self.id,
extra_opts=extra_opts,
timeout_in_seconds=timeout_in_seconds,
extra_env_vars=s3_env_vars,
)
self.running = True
# wait for wal acceptor start by checking its status
@@ -4533,7 +4059,7 @@ class Safekeeper(LogUtils):
1) wait for remote_consistent_lsn and wal_backup_lsn on safekeeper to reach it.
2) checkpoint timeline on safekeeper, which should remove WAL before this LSN; optionally wait for that.
"""
cli = self.http_client()
client = self.http_client()
target_segment_file = lsn.segment_name()
@@ -4545,7 +4071,7 @@ class Safekeeper(LogUtils):
assert all(target_segment_file <= s for s in segments)
def are_lsns_advanced():
stat = cli.timeline_status(tenant_id, timeline_id)
stat = client.timeline_status(tenant_id, timeline_id)
log.info(
f"waiting for remote_consistent_lsn and backup_lsn on sk {self.id} to reach {lsn}, currently remote_consistent_lsn={stat.remote_consistent_lsn}, backup_lsn={stat.backup_lsn}"
)
@@ -4554,7 +4080,7 @@ class Safekeeper(LogUtils):
# xxx: max wait is long because we might be waiting for reconnection from
# pageserver to this safekeeper
wait_until(30, 1, are_lsns_advanced)
cli.checkpoint(tenant_id, timeline_id)
client.checkpoint(tenant_id, timeline_id)
if wait_wal_removal:
wait_until(30, 1, are_segments_removed)
@@ -4582,13 +4108,13 @@ class NeonBroker(LogUtils):
timeout_in_seconds: Optional[int] = None,
):
assert not self.running
self.env.neon_cli.broker_start(timeout_in_seconds)
self.env.neon_cli.storage_broker_start(timeout_in_seconds)
self.running = True
return self
def stop(self):
if self.running:
self.env.neon_cli.broker_stop()
self.env.neon_cli.storage_broker_stop()
self.running = False
return self
@@ -5217,10 +4743,10 @@ def flush_ep_to_pageserver(
commit_lsn: Lsn = Lsn(0)
# In principle, in the absence of failures, polling a single sk would be enough.
for sk in env.safekeepers:
cli = sk.http_client()
client = sk.http_client()
# wait until compute connections are gone
wait_until(30, 0.5, partial(are_walreceivers_absent, cli, tenant, timeline))
commit_lsn = max(cli.get_commit_lsn(tenant, timeline), commit_lsn)
wait_walreceivers_absent(client, tenant, timeline)
commit_lsn = max(client.get_commit_lsn(tenant, timeline), commit_lsn)
# Note: depending on WAL filtering implementation, probably most shards
# won't be able to reach commit_lsn (unless gaps are also ack'ed), so this
@@ -5273,7 +4799,12 @@ def fork_at_current_lsn(
the WAL up to that LSN to arrive in the pageserver before creating the branch.
"""
current_lsn = endpoint.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)
return env.create_branch(
new_branch_name=new_branch_name,
tenant_id=tenant_id,
ancestor_branch_name=ancestor_branch_name,
ancestor_start_lsn=current_lsn,
)
def import_timeline_from_vanilla_postgres(
@@ -5292,9 +4823,9 @@ def import_timeline_from_vanilla_postgres(
"""
# Take backup of the existing PostgreSQL server with pg_basebackup
basebackup_dir = os.path.join(test_output_dir, "basebackup")
base_tar = os.path.join(basebackup_dir, "base.tar")
wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
basebackup_dir = test_output_dir / "basebackup"
base_tar = basebackup_dir / "base.tar"
wal_tar = basebackup_dir / "pg_wal.tar"
os.mkdir(basebackup_dir)
pg_bin.run(
[
@@ -5304,40 +4835,28 @@ def import_timeline_from_vanilla_postgres(
"-d",
vanilla_pg_connstr,
"-D",
basebackup_dir,
str(basebackup_dir),
]
)
# Extract start_lsn and end_lsn from the backup manifest file
with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
manifest = json.load(f)
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
start_lsn = Lsn(manifest["WAL-Ranges"][0]["Start-LSN"])
end_lsn = Lsn(manifest["WAL-Ranges"][0]["End-LSN"])
# Import the backup tarballs into the pageserver
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant_id),
"--timeline-id",
str(timeline_id),
"--branch-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base_tar,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal_tar,
"--pg-version",
env.pg_version,
]
env.neon_cli.timeline_import(
tenant_id=tenant_id,
timeline_id=timeline_id,
new_branch_name=branch_name,
base_lsn=start_lsn,
base_tarfile=base_tar,
end_lsn=end_lsn,
wal_tarfile=wal_tar,
pg_version=env.pg_version,
)
wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, Lsn(end_lsn))
wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, end_lsn)
def last_flush_lsn_upload(

View File

@@ -586,6 +586,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
timeline_id: TimelineId,
force_repartition=False,
force_image_layer_creation=False,
force_l0_compaction=False,
wait_until_uploaded=False,
enhanced_gc_bottom_most_compaction=False,
):
@@ -595,6 +596,8 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
query["force_repartition"] = "true"
if force_image_layer_creation:
query["force_image_layer_creation"] = "true"
if force_l0_compaction:
query["force_l0_compaction"] = "true"
if wait_until_uploaded:
query["wait_until_uploaded"] = "true"
if enhanced_gc_bottom_most_compaction:
@@ -701,6 +704,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
timeline_id: TimelineId,
force_repartition=False,
force_image_layer_creation=False,
force_l0_compaction=False,
wait_until_uploaded=False,
compact: Optional[bool] = None,
**kwargs,
@@ -711,6 +715,8 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
query["force_repartition"] = "true"
if force_image_layer_creation:
query["force_image_layer_creation"] = "true"
if force_l0_compaction:
query["force_l0_compaction"] = "true"
if wait_until_uploaded:
query["wait_until_uploaded"] = "true"

View File

@@ -7,7 +7,7 @@ from pathlib import Path
from typing import Any, List, Tuple
from fixtures.common_types import TenantId, TimelineId
from fixtures.neon_fixtures import NeonEnv, Pagectl
from fixtures.neon_fixtures import NeonEnv
from fixtures.pageserver.common_types import (
InvalidFileName,
parse_layer_file_name,
@@ -35,7 +35,7 @@ def duplicate_one_tenant(env: NeonEnv, template_tenant: TenantId, new_tenant: Te
for file in tl.iterdir():
shutil.copy2(file, dst_tl_dir)
if "__" in file.name:
Pagectl(env).raw_cli(
env.pagectl.raw_cli(
[
"layer",
"rewrite-summary",

View File

@@ -1,11 +1,20 @@
from fixtures.common_types import TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.safekeeper.http import SafekeeperHttpClient
from fixtures.utils import wait_until
def are_walreceivers_absent(
def wait_walreceivers_absent(
sk_http_cli: SafekeeperHttpClient, tenant_id: TenantId, timeline_id: TimelineId
):
status = sk_http_cli.timeline_status(tenant_id, timeline_id)
log.info(f"waiting for walreceivers to be gone, currently {status.walreceivers}")
return len(status.walreceivers) == 0
"""
Wait until there are no walreceiver connections from the compute(s) on the
safekeeper.
"""
def walreceivers_absent():
status = sk_http_cli.timeline_status(tenant_id, timeline_id)
log.info(f"waiting for walreceivers to be gone, currently {status.walreceivers}")
assert len(status.walreceivers) == 0
wait_until(30, 0.5, walreceivers_absent)

View File

@@ -175,7 +175,9 @@ class Workload:
if upload:
# Wait for written data to be uploaded to S3 (force a checkpoint to trigger upload)
ps_http.timeline_checkpoint(
tenant_shard_id, self.timeline_id, wait_until_uploaded=True
tenant_shard_id,
self.timeline_id,
wait_until_uploaded=True,
)
log.info(f"Churn: waiting for remote LSN {last_flush_lsn}")
else:

View File

@@ -53,7 +53,7 @@ def setup_env(
"checkpoint_distance": 268435456,
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
template_tenant, template_timeline = env.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)
ep = env.endpoints.create_start("main", tenant_id=template_tenant)

View File

@@ -81,7 +81,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int):
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
template_tenant, template_timeline = env.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -162,7 +162,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
"checkpoint_distance": 268435456,
"image_creation_threshold": 3,
}
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
template_tenant, template_timeline = env.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.tenant_attach(template_tenant, config)
ps_http = env.pageserver.http_client()

View File

@@ -41,7 +41,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
pg_bin = neon_compare.pg_bin
# Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"gc_period": "5 s",
"gc_horizon": f"{4 * 1024 ** 2}",
@@ -64,7 +64,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
endpoint.stop()
env.neon_cli.create_branch("b0", tenant_id=tenant)
env.create_branch("b0", tenant_id=tenant)
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=("b0",), daemon=True))
@@ -78,7 +78,7 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
p = random.randint(0, i)
timer = timeit.default_timer()
env.neon_cli.create_branch(f"b{i + 1}", f"b{p}", tenant_id=tenant)
env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{p}", tenant_id=tenant)
dur = timeit.default_timer() - timer
log.info(f"Creating branch b{i+1} took {dur}s")
@@ -104,7 +104,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
# seed the prng so we will measure the same structure every time
rng = random.Random("2024-02-29")
env.neon_cli.create_branch("b0")
env.create_branch("b0")
endpoint = env.endpoints.create_start("b0")
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", endpoint.connstr()])
@@ -121,7 +121,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int, shape:
timer = timeit.default_timer()
# each of these uploads to remote storage before completion
env.neon_cli.create_branch(f"b{i + 1}", parent)
env.create_branch(f"b{i + 1}", ancestor_branch_name=parent)
dur = timeit.default_timer() - timer
branch_creation_durations.append(dur)
@@ -222,7 +222,7 @@ def wait_and_record_startup_metrics(
def test_branch_creation_many_relations(neon_compare: NeonCompare):
env = neon_compare.env
timeline_id = env.neon_cli.create_branch("root")
timeline_id = env.create_branch("root")
endpoint = env.endpoints.create_start("root")
with closing(endpoint.connect()) as conn:
@@ -238,7 +238,7 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
)
with neon_compare.record_duration("create_branch_time_not_busy_root"):
env.neon_cli.create_branch("child_not_busy", "root")
env.create_branch("child_not_busy", ancestor_branch_name="root")
# run a concurrent insertion to make the ancestor "busy" during the branch creation
thread = threading.Thread(
@@ -247,6 +247,6 @@ def test_branch_creation_many_relations(neon_compare: NeonCompare):
thread.start()
with neon_compare.record_duration("create_branch_time_busy_root"):
env.neon_cli.create_branch("child_busy", "root")
env.create_branch("child_busy", ancestor_branch_name="root")
thread.join()

View File

@@ -41,7 +41,7 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
)
neon_compare.zenbenchmark.record_pg_bench_result(branch, res)
env.neon_cli.create_branch("root")
env.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", endpoint_root.connstr(), "-s10"])
@@ -55,14 +55,14 @@ def test_compare_child_and_root_pgbench_perf(neon_compare: NeonCompare):
def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.neon_cli.create_branch("root")
env.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
endpoint_root.safe_psql(
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
)
env.neon_cli.create_branch("child", "root")
env.create_branch("child", ancestor_branch_name="root")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):
@@ -73,7 +73,7 @@ def test_compare_child_and_root_write_perf(neon_compare: NeonCompare):
def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
env = neon_compare.env
env.neon_cli.create_branch("root")
env.create_branch("root")
endpoint_root = env.endpoints.create_start("root")
endpoint_root.safe_psql_many(
@@ -83,7 +83,7 @@ def test_compare_child_and_root_read_perf(neon_compare: NeonCompare):
]
)
env.neon_cli.create_branch("child", "root")
env.create_branch("child", ancestor_branch_name="root")
endpoint_child = env.endpoints.create_start("child")
with neon_compare.record_duration("root_run_duration"):

View File

@@ -26,10 +26,8 @@ def test_bulk_tenant_create(
for i in range(tenants_count):
start = timeit.default_timer()
tenant, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline(
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
)
tenant, _ = env.create_tenant()
env.create_timeline(f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant)
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
# if use_safekeepers == 'with_sa':

View File

@@ -16,7 +16,7 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
env = neon_env_builder.init_start()
n_records = 1000000
timeline_id = env.neon_cli.create_branch("test_bulk_update")
timeline_id = env.create_branch("test_bulk_update")
tenant_id = env.initial_tenant
endpoint = env.endpoints.create_start("test_bulk_update")
cur = endpoint.connect().cursor()

View File

@@ -17,7 +17,7 @@ def test_compaction(neon_compare: NeonCompare):
env = neon_compare.env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# Disable background GC and compaction, we'll run compaction manually.
"gc_period": "0s",
@@ -68,7 +68,7 @@ def test_compaction_l0_memory(neon_compare: NeonCompare):
env = neon_compare.env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# Initially disable compaction so that we will build up a stack of L0s
"compaction_period": "0s",

View File

@@ -11,7 +11,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
tenant_id, _ = env.neon_cli.create_tenant(
tenant_id, _ = env.create_tenant(
conf={
# disable default GC and compaction
"gc_period": "1000 m",
@@ -63,7 +63,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma
log.info(f"Physical storage size {physical_size}")
if mode == "with_snapshots":
if step == n_steps / 2:
env.neon_cli.create_branch("child")
env.create_branch("child")
max_num_of_deltas_above_image = 0
max_total_num_of_deltas = 0

View File

@@ -15,7 +15,7 @@ def test_layer_map(neon_env_builder: NeonEnvBuilder, zenbenchmark):
# We want to have a lot of layer files to exercise the layer map. Disable
# GC, and make checkpoint_distance very small, so that we get a lot of small layer
# files.
tenant, timeline = env.neon_cli.create_tenant(
tenant, timeline = env.create_tenant(
conf={
"gc_period": "0s",
"checkpoint_distance": "16384",

View File

@@ -33,7 +33,7 @@ def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark:
env = neon_env_builder.init_start()
lazy_slru_download = "true" if slru == "lazy" else "false"
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"lazy_slru_download": lazy_slru_download,
}

View File

@@ -85,7 +85,7 @@ def test_sharding_autosplit(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
tenants = {}
for tenant_id in set(TenantId.generate() for _i in range(0, tenant_count)):
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
env.create_tenant(tenant_id, timeline_id, conf=tenant_conf)
endpoint = env.endpoints.create("main", tenant_id=tenant_id)
tenants[tenant_id] = TenantState(timeline_id, endpoint)
endpoint.start()

View File

@@ -27,7 +27,7 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_startup")
env.create_branch("test_startup")
endpoint = None

View File

@@ -12,7 +12,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
# Override defaults: 4M checkpoint_distance, disable background compaction and gc.
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"checkpoint_distance": "4194304",
"gc_period": "0s",
@@ -45,7 +45,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 100k rows: {lsn_100}")
# Create branch1.
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
env.create_branch(
"branch1", ancestor_branch_name="main", ancestor_start_lsn=lsn_100, tenant_id=tenant
)
endpoint_branch1 = env.endpoints.create_start("branch1", tenant_id=tenant)
branch1_cur = endpoint_branch1.connect().cursor()
@@ -67,7 +69,9 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 200k rows: {lsn_200}")
# Create branch2.
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
env.create_branch(
"branch2", ancestor_branch_name="branch1", ancestor_start_lsn=lsn_200, tenant_id=tenant
)
endpoint_branch2 = env.endpoints.create_start("branch2", tenant_id=tenant)
branch2_cur = endpoint_branch2.connect().cursor()

View File

@@ -41,7 +41,7 @@ def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, N
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
ps_http = env.pageserver.http_client()
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
config_pre_detach = ps_http.tenant_config(tenant_id)
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
@@ -109,7 +109,7 @@ def test_empty_config(positive_env: NeonEnv, content_type: Optional[str]):
"""
env = positive_env
ps_http = env.pageserver.http_client()
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
config_pre_detach = ps_http.tenant_config(tenant_id)
assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
@@ -182,7 +182,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
fully_custom_config.keys()
), "ensure we cover all config options"
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
ps_http.set_tenant_config(tenant_id, fully_custom_config)
our_tenant_config = ps_http.tenant_config(tenant_id)
assert our_tenant_config.tenant_specific_overrides == fully_custom_config

View File

@@ -76,7 +76,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
branch = "test_compute_auth_to_pageserver"
env.neon_cli.create_branch(branch)
env.create_branch(branch)
endpoint = env.endpoints.create_start(branch)
with closing(endpoint.connect()) as conn:
@@ -186,7 +186,7 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env = neon_env_builder.init_start()
branch = f"test_auth_failures_auth_enabled_{auth_enabled}"
timeline_id = env.neon_cli.create_branch(branch)
timeline_id = env.create_branch(branch)
env.endpoints.create_start(branch)
tenant_token = env.auth_keys.generate_tenant_token(env.initial_tenant)

View File

@@ -98,7 +98,7 @@ def check_backpressure(endpoint: Endpoint, stop_event: threading.Event, polling_
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Create a branch for us
env.neon_cli.create_branch("test_backpressure")
env.create_branch("test_backpressure")
endpoint = env.endpoints.create(
"test_backpressure", config_lines=["max_replication_write_lag=30MB"]

View File

@@ -22,7 +22,7 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
pageserver_http.configure_failpoints(("simulated-bad-compute-connection", "50%return(15)"))
env.neon_cli.create_branch("test_compute_pageserver_connection_stress")
env.create_branch("test_compute_pageserver_connection_stress")
endpoint = env.endpoints.create_start("test_compute_pageserver_connection_stress")
pg_conn = endpoint.connect()

View File

@@ -53,7 +53,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
tenant, timeline_main = env.neon_cli.create_tenant(
tenant, timeline_main = env.create_tenant(
conf={
# disable background GC
"gc_period": "0s",
@@ -90,7 +90,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
pageserver_http_client.timeline_checkpoint(tenant, timeline_main)
pageserver_http_client.timeline_gc(tenant, timeline_main, lsn2 - lsn1 + 1024)
env.neon_cli.create_branch(
env.create_branch(
"test_branch", ancestor_branch_name="main", ancestor_start_lsn=lsn1, tenant_id=tenant
)
endpoint_branch = env.endpoints.create_start("test_branch", tenant_id=tenant)
@@ -127,7 +127,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
env.storage_controller.allowed_errors.extend(error_regexes)
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
# disable background GC
"gc_period": "0s",
@@ -145,7 +145,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
}
)
b0 = env.neon_cli.create_branch("b0", tenant_id=tenant)
b0 = env.create_branch("b0", tenant_id=tenant)
endpoint0 = env.endpoints.create_start("b0", tenant_id=tenant)
res = endpoint0.safe_psql_many(
queries=[
@@ -176,7 +176,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn)
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=lsn, tenant_id=tenant)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
new_timeline_id = TimelineId.generate()

View File

@@ -23,7 +23,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
env.storage_controller.allowed_errors.extend(error_regexes)
# Branch at the point where only 100 rows were inserted
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
branch_behind_timeline_id = env.create_branch("test_branch_behind")
endpoint_main = env.endpoints.create_start("test_branch_behind")
main_cur = endpoint_main.connect().cursor()
@@ -58,8 +58,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 200100 rows: {lsn_b}")
# Branch at the point where only 100 rows were inserted
env.neon_cli.create_branch(
"test_branch_behind_hundred", "test_branch_behind", ancestor_start_lsn=lsn_a
env.create_branch(
"test_branch_behind_hundred",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=lsn_a,
)
# Insert many more rows. This generates enough WAL to fill a few segments.
@@ -75,8 +77,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
log.info(f"LSN after 400100 rows: {lsn_c}")
# Branch at the point where only 200100 rows were inserted
env.neon_cli.create_branch(
"test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b
env.create_branch(
"test_branch_behind_more",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=lsn_b,
)
endpoint_hundred = env.endpoints.create_start("test_branch_behind_hundred")
@@ -97,15 +101,17 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
# branch at segment boundary
env.neon_cli.create_branch(
"test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn=Lsn("0/3000000")
env.create_branch(
"test_branch_segment_boundary",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=Lsn("0/3000000"),
)
endpoint = env.endpoints.create_start("test_branch_segment_boundary")
assert endpoint.safe_psql("SELECT 1")[0][0] == 1
# branch at pre-initdb lsn (from main branch)
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
env.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
new_timeline_id = TimelineId.generate()
@@ -116,8 +122,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
# branch at pre-ancestor lsn
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
env.neon_cli.create_branch(
"test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn=Lsn("0/42")
env.create_branch(
"test_branch_preinitdb",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=Lsn("0/42"),
)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):
@@ -139,8 +147,10 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
print_gc_result(gc_result)
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
# this gced_lsn is pretty random, so if gc is disabled this wouldn't fail
env.neon_cli.create_branch(
"test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn
env.create_branch(
"test_branch_create_fail",
ancestor_branch_name="test_branch_behind",
ancestor_start_lsn=gced_lsn,
)
# retry the same with the HTTP API, so that we can inspect the status code
with pytest.raises(TimelineCreate406):

View File

@@ -38,7 +38,7 @@ def test_branching_with_pgbench(
env = neon_simple_env
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"gc_period": "5 s",
"gc_horizon": f"{1024 ** 2}",
@@ -55,7 +55,7 @@ def test_branching_with_pgbench(
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", "-T15", connstr])
env.neon_cli.create_branch("b0", tenant_id=tenant)
env.create_branch("b0", tenant_id=tenant)
endpoints: List[Endpoint] = []
endpoints.append(env.endpoints.create_start("b0", tenant_id=tenant))
@@ -84,9 +84,9 @@ def test_branching_with_pgbench(
threads = []
if ty == "cascade":
env.neon_cli.create_branch(f"b{i + 1}", f"b{i}", tenant_id=tenant)
env.create_branch(f"b{i + 1}", ancestor_branch_name=f"b{i}", tenant_id=tenant)
else:
env.neon_cli.create_branch(f"b{i + 1}", "b0", tenant_id=tenant)
env.create_branch(f"b{i + 1}", ancestor_branch_name="b0", tenant_id=tenant)
endpoints.append(env.endpoints.create_start(f"b{i + 1}", tenant_id=tenant))
@@ -120,7 +120,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
env = neon_simple_env
env.neon_cli.create_branch("b0")
env.create_branch("b0")
endpoint0 = env.endpoints.create_start("b0")
pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()])
@@ -133,7 +133,7 @@ def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBi
start_lsn = Lsn((int(curr_lsn) - XLOG_BLCKSZ) // XLOG_BLCKSZ * XLOG_BLCKSZ)
log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
env.create_branch("b1", ancestor_branch_name="b0", ancestor_start_lsn=start_lsn)
endpoint1 = env.endpoints.create_start("b1")
pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()])
@@ -173,7 +173,7 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
wait_until_paused(env, "before-upload-index-pausable")
env.neon_cli.map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline)
with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"):
env.endpoints.create_start(
@@ -432,9 +432,7 @@ def test_branching_while_stuck_find_gc_cutoffs(neon_env_builder: NeonEnvBuilder)
wait_until_paused(env, failpoint)
env.neon_cli.create_branch(
tenant_id=env.initial_tenant, ancestor_branch_name="main", new_branch_name="branch"
)
env.create_branch("branch", ancestor_branch_name="main")
client.configure_failpoints((failpoint, "off"))

View File

@@ -34,7 +34,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
tenant_timelines: List[Tuple[TenantId, TimelineId, Endpoint]] = []
for _ in range(3):
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with endpoint.cursor() as cur:
@@ -84,13 +84,11 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.neon_cli.create_tenant()
tenant_id, _ = env.create_tenant()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(
env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id
)
executor.submit(env.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id)
for i in range(4)
]
for future in futures:
@@ -111,7 +109,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Introduce failpoint during timeline init (some intermediate files are on disk), before it's checkpointed.
@@ -123,7 +121,7 @@ def test_timeline_init_break_before_checkpoint(neon_env_builder: NeonEnvBuilder)
env.pageserver.restart(immediate=True)
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
@@ -151,11 +149,11 @@ def test_timeline_init_break_before_checkpoint_recreate(
]
)
env.neon_cli.create_tenant(env.initial_tenant)
env.create_tenant(env.initial_tenant)
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Some fixed timeline ID (like control plane does)
@@ -176,7 +174,7 @@ def test_timeline_init_break_before_checkpoint_recreate(
env.pageserver.restart(immediate=True)
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
@@ -201,7 +199,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
old_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# Introduce failpoint when creating a new timeline, right after creating its directory
@@ -211,7 +209,7 @@ def test_timeline_create_break_after_dir_creation(neon_env_builder: NeonEnvBuild
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
# "New" timeline is not present in the list, allowing pageserver to retry the same request
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
new_tenant_timelines = env.neon_cli.timeline_list(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"

View File

@@ -34,7 +34,7 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder, make_httpserver):
ignore_notify
)
env.neon_cli.create_branch("test_change_pageserver")
env.create_branch("test_change_pageserver")
endpoint = env.endpoints.create_start("test_change_pageserver")
# Put this tenant into a dual-attached state

View File

@@ -56,8 +56,10 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
# create new branch after clog truncation and start a compute node on it
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
env.neon_cli.create_branch(
"test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
env.create_branch(
"test_clog_truncate_new",
ancestor_branch_name="main",
ancestor_start_lsn=lsn_after_truncation,
)
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")

View File

@@ -23,7 +23,7 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
env = neon_simple_env
def start_workload():
env.neon_cli.create_branch("test_lsof_pageserver_pid")
env.create_branch("test_lsof_pageserver_pid")
endpoint = env.endpoints.create_start("test_lsof_pageserver_pid")
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:

View File

@@ -63,7 +63,10 @@ page_cache_size=10
log.info(f"Running churn round {i}/{churn_rounds} ...")
workload.churn_rows(row_count, env.pageserver.id)
ps_http.timeline_compact(tenant_id, timeline_id)
# Force L0 compaction to ensure the number of layers is within bounds; we don't want to count L0 layers
# in this benchmark. In other words, this smoke test ensures the number of L1 layers is bounded.
ps_http.timeline_compact(tenant_id, timeline_id, force_l0_compaction=True)
assert ps_http.perf_info(tenant_id, timeline_id)[0]["num_of_l0"] <= 1
log.info("Validating at workload end ...")
workload.validate(env.pageserver.id)

View File

@@ -517,7 +517,7 @@ def test_historic_storage_formats(
assert metadata_summary["tenant_count"] >= 1
assert metadata_summary["timeline_count"] >= 1
env.neon_cli.import_tenant(dataset.tenant_id)
env.neon_cli.tenant_import(dataset.tenant_id)
# Discover timelines
timelines = env.pageserver.http_client().timeline_list(dataset.tenant_id)

View File

@@ -38,7 +38,7 @@ def test_safekeepers_reconfigure_reorder(
):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_reconfigure_reorder")
env.create_branch("test_safekeepers_reconfigure_reorder")
endpoint = env.endpoints.create_start("test_safekeepers_reconfigure_reorder")

View File

@@ -1,6 +1,7 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
from fixtures.neon_cli import WalCraft
from fixtures.neon_fixtures import NeonEnvBuilder
# Restart nodes with WAL end having specially crafted shape, like last record
# crossing segment boundary, to test decoding issues.
@@ -18,7 +19,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
)
def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_crafted_wal_end")
env.create_branch("test_crafted_wal_end")
env.pageserver.allowed_errors.extend(
[
# seems like pageserver stop triggers these
@@ -27,7 +28,7 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
)
endpoint = env.endpoints.create("test_crafted_wal_end")
wal_craft = WalCraft(env)
wal_craft = WalCraft(extra_env=None, binpath=env.neon_binpath)
endpoint.config(wal_craft.postgres_config())
endpoint.start()
res = endpoint.safe_psql_many(

View File

@@ -31,7 +31,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createdb2", "main", ancestor_start_lsn=lsn)
env.create_branch("test_createdb2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createdb2")
# Test that you can connect to the new database on both branches
@@ -77,10 +77,14 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create two branches before and after database drop.
env.neon_cli.create_branch("test_before_dropdb", "main", ancestor_start_lsn=lsn_before_drop)
env.create_branch(
"test_before_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_before_drop
)
endpoint_before = env.endpoints.create_start("test_before_dropdb")
env.neon_cli.create_branch("test_after_dropdb", "main", ancestor_start_lsn=lsn_after_drop)
env.create_branch(
"test_after_dropdb", ancestor_branch_name="main", ancestor_start_lsn=lsn_after_drop
)
endpoint_after = env.endpoints.create_start("test_after_dropdb")
# Test that database exists on the branch before drop

View File

@@ -18,7 +18,7 @@ def test_createuser(neon_simple_env: NeonEnv):
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch("test_createuser2", "main", ancestor_start_lsn=lsn)
env.create_branch("test_createuser2", ancestor_branch_name="main", ancestor_start_lsn=lsn)
endpoint2 = env.endpoints.create_start("test_createuser2")
# Test that you can connect to new branch as a new user

View File

@@ -59,11 +59,11 @@ def test_min_resident_size_override_handling(
env.pageserver.stop()
env.pageserver.start()
tenant_id, _ = env.neon_cli.create_tenant()
tenant_id, _ = env.create_tenant()
assert_overrides(tenant_id, config_level_override)
# Also ensure that specifying the parameter to create_tenant works, in addition to http-level reconfig.
tenant_id, _ = env.neon_cli.create_tenant(conf={"min_resident_size_override": "100"})
tenant_id, _ = env.create_tenant(conf={"min_resident_size_override": "100"})
assert_config(tenant_id, 100, 100)
ps_http.set_tenant_config(tenant_id, {})
assert_config(tenant_id, None, config_level_override)
@@ -280,7 +280,7 @@ def _eviction_env(
def pgbench_init_tenant(
layer_size: int, scale: int, env: NeonEnv, pg_bin: PgBin
) -> Tuple[TenantId, TimelineId]:
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
"gc_period": "0s",
"compaction_period": "0s",

View File

@@ -81,7 +81,7 @@ def test_remote_extensions(
# Start a compute node with remote_extension spec
# and check that it can download the extensions and use them to CREATE EXTENSION.
env = neon_env_builder_local.init_start()
env.neon_cli.create_branch("test_remote_extensions")
env.create_branch("test_remote_extensions")
endpoint = env.endpoints.create(
"test_remote_extensions",
config_lines=["log_min_messages=debug3"],

View File

@@ -15,7 +15,7 @@ def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
Test that triggering crash from neon_test_utils crashes the endpoint
"""
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_endpoint_crash")
env.create_branch("test_endpoint_crash")
endpoint = env.endpoints.create_start("test_endpoint_crash")
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")

View File

@@ -3,7 +3,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_fsm_truncate")
env.create_branch("test_fsm_truncate")
endpoint = env.endpoints.create_start("test_fsm_truncate")
endpoint.safe_psql(
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"

View File

@@ -68,7 +68,7 @@ async def update_and_gc(env: NeonEnv, endpoint: Endpoint, timeline: TimelineId):
def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
timeline = env.neon_cli.create_branch("test_gc_aggressive", "main")
timeline = env.create_branch("test_gc_aggressive", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_gc_aggressive")
with endpoint.cursor() as cur:
@@ -99,7 +99,7 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder):
# Disable time-based pitr, we will use LSN-based thresholds in the manual GC calls
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_gc_index_upload", "main")
timeline_id = env.create_branch("test_gc_index_upload", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_gc_index_upload")
pageserver_http = env.pageserver.http_client()

View File

@@ -98,27 +98,15 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
)
def import_tar(base, wal):
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--branch-name",
branch_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
"--pg-version",
env.pg_version,
]
env.neon_cli.timeline_import(
tenant_id=tenant,
timeline_id=timeline,
new_branch_name=branch_name,
base_tarfile=base,
base_lsn=start_lsn,
wal_tarfile=wal,
end_lsn=end_lsn,
pg_version=env.pg_version,
)
# Importing empty file fails
@@ -158,7 +146,7 @@ def test_import_from_pageserver_small(
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
timeline = env.create_branch("test_import_from_pageserver_small")
endpoint = env.endpoints.create_start("test_import_from_pageserver_small")
num_rows = 3000
@@ -177,7 +165,7 @@ def test_import_from_pageserver_multisegment(
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
timeline = env.create_branch("test_import_from_pageserver_multisegment")
endpoint = env.endpoints.create_start("test_import_from_pageserver_multisegment")
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
@@ -268,23 +256,13 @@ def _import(
branch_name = "import_from_pageserver"
client = env.pageserver.http_client()
env.pageserver.tenant_create(tenant)
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
str(tenant),
"--timeline-id",
str(timeline),
"--branch-name",
branch_name,
"--base-lsn",
str(lsn),
"--base-tarfile",
str(tar_output_file),
"--pg-version",
env.pg_version,
]
env.neon_cli.timeline_import(
tenant_id=tenant,
timeline_id=timeline,
new_branch_name=branch_name,
base_lsn=lsn,
base_tarfile=tar_output_file,
pg_version=env.pg_version,
)
# Wait for data to land in s3

View File

@@ -178,9 +178,9 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
def tenant_update_config(changes):
tenant_config.update(changes)
env.neon_cli.config_tenant(tenant_id, tenant_config)
env.config_tenant(tenant_id, tenant_config)
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
log.info("tenant id is %s", tenant_id)
env.initial_tenant = tenant_id # update_and_gc relies on this
ps_http = env.pageserver.http_client()

View File

@@ -8,7 +8,7 @@ def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",
@@ -52,7 +52,7 @@ def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# small checkpoint distance to create more delta layer files
"checkpoint_distance": f"{1024 ** 2}",

View File

@@ -56,7 +56,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
"compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
}
tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_config)
tenant_id, timeline_id = env.create_tenant(conf=tenant_config)
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)

View File

@@ -219,7 +219,7 @@ def test_ondemand_wal_download_in_replication_slot_funcs(neon_env_builder: NeonE
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
with endpoint.connect().cursor() as cur:
@@ -270,7 +270,7 @@ def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
with endpoint.connect().cursor() as cur:
@@ -352,7 +352,7 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
@@ -397,7 +397,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
env.neon_cli.create_branch("init")
env.create_branch("init")
endpoint = env.endpoints.create_start("init")
cur = endpoint.connect().cursor()
@@ -445,7 +445,7 @@ def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
def test_slots_and_branching(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant, timeline = env.neon_cli.create_tenant()
tenant, timeline = env.create_tenant()
env.pageserver.http_client()
main_branch = env.endpoints.create_start("main", tenant_id=tenant)
@@ -457,7 +457,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
# Create branch ws.
env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
env.create_branch("ws", ancestor_branch_name="main", tenant_id=tenant)
ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
# Check that we can create slot with the same name
@@ -469,10 +469,10 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
def test_replication_shutdown(neon_simple_env: NeonEnv):
# Ensure Postgres can exit without getting stuck when a replication job is active + neon extension installed
env = neon_simple_env
env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
env.create_branch("test_replication_shutdown_publisher", ancestor_branch_name="main")
pub = env.endpoints.create("test_replication_shutdown_publisher")
env.neon_cli.create_branch("test_replication_shutdown_subscriber")
env.create_branch("test_replication_shutdown_subscriber")
sub = env.endpoints.create("test_replication_shutdown_subscriber")
pub.respec(skip_pg_catalog_updates=False)
@@ -575,7 +575,7 @@ def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg):
vanilla_pg.start()
vanilla_pg.safe_psql("create extension neon;")
env.neon_cli.create_branch("subscriber")
env.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
sub.start()

View File

@@ -32,7 +32,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
"""
env = neon_env_builder.init_start()
tenant_id, _ = env.neon_cli.create_tenant(
tenant_id, _ = env.create_tenant(
conf={
# disable default GC and compaction
"gc_period": "1000 m",
@@ -43,7 +43,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
}
)
timeline_id = env.neon_cli.create_branch("test_lsn_mapping", tenant_id=tenant_id)
timeline_id = env.create_branch("test_lsn_mapping", tenant_id=tenant_id)
endpoint_main = env.endpoints.create_start("test_lsn_mapping", tenant_id=tenant_id)
timeline_id = endpoint_main.safe_psql("show neon.timeline_id")[0][0]
@@ -123,8 +123,8 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder, with_lease: bool):
endpoint_here.stop_and_destroy()
# Do the "past" check again at a new branch to ensure that we don't return something before the branch cutoff
timeline_id_child = env.neon_cli.create_branch(
"test_lsn_mapping_child", tenant_id=tenant_id, ancestor_branch_name="test_lsn_mapping"
timeline_id_child = env.create_branch(
"test_lsn_mapping_child", ancestor_branch_name="test_lsn_mapping", tenant_id=tenant_id
)
# Timestamp is in the unreachable past
@@ -190,7 +190,7 @@ def test_ts_of_lsn_api(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch("test_ts_of_lsn_api")
new_timeline_id = env.create_branch("test_ts_of_lsn_api")
endpoint_main = env.endpoints.create_start("test_ts_of_lsn_api")
cur = endpoint_main.connect().cursor()

View File

@@ -72,9 +72,7 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.neon_cli.create_branch(
"test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn
)
env.create_branch("test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn)
endpoint_new = env.endpoints.create_start("test_multixact_new")
next_multixact_id_new = endpoint_new.safe_psql(

View File

@@ -31,7 +31,7 @@ def helper_compare_timeline_list(
)
)
timelines_cli = env.neon_cli.list_timelines(initial_tenant)
timelines_cli = env.neon_cli.timeline_list(initial_tenant)
cli_timeline_ids = sorted([timeline_id for (_, timeline_id) in timelines_cli])
assert timelines_api == cli_timeline_ids
@@ -44,17 +44,19 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a branch for us
main_timeline_id = env.neon_cli.create_branch("test_cli_branch_list_main")
main_timeline_id = env.create_branch("test_cli_branch_list_main")
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a nested branch
nested_timeline_id = env.neon_cli.create_branch(
"test_cli_branch_list_nested", "test_cli_branch_list_main"
nested_timeline_id = env.create_branch(
"test_cli_branch_list_nested", ancestor_branch_name="test_cli_branch_list_main"
)
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Check that all new branches are visible via CLI
timelines_cli = [timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()]
timelines_cli = [
timeline_id for (_, timeline_id) in env.neon_cli.timeline_list(env.initial_tenant)
]
assert main_timeline_id in timelines_cli
assert nested_timeline_id in timelines_cli
@@ -64,7 +66,7 @@ def helper_compare_tenant_list(pageserver_http_client: PageserverHttpClient, env
tenants = pageserver_http_client.tenant_list()
tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants))
res = env.neon_cli.list_tenants()
res = env.neon_cli.tenant_list()
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
assert tenants_api == tenants_cli
@@ -77,18 +79,18 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant1, _ = env.neon_cli.create_tenant()
tenant1, _ = env.create_tenant()
# check tenant1 appeared
helper_compare_tenant_list(pageserver_http_client, env)
# Create new tenant
tenant2, _ = env.neon_cli.create_tenant()
tenant2, _ = env.create_tenant()
# check tenant2 appeared
helper_compare_tenant_list(pageserver_http_client, env)
res = env.neon_cli.list_tenants()
res = env.neon_cli.tenant_list()
tenants = sorted(map(lambda t: TenantId(t.split()[0]), res.stdout.splitlines()))
assert env.initial_tenant in tenants
@@ -98,8 +100,8 @@ def test_cli_tenant_list(neon_simple_env: NeonEnv):
def test_cli_tenant_create(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant_id, _ = env.neon_cli.create_tenant()
timelines = env.neon_cli.list_timelines(tenant_id)
tenant_id, _ = env.create_tenant()
timelines = env.neon_cli.timeline_list(tenant_id)
# an initial timeline should be created upon tenant creation
assert len(timelines) == 1
@@ -132,7 +134,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
env.neon_cli.pageserver_stop(env.pageserver.id)
env.neon_cli.safekeeper_stop()
env.neon_cli.storage_controller_stop(False)
env.neon_cli.broker_stop()
env.neon_cli.storage_broker_stop()
# Keep NeonEnv state up to date, it usually owns starting/stopping services
env.pageserver.running = False
@@ -175,7 +177,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
# Stop this to get out of the way of the following `start`
env.neon_cli.storage_controller_stop(False)
env.neon_cli.broker_stop()
env.neon_cli.storage_broker_stop()
# Default start
res = env.neon_cli.raw_cli(["start"])

View File

@@ -8,7 +8,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
# Verify that the neon extension is installed and has the correct version.
def test_neon_extension(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_create_extension_neon")
env.create_branch("test_create_extension_neon")
endpoint_main = env.endpoints.create("test_create_extension_neon")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -35,7 +35,7 @@ def test_neon_extension(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be upgraded/downgraded.
def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_neon_extension_compatibility")
env.create_branch("test_neon_extension_compatibility")
endpoint_main = env.endpoints.create("test_neon_extension_compatibility")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
@@ -72,7 +72,7 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
# Verify that the neon extension can be auto-upgraded to the latest version.
def test_neon_extension_auto_upgrade(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_neon_extension_auto_upgrade")
env.create_branch("test_neon_extension_auto_upgrade")
endpoint_main = env.endpoints.create("test_neon_extension_auto_upgrade")
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon

View File

@@ -1,4 +1,5 @@
import pytest
from fixtures.common_types import TimelineId
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.port_distributor import PortDistributor
@@ -10,22 +11,36 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por
# Skipping the init step that creates a local tenant in Pytest tests
try:
env.neon_cli.start()
env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True)
env.create_tenant(tenant_id=env.initial_tenant, set_default=True)
main_branch_name = "main"
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
main_branch_name, pg_port, http_port, endpoint_id="ep-basic-main"
main_branch_name,
pg_port,
http_port,
endpoint_id="ep-basic-main",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_start("ep-basic-main")
branch_name = "migration-check"
env.neon_cli.create_branch(branch_name)
env.neon_cli.timeline_branch(
tenant_id=env.initial_tenant,
timeline_id=TimelineId.generate(),
new_branch_name=branch_name,
)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(
branch_name, pg_port, http_port, endpoint_id=f"ep-{branch_name}"
branch_name,
pg_port,
http_port,
endpoint_id=f"ep-{branch_name}",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_start(f"ep-{branch_name}")
finally:
@@ -43,12 +58,26 @@ def test_neon_two_primary_endpoints_fail(
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep1")
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep1",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
# ep1 is not running so create will succeed
env.neon_cli.endpoint_create(branch_name, pg_port, http_port, "ep2")
env.neon_cli.endpoint_create(
branch_name,
pg_port,
http_port,
endpoint_id="ep2",
tenant_id=env.initial_tenant,
pg_version=env.pg_version,
)
env.neon_cli.endpoint_start("ep1")

View File

@@ -6,10 +6,10 @@ from fixtures.utils import wait_until
def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
env = neon_simple_env
env.neon_cli.create_branch("test_neon_superuser_publisher", "main")
env.create_branch("test_neon_superuser_publisher", ancestor_branch_name="main")
pub = env.endpoints.create("test_neon_superuser_publisher")
env.neon_cli.create_branch("test_neon_superuser_subscriber")
env.create_branch("test_neon_superuser_subscriber")
sub = env.endpoints.create("test_neon_superuser_subscriber")
pub.respec(skip_pg_catalog_updates=False)

View File

@@ -435,7 +435,9 @@ $$;
# Wait until pageserver has received all the data, and restart the endpoint
wait_for_wal_insert_lsn(env, endpoint, tenant_id, timeline_id)
endpoint.stop(mode="immediate") # 'immediate' to avoid writing shutdown checkpoint
endpoint.stop(
mode="immediate", sks_wait_walreceiver_gone=(env.safekeepers, timeline_id)
) # 'immediate' to avoid writing shutdown checkpoint
endpoint.start()
# Check that the next-multixid value wrapped around correctly

View File

@@ -5,7 +5,7 @@ from fixtures.pageserver.http import PageserverHttpClient
def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = endpoint.safe_psql_many(

View File

@@ -17,7 +17,7 @@ from fixtures.utils import print_gc_result, query_scalar
def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
# Disable pitr, because here we want to test branch creation after GC
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
env.neon_cli.create_branch("test_old_request_lsn", "main")
env.create_branch("test_old_request_lsn", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_old_request_lsn")
pg_conn = endpoint.connect()

View File

@@ -545,7 +545,7 @@ def test_compaction_downloads_on_demand_without_image_creation(neon_env_builder:
layer_sizes += layer.layer_file_size
pageserver_http.evict_layer(tenant_id, timeline_id, layer.layer_file_name)
env.neon_cli.config_tenant(tenant_id, {"compaction_threshold": "3"})
env.config_tenant(tenant_id, {"compaction_threshold": "3"})
pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
@@ -647,7 +647,7 @@ def test_compaction_downloads_on_demand_with_image_creation(neon_env_builder: Ne
# layers -- threshold of 2 would sound more reasonable, but keeping it as 1
# to be less flaky
conf["image_creation_threshold"] = "1"
env.neon_cli.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})
env.config_tenant(tenant_id, {k: str(v) for k, v in conf.items()})
pageserver_http.timeline_compact(tenant_id, timeline_id)
layers = pageserver_http.layer_map_info(tenant_id, timeline_id)

View File

@@ -59,7 +59,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
timeline_details = client.timeline_detail(
tenant_id=tenant_id, timeline_id=timeline_id, include_non_incremental_logical_size=True
@@ -108,7 +108,7 @@ def expect_updated_msg_lsn(
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
endpoint = env.endpoints.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
# insert something to force sk -> ps message

View File

@@ -9,7 +9,7 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down")
env.create_branch("test_pageserver_catchup_while_compute_down")
# Make shared_buffers large to ensure we won't query pageserver while it is down.
endpoint = env.endpoints.create_start(
"test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"]

View File

@@ -150,7 +150,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
env.pageserver.start()
env.storage_controller.node_configure(env.pageserver.id, {"availability": "Active"})
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id=env.initial_tenant, conf=TENANT_CONF, timeline_id=env.initial_timeline
)
@@ -549,6 +549,14 @@ def test_multi_attach(
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# Instruct the storage controller to not interfere with our low level configuration
# of the pageserver's attachment states. Otherwise when it sees nodes go offline+return,
# it would send its own requests that would conflict with the test's.
env.storage_controller.tenant_policy_update(tenant_id, {"scheduling": "Stop"})
env.storage_controller.allowed_errors.extend(
[".*Scheduling is disabled by policy Stop.*", ".*Skipping reconcile for policy Stop.*"]
)
# Initially, the tenant will be attached to the first pageserver (first is default in our test harness)
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active"))
_detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
@@ -635,9 +643,7 @@ def test_upgrade_generationless_local_file_paths(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)
env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
workload = Workload(env, tenant_id, timeline_id)
workload.init()

View File

@@ -42,7 +42,7 @@ async def run_worker_for_tenant(
async def run_worker(env: NeonEnv, tenant_conf, entries: int) -> Tuple[TenantId, TimelineId, Lsn]:
tenant, timeline = env.neon_cli.create_tenant(conf=tenant_conf)
tenant, timeline = env.create_tenant(conf=tenant_conf)
last_flush_lsn = await run_worker_for_tenant(env, entries, tenant)
return tenant, timeline, last_flush_lsn

View File

@@ -14,7 +14,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# least the code gets exercised.
def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_restarts")
env.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_reconnects = 1000
timeout = 0.01
@@ -46,7 +46,7 @@ def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
# Test handling errors during page server reconnect
def test_pageserver_reconnect_failure(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_reconnect")
env.create_branch("test_pageserver_reconnect")
endpoint = env.endpoints.create_start("test_pageserver_reconnect")
con = endpoint.connect()

View File

@@ -169,7 +169,7 @@ def test_pageserver_chaos(
# Use a tiny checkpoint distance, to create a lot of layers quickly.
# That allows us to stress the compaction and layer flushing logic more.
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"checkpoint_distance": "5000000",
}

View File

@@ -12,7 +12,7 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
# running.
def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
env = neon_simple_env
env.neon_cli.create_branch("test_pageserver_restarts")
env.create_branch("test_pageserver_restarts")
endpoint = env.endpoints.create_start("test_pageserver_restarts")
n_restarts = 10
scale = 10

View File

@@ -650,7 +650,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_a = TimelineId.generate()
timeline_b = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_a,
placement_policy='{"Attached":1}',
@@ -658,7 +658,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
# to trigger the upload promptly.
conf={"heatmap_period": f"{upload_period_secs}s"},
)
env.neon_cli.create_timeline("main2", tenant_id, timeline_b)
env.create_timeline("main2", tenant_id, timeline_b)
tenant_timelines[tenant_id] = [timeline_a, timeline_b]
@@ -778,9 +778,7 @@ def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controll
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
)
env.create_tenant(tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}')
attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
ps_attached = env.get_pageserver(attached_to_id)

View File

@@ -57,7 +57,7 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
# Branch at the point where only 100 rows were inserted
# It must have been preserved by PITR setting
env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)
env.create_branch("test_pitr_gc_hundred", ancestor_branch_name="main", ancestor_start_lsn=lsn_a)
endpoint_hundred = env.endpoints.create_start("test_pitr_gc_hundred")

View File

@@ -25,7 +25,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
)
# Create a branch for us
env.neon_cli.create_branch("test_pageserver_recovery", "main")
env.create_branch("test_pageserver_recovery", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_pageserver_recovery")

View File

@@ -230,7 +230,7 @@ def test_remote_storage_upload_queue_retries(
# create tenant with config that will deterministically allow
# compaction and gc
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
conf={
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": f"{64 * 1024}",
@@ -640,7 +640,9 @@ def test_empty_branch_remote_storage_upload(neon_env_builder: NeonEnvBuilder):
client = env.pageserver.http_client()
new_branch_name = "new_branch"
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
new_branch_timeline_id = env.create_branch(
new_branch_name, ancestor_branch_name="main", tenant_id=env.initial_tenant
)
assert_nothing_to_upload(client, env.initial_tenant, new_branch_timeline_id)
timelines_before_detach = set(

View File

@@ -103,6 +103,7 @@ def test_replica_start_scan_clog_crashed_xids(neon_simple_env: NeonEnv):
# Initialize the primary, a test table, and a helper function to create lots
# of subtransactions.
env = neon_simple_env
timeline_id = env.initial_timeline
primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary")
primary_conn = primary.connect()
primary_cur = primary_conn.cursor()
@@ -114,7 +115,7 @@ def test_replica_start_scan_clog_crashed_xids(neon_simple_env: NeonEnv):
# chance to write abort records for them.
primary_cur.execute("begin")
primary_cur.execute("select create_subxacts(100000)")
primary.stop(mode="immediate")
primary.stop(mode="immediate", sks_wait_walreceiver_gone=(env.safekeepers, timeline_id))
# Restart the primary. Do some light work, and shut it down cleanly
primary.start()
@@ -659,6 +660,7 @@ def test_replica_start_with_too_many_unused_xids(neon_simple_env: NeonEnv):
# Initialize the primary and a test table
env = neon_simple_env
timeline_id = env.initial_timeline
primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary")
with primary.cursor() as primary_cur:
primary_cur.execute("create table t(pk serial primary key, payload integer)")
@@ -667,7 +669,7 @@ def test_replica_start_with_too_many_unused_xids(neon_simple_env: NeonEnv):
with primary.cursor() as primary_cur:
primary_cur.execute("insert into t (payload) values (0)")
# restart primary
primary.stop("immediate")
primary.stop("immediate", sks_wait_walreceiver_gone=(env.safekeepers, timeline_id))
primary.start()
# Wait for the WAL to be flushed

View File

@@ -60,9 +60,7 @@ def test_tenant_s3_restore(
last_flush_lsns = []
for timeline in ["first", "second"]:
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
endpoint.safe_psql(f"CREATE TABLE created_{timeline}(id integer);")

View File

@@ -77,7 +77,7 @@ def test_sharding_smoke(
assert all(s < expect_initdb_size // 2 for s in sizes.values())
# Test that timeline creation works on a sharded tenant
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
# Test that we can write data to a sharded tenant
workload = Workload(env, tenant_id, timeline_b, branch_name="branch_b")
@@ -378,7 +378,7 @@ def test_sharding_split_smoke(
env.start()
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -1127,7 +1127,7 @@ def test_sharding_split_failures(
timeline_id = TimelineId.generate()
# Create a tenant with secondary locations enabled
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id, timeline_id, shard_count=initial_shard_count, placement_policy='{"Attached":1}'
)
@@ -1441,7 +1441,7 @@ def test_sharding_unlogged_relation(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=8)
env.create_tenant(tenant_id, timeline_id, shard_count=8)
# We will create many tables to ensure it's overwhelmingly likely that at least one
# of them doesn't land on shard 0
@@ -1483,7 +1483,7 @@ def test_top_tenants(neon_env_builder: NeonEnvBuilder):
for i in range(0, n_tenants):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.create_tenant(tenant_id, timeline_id)
# Write a different amount of data to each tenant
w = Workload(env, tenant_id, timeline_id)

View File

@@ -96,7 +96,7 @@ def test_storage_controller_smoke(
# Creating several tenants should spread out across the pageservers
for tid in tenant_ids:
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
env.create_tenant(tid, shard_count=shards_per_tenant)
# Repeating a creation should be idempotent (we are just testing it doesn't return an error)
env.storage_controller.tenant_create(
@@ -172,7 +172,7 @@ def test_storage_controller_smoke(
# Create some fresh tenants
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
for tid in tenant_ids:
env.neon_cli.create_tenant(tid, shard_count=shards_per_tenant)
env.create_tenant(tid, shard_count=shards_per_tenant)
counts = get_node_shard_counts(env, tenant_ids)
# Nothing should have been scheduled on the node in Draining
@@ -567,6 +567,149 @@ def test_storage_controller_compute_hook(
env.storage_controller.consistency_check()
def test_storage_controller_stuck_compute_hook(
    httpserver: HTTPServer,
    neon_env_builder: NeonEnvBuilder,
    httpserver_listen_address,
):
    """
    Test the migration process's behavior when the compute hook does not enable it to proceed

    A local HTTP server stands in for the compute notification hook; its response status is
    controlled through `handle_params["status"]`.  The test covers two cases:

    1. A migration started while the hook answers 423 stays in flight until the hook answers
       200 again.
    2. A migration in the opposite direction, started while the hook answers 423, finishes
       once the node it is migrating away from is marked offline, even though the hook is
       still blocked (regression test for
       https://github.com/neondatabase/neon/issues/8901).
    """
    neon_env_builder.num_pageservers = 2
    (host, port) = httpserver_listen_address
    neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify"

    # Mutable so that the test body can change the status the handler below responds with
    handle_params = {"status": 200}
    notifications = []

    def handler(request: Request):
        status = handle_params["status"]
        log.info(f"Notify request[{status}]: {request}")
        notifications.append(request.json)
        return Response(status=status)

    httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler)

    # Start running
    env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"})

    # Initial notification from tenant creation
    assert len(notifications) == 1
    expect: Dict[str, Union[List[Dict[str, int]], str, None, int]] = {
        "tenant_id": str(env.initial_tenant),
        "stripe_size": None,
        "shards": [{"node_id": int(env.pageservers[0].id), "shard_number": 0}],
    }
    assert notifications[0] == expect

    # Do a migration while the compute hook is returning 423 status
    tenant_id = env.initial_tenant
    origin_pageserver = env.get_tenant_pageserver(tenant_id)
    dest_ps_id = [p.id for p in env.pageservers if p.id != origin_pageserver.id][0]
    dest_pageserver = env.get_pageserver(dest_ps_id)
    shard_0_id = TenantShardId(tenant_id, 0, 0)

    NOTIFY_BLOCKED_LOG = ".*Live migration blocked.*"
    env.storage_controller.allowed_errors.extend(
        [
            NOTIFY_BLOCKED_LOG,
            ".*Failed to notify compute.*",
            ".*Reconcile error.*Cancelled",
            ".*Reconcile error.*Control plane tenant busy",
        ]
    )

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        try:
            # We expect the controller to hit the 423 (locked) and retry. Migration shouldn't
            # complete until that status is cleared.
            handle_params["status"] = 423
            migrate_fut = executor.submit(
                env.storage_controller.tenant_shard_migrate, shard_0_id, dest_ps_id
            )

            def logged_stuck():
                env.storage_controller.assert_log_contains(NOTIFY_BLOCKED_LOG)

            wait_until(10, 0.25, logged_stuck)
            contains_r = env.storage_controller.log_contains(NOTIFY_BLOCKED_LOG)
            assert contains_r is not None  # Appease mypy
            (_, log_cursor) = contains_r
            assert migrate_fut.running()

            # Permit the compute hook to proceed
            handle_params["status"] = 200
            migrate_fut.result(timeout=10)

            # Advance log cursor past the last 'stuck' message (we already waited for one, but
            # there could be more than one)
            while True:
                contains_r = env.storage_controller.log_contains(
                    NOTIFY_BLOCKED_LOG, offset=log_cursor
                )
                if contains_r is None:
                    break
                (_, log_cursor) = contains_r

            # Now, do a migration in the opposite direction
            handle_params["status"] = 423
            migrate_fut = executor.submit(
                env.storage_controller.tenant_shard_migrate, shard_0_id, origin_pageserver.id
            )

            def logged_stuck_again():
                env.storage_controller.assert_log_contains(NOTIFY_BLOCKED_LOG, offset=log_cursor)

            wait_until(10, 0.25, logged_stuck_again)
            assert migrate_fut.running()

            # This time, the compute hook remains stuck, but we mark the origin node offline:
            # this should also allow the migration to complete -- we only wait for the compute
            # hook as long as we think the old location is still usable for computes.
            # Note: for this reverse migration the "origin" is `dest_pageserver`, i.e. the
            # destination of the first migration above.
            # This is a regression test for issue https://github.com/neondatabase/neon/issues/8901
            dest_pageserver.stop()
            env.storage_controller.node_configure(dest_ps_id, {"availability": "Offline"})

            try:
                migrate_fut.result(timeout=10)
            except StorageControllerApiException as e:
                # The reconciler will fail because it can't detach from the origin: the
                # important thing is that it finishes, rather than getting stuck in the
                # compute notify loop.
                assert "Reconcile error" in str(e)

            # A later background reconciliation will clean up and leave things in a neat
            # state, even while the compute hook is still blocked
            try:
                env.storage_controller.reconcile_all()
            except StorageControllerApiException as e:
                # We expect that the reconciler will do its work, but be unable to fully
                # succeed because it can't send a compute notification. It will complete,
                # but leave the internal flag set for "retry compute notification later"
                assert "Control plane tenant busy" in str(e)

            # Confirm that we are AttachedSingle on the node we last called the migrate API for
            loc = origin_pageserver.http_client().tenant_get_location(shard_0_id)
            assert loc["mode"] == "AttachedSingle"

            # When the origin node (of the reverse migration, i.e. `dest_pageserver`) comes
            # back, it should get cleaned up
            dest_pageserver.start()
            try:
                env.storage_controller.reconcile_all()
            except StorageControllerApiException as e:
                # Compute hook is still blocked: reconciler will configure PS but not fully succeed
                assert "Control plane tenant busy" in str(e)

            with pytest.raises(PageserverApiException, match="Tenant shard not found"):
                dest_pageserver.http_client().tenant_get_location(shard_0_id)

            # Once the compute hook is unblocked, we should be able to get into a totally
            # quiescent state again
            handle_params["status"] = 200
            env.storage_controller.reconcile_until_idle()

            env.storage_controller.consistency_check()
        finally:
            # Always unblock the compute hook, even on failure: leaving the executor's `with`
            # block waits for any submitted migration to finish, and a migration that is
            # still being answered with 423 would keep that wait (and the test) from ending.
            handle_params["status"] = 200
def test_storage_controller_debug_apis(neon_env_builder: NeonEnvBuilder):
"""
Verify that occasional-use debug APIs work as expected. This is a lightweight test
@@ -663,10 +806,7 @@ def test_storage_controller_s3_time_travel_recovery(
env.storage_controller.consistency_check()
branch_name = "main"
timeline_id = env.neon_cli.create_timeline(
branch_name,
tenant_id=tenant_id,
)
timeline_id = env.create_timeline(branch_name, tenant_id=tenant_id)
# Write some nontrivial amount of data into the endpoint and wait until it is uploaded
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -866,9 +1006,7 @@ def test_storage_controller_tenant_deletion(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}'
)
env.create_tenant(tenant_id, timeline_id, shard_count=2, placement_policy='{"Attached":1}')
# Ensure all the locations are configured, including secondaries
env.storage_controller.reconcile_until_idle()
@@ -1074,10 +1212,7 @@ def test_storage_controller_heartbeats(
env.storage_controller.tenant_create(tid)
branch_name = "main"
env.neon_cli.create_timeline(
branch_name,
tenant_id=tid,
)
env.create_timeline(branch_name, tenant_id=tid)
with env.endpoints.create_start("main", tenant_id=tid) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -1179,9 +1314,9 @@ def test_storage_controller_re_attach(neon_env_builder: NeonEnvBuilder):
# We'll have two tenants.
tenant_a = TenantId.generate()
env.neon_cli.create_tenant(tenant_a, placement_policy='{"Attached":1}')
env.create_tenant(tenant_a, placement_policy='{"Attached":1}')
tenant_b = TenantId.generate()
env.neon_cli.create_tenant(tenant_b, placement_policy='{"Attached":1}')
env.create_tenant(tenant_b, placement_policy='{"Attached":1}')
# Each pageserver will have one attached and one secondary location
env.storage_controller.tenant_shard_migrate(
@@ -1504,7 +1639,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
# Create a second timeline to ensure that import finds both
timeline_a = env.initial_timeline
timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id)
timeline_b = env.create_branch("branch_b", tenant_id=tenant_id)
workload_a = Workload(env, tenant_id, timeline_a, branch_name="main")
workload_a.init()
@@ -1546,7 +1681,7 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
)
# Now import it again
env.neon_cli.import_tenant(tenant_id)
env.neon_cli.tenant_import(tenant_id)
# Check we found the shards
describe = env.storage_controller.tenant_describe(tenant_id)
@@ -1588,7 +1723,7 @@ def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.neon_cli.create_tenant(
env.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -1675,7 +1810,7 @@ def test_skip_drain_on_secondary_lag(neon_env_builder: NeonEnvBuilder, pg_bin: P
env = neon_env_builder.init_configs()
env.start()
tid, timeline_id = env.neon_cli.create_tenant(placement_policy='{"Attached":1}')
tid, timeline_id = env.create_tenant(placement_policy='{"Attached":1}')
# Give things a chance to settle.
env.storage_controller.reconcile_until_idle(timeout_secs=30)
@@ -1781,7 +1916,7 @@ def test_background_operation_cancellation(neon_env_builder: NeonEnvBuilder):
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.neon_cli.create_tenant(
env.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -1841,7 +1976,7 @@ def test_storage_controller_node_deletion(
for _ in range(0, tenant_count):
tid = TenantId.generate()
tenant_ids.append(tid)
env.neon_cli.create_tenant(
env.create_tenant(
tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
)
@@ -1966,7 +2101,7 @@ def test_storage_controller_metadata_health(
)
# Mock tenant with unhealthy scrubber scan result
tenant_b, _ = env.neon_cli.create_tenant(shard_count=shard_count)
tenant_b, _ = env.create_tenant(shard_count=shard_count)
tenant_b_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_b, shard_count=shard_count)
if shard_count is not None
@@ -1974,7 +2109,7 @@ def test_storage_controller_metadata_health(
)
# Mock tenant that never gets a health update from scrubber
tenant_c, _ = env.neon_cli.create_tenant(shard_count=shard_count)
tenant_c, _ = env.create_tenant(shard_count=shard_count)
tenant_c_shard_ids = (
env.storage_controller.tenant_shard_split(tenant_c, shard_count=shard_count)
@@ -2374,7 +2509,7 @@ def test_storage_controller_validate_during_migration(neon_env_builder: NeonEnvB
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.create_tenant(tenant_id, timeline_id)
env.storage_controller.pageserver_api().set_tenant_config(tenant_id, TENANT_CONF)
# Write enough data that a compaction would do some work (deleting some L0s)
@@ -2470,6 +2605,9 @@ def test_storage_controller_validate_during_migration(neon_env_builder: NeonEnvB
class MigrationFailpoints(Enum):
# While only the origin is attached
PRE_GENERATION_INC = "reconciler-live-migrate-pre-generation-inc"
# While only the origin is attached and the db was updated to
# point to the new location
PRE_AWAIT_LSN = "reconciler-live-migrate-pre-await-lsn"
# While both locations are attached
POST_NOTIFY = "reconciler-live-migrate-post-notify"
# While only the destination is attached
@@ -2495,12 +2633,24 @@ def test_storage_controller_proxy_during_migration(
"""
neon_env_builder.num_pageservers = 2
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
neon_env_builder.storage_controller_config = {
# Publish long reconcile metric early
"long_reconcile_threshold": "5s",
}
env = neon_env_builder.init_configs()
env.start()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
env.neon_cli.create_tenant(tenant_id, timeline_id)
env.create_tenant(tenant_id, timeline_id)
# The test stalls a reconcile on purpose to check if the long running
# reconcile alert fires.
env.storage_controller.allowed_errors.extend(
[".*Reconcile passed the long running threshold.*"]
)
# Activate a failpoint that will cause live migration to get stuck _after_ the generation has been issued
# to the new pageserver: this should result in requests routed to the new pageserver.
@@ -2509,6 +2659,24 @@ def test_storage_controller_proxy_during_migration(
origin_pageserver = env.get_tenant_pageserver(tenant_id)
dest_ps_id = [p.id for p in env.pageservers if p.id != origin_pageserver.id][0]
def long_migration_metric_published():
    """Assert the long-running-reconcile counter for shard 0 of the tenant reads 1.

    Passed to `wait_until` while the migration is stalled, so the assertion is
    expected to fail until the storage controller publishes the metric.
    """
    shard_labels = {"tenant_id": str(tenant_id), "shard_number": "0"}
    observed = env.storage_controller.get_metric_value(
        "storage_controller_reconcile_long_running_total",
        filter=shard_labels,
    )
    assert observed == 1
def assert_long_migration_metric_not_published():
    """Assert the long-running-reconcile counter for shard 0 of the tenant has no value.

    Called once the migration has finished; `get_metric_value` returning None
    is what this check treats as "metric not published".
    """
    shard_labels = {"tenant_id": str(tenant_id), "shard_number": "0"}
    observed = env.storage_controller.get_metric_value(
        "storage_controller_reconcile_long_running_total",
        filter=shard_labels,
    )
    assert observed is None
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
migrate_fut = executor.submit(
@@ -2539,9 +2707,14 @@ def test_storage_controller_proxy_during_migration(
# We expect request to land on the origin
assert tenant_info["generation"] == 1
wait_until(10, 1, long_migration_metric_published)
# Eventually migration completes
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
migrate_fut.result()
assert_long_migration_metric_not_published()
except:
# Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown
env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
@@ -2650,7 +2823,7 @@ def test_shard_preferred_azs(neon_env_builder: NeonEnvBuilder):
# Generate a layer to avoid shard split handling on ps from tripping
# up on debug assert.
timeline_id = TimelineId.generate()
env.neon_cli.create_timeline("bar", tids[0], timeline_id)
env.create_timeline("bar", tids[0], timeline_id)
workload = Workload(env, tids[0], timeline_id, branch_name="bar")
workload.init()
@@ -2664,3 +2837,171 @@ def test_shard_preferred_azs(neon_env_builder: NeonEnvBuilder):
attached_to = shard["node_attached"]
expected_az = env.get_pageserver(attached_to).az_id
assert shard["preferred_az_id"] == expected_az
@run_only_on_default_postgres("Postgres version makes no difference here")
@pytest.mark.parametrize(
    "migration_failpoint",
    [
        MigrationFailpoints.PRE_GENERATION_INC,
        MigrationFailpoints.PRE_AWAIT_LSN,
        MigrationFailpoints.POST_NOTIFY,
        MigrationFailpoints.POST_DETACH,
    ],
)
def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, migration_failpoint):
    """
    Delete a timeline while a live migration of the tenant's shard is paused
    at `migration_failpoint`, then let the migration finish.

    Afterwards the previously attached pageserver must answer 404 for the
    tenant, and the deleted timeline must not be listed on the pageserver
    the shard migrated to.
    """
    neon_env_builder.num_pageservers = 2
    env = neon_env_builder.init_configs()
    env.start()

    tenant_id = TenantId.generate()
    timeline_id = TimelineId.generate()
    env.storage_controller.tenant_create(tenant_id, placement_policy={"Attached": 1})
    env.storage_controller.pageserver_api().timeline_create(
        pg_version=PgVersion.NOT_SET, tenant_id=tenant_id, new_timeline_id=timeline_id
    )

    shard_zero = TenantShardId(tenant_id, 0, 0)
    locations = env.storage_controller.get_tenants_placement()[str(shard_zero)]

    # The migration below needs a settled placement with a secondary to move to.
    assert locations["observed"] == locations["intent"]
    assert locations["observed"]["attached"] is not None
    assert len(locations["observed"]["secondary"]) > 0

    attached_location = locations["observed"]["attached"]
    secondary_location = locations["observed"]["secondary"][0]

    env.storage_controller.configure_failpoints((migration_failpoint.value, "pause"))

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        # The try/except lives *inside* the executor context on purpose: leaving the
        # `with` block waits for the migrate future, which cannot finish while the
        # failpoint is still paused. The failpoint must therefore be switched off
        # before the exception reaches the executor's __exit__.
        try:
            migrate_fut = executor.submit(
                env.storage_controller.tenant_shard_migrate,
                shard_zero,
                secondary_location,
            )

            def has_hit_migration_failpoint():
                expr = f"at failpoint {migration_failpoint.value}"
                log.info(expr)
                assert env.storage_controller.log_contains(expr)

            wait_until(10, 1, has_hit_migration_failpoint)

            # Delete the timeline while the migration is parked on the failpoint.
            env.storage_controller.pageserver_api().timeline_delete(
                tenant_id=tenant_id, timeline_id=timeline_id
            )

            # Eventually migration completes
            env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
            migrate_fut.result()

            # Ensure that we detached from the old attached location
            with pytest.raises(PageserverApiException) as exc:
                env.get_pageserver(attached_location).http_client().timeline_list(tenant_id)
            assert exc.value.status_code == 404

            # Ensure the timeline is not present on the new attached location
            client = env.get_pageserver(secondary_location).http_client()
            assert timeline_id not in {
                TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)
            }, f"deleted timeline found on {secondary_location}"
        except:
            # Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown.
            # The bare except is deliberate: whatever was raised is re-raised unchanged.
            env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
            raise
@run_only_on_default_postgres("Postgres version makes no difference here")
@pytest.mark.parametrize(
    "migration_failpoint",
    [
        MigrationFailpoints.PRE_GENERATION_INC,
        MigrationFailpoints.POST_NOTIFY,
        MigrationFailpoints.POST_DETACH,
    ],
)
def test_multi_attached_timeline_creation(neon_env_builder: NeonEnvBuilder, migration_failpoint):
    """
    Create a timeline while a live migration of the tenant's shard is paused
    at `migration_failpoint`, and check which pageservers list it.

    Which locations are expected to see the new timeline depends on the
    failpoint; see the per-failpoint comments in the body. Once the migration
    is released, the previously attached pageserver must answer 404 for the
    tenant.
    """
    neon_env_builder.num_pageservers = 2
    env = neon_env_builder.init_configs()
    env.start()

    tenant_id = TenantId.generate()
    env.storage_controller.tenant_create(tenant_id, placement_policy={"Attached": 1})

    shard_zero = TenantShardId(tenant_id, 0, 0)
    locations = env.storage_controller.get_tenants_placement()[str(shard_zero)]

    # The migration below needs a settled placement with a secondary to move to.
    assert locations["observed"] == locations["intent"]
    assert locations["observed"]["attached"] is not None
    assert len(locations["observed"]["secondary"]) > 0

    attached_location = locations["observed"]["attached"]
    secondary_location = locations["observed"]["secondary"][0]

    def timeline_ids_on(node_id):
        """Return the set of timeline ids the given pageserver lists for the tenant."""
        client = env.get_pageserver(node_id).http_client()
        return {TimelineId(b["timeline_id"]) for b in client.timeline_list(tenant_id)}

    def assert_tenant_not_found_on(node_id):
        """Assert that listing the tenant's timelines on the given pageserver yields a 404."""
        with pytest.raises(PageserverApiException) as exc:
            env.get_pageserver(node_id).http_client().timeline_list(tenant_id)
        assert exc.value.status_code == 404

    env.storage_controller.configure_failpoints((migration_failpoint.value, "pause"))

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        # The try/except lives *inside* the executor context on purpose: leaving the
        # `with` block waits for the migrate future, which cannot finish while the
        # failpoint is still paused. The failpoint must therefore be switched off
        # before the exception reaches the executor's __exit__.
        try:
            migrate_fut = executor.submit(
                env.storage_controller.tenant_shard_migrate,
                shard_zero,
                secondary_location,
            )

            def has_hit_migration_failpoint():
                expr = f"at failpoint {migration_failpoint.value}"
                log.info(expr)
                assert env.storage_controller.log_contains(expr)

            wait_until(10, 1, has_hit_migration_failpoint)

            timeline_id = TimelineId.generate()
            env.storage_controller.pageserver_api().timeline_create(
                pg_version=PgVersion.NOT_SET, tenant_id=tenant_id, new_timeline_id=timeline_id
            )

            # Timeline creation only goes to the origin.
            if migration_failpoint == MigrationFailpoints.PRE_GENERATION_INC:
                assert timeline_id in timeline_ids_on(
                    attached_location
                ), f"new timeline not found on {attached_location}"

                assert_tenant_not_found_on(secondary_location)

            # Timeline creation goes to both attached locations.
            if migration_failpoint == MigrationFailpoints.POST_NOTIFY:
                for node_id in [attached_location, secondary_location]:
                    assert timeline_id in timeline_ids_on(
                        node_id
                    ), f"new timeline not found on {node_id}"

            # Timeline creation goes to both locations, but storcon gets a 404 from the origin
            # which it ignores.
            if migration_failpoint == MigrationFailpoints.POST_DETACH:
                # The message names the node that was actually queried (the destination).
                assert timeline_id in timeline_ids_on(
                    secondary_location
                ), f"new timeline not found on {secondary_location}"

                assert_tenant_not_found_on(attached_location)

            # Eventually migration completes
            env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
            migrate_fut.result()

            # Ensure that we detached from the old attached location
            assert_tenant_not_found_on(attached_location)
        except:
            # Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown.
            # The bare except is deliberate: whatever was raised is re-raised unchanged.
            env.storage_controller.configure_failpoints((migration_failpoint.value, "off"))
            raise

View File

@@ -135,7 +135,7 @@ def test_scrubber_physical_gc(neon_env_builder: NeonEnvBuilder, shard_count: Opt
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(tenant_id, timeline_id, shard_count=shard_count)
env.create_tenant(tenant_id, timeline_id, shard_count=shard_count)
workload = Workload(env, tenant_id, timeline_id)
workload.init()
@@ -185,7 +185,7 @@ def test_scrubber_physical_gc_ancestors(
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=shard_count,
@@ -303,7 +303,7 @@ def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=None,
@@ -385,7 +385,7 @@ def test_scrubber_physical_gc_ancestors_split(neon_env_builder: NeonEnvBuilder):
tenant_id = TenantId.generate()
timeline_id = TimelineId.generate()
initial_shard_count = 2
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id,
timeline_id,
shard_count=initial_shard_count,

View File

@@ -9,11 +9,11 @@ from fixtures.utils import wait_until
# It requires tracking information about replication origins at page server side
def test_subscriber_restart(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("publisher")
env.create_branch("publisher")
pub = env.endpoints.create("publisher")
pub.start()
env.neon_cli.create_branch("subscriber")
sub_timeline_id = env.create_branch("subscriber")
sub = env.endpoints.create("subscriber")
sub.start()
@@ -47,7 +47,7 @@ def test_subscriber_restart(neon_simple_env: NeonEnv):
for _ in range(n_restarts):
# restart subscriber
# time.sleep(2)
sub.stop("immediate")
sub.stop("immediate", sks_wait_walreceiver_gone=(env.safekeepers, sub_timeline_id))
sub.start()
thread.join()

View File

@@ -38,7 +38,7 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
# Check that we raise on misspelled configs
invalid_conf_key = "some_invalid_setting_name_blah_blah_123"
try:
env.neon_cli.create_tenant(
env.create_tenant(
conf={
invalid_conf_key: "20000",
}
@@ -54,9 +54,9 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
"evictions_low_residence_duration_metric_threshold": "42s",
"eviction_policy": json.dumps({"kind": "NoEviction"}),
}
tenant, _ = env.neon_cli.create_tenant(conf=new_conf)
tenant, _ = env.create_tenant(conf=new_conf)
env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant)
env.create_timeline("test_tenant_conf", tenant_id=tenant)
env.endpoints.create_start("test_tenant_conf", "main", tenant)
# check the configuration of the default tenant
@@ -121,10 +121,7 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
),
"max_lsn_wal_lag": "13000000",
}
env.neon_cli.config_tenant(
tenant_id=tenant,
conf=conf_update,
)
env.config_tenant(tenant_id=tenant, conf=conf_update)
updated_tenant_config = http_client.tenant_config(tenant_id=tenant)
updated_specific_config = updated_tenant_config.tenant_specific_overrides
@@ -172,10 +169,8 @@ def test_tenant_config(neon_env_builder: NeonEnvBuilder):
final_conf = {
"pitr_interval": "1 min",
}
env.neon_cli.config_tenant(
tenant_id=tenant,
conf=final_conf,
)
env.config_tenant(tenant_id=tenant, conf=final_conf)
final_tenant_config = http_client.tenant_config(tenant_id=tenant)
final_specific_config = final_tenant_config.tenant_specific_overrides
assert final_specific_config["pitr_interval"] == "1m"
@@ -218,7 +213,7 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
# tenant is created with defaults, as in without config file
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
(tenant_id, timeline_id) = env.create_tenant()
config_path = env.pageserver.tenant_dir(tenant_id) / "config-v1"
http_client = env.pageserver.http_client()
@@ -240,9 +235,9 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
func=lambda: assert_tenant_state(http_client, tenant_id, "Active"),
)
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "1000000"})
env.config_tenant(tenant_id, {"gc_horizon": "1000000"})
contents_first = config_path.read_text()
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "0"})
env.config_tenant(tenant_id, {"gc_horizon": "0"})
contents_later = config_path.read_text()
# Don't test applying the setting here; another test case covers that.
@@ -298,7 +293,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric = get_metric()
assert int(metric.value) > 0, "metric is updated"
env.neon_cli.config_tenant(
env.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": default_value}
)
updated_metric = get_metric()
@@ -306,9 +301,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
metric.value
), "metric is unchanged when setting same value"
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"}
)
env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2day"})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) == 0
@@ -320,9 +313,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 24 * 60 * 60
assert int(metric.value) > 0
env.neon_cli.config_tenant(
tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"}
)
env.config_tenant(tenant_id, {"evictions_low_residence_duration_metric_threshold": "2h"})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) == 0, "value resets if label changes"
@@ -334,7 +325,7 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
assert int(metric.labels["low_threshold_secs"]) == 2 * 60 * 60
assert int(metric.value) > 0, "set a non-zero value for next step"
env.neon_cli.config_tenant(tenant_id, {})
env.config_tenant(tenant_id, {})
metric = get_metric()
assert int(metric.labels["low_threshold_secs"]) == 24 * 60 * 60, "label resets to default"
assert int(metric.value) == 0, "value resets to default"

View File

@@ -78,7 +78,7 @@ def test_tenant_delete_smoke(
# may need to retry on some remote storage errors injected by the test harness
error_tolerant_delete(ps_http, tenant_id)
env.neon_cli.create_tenant(
env.create_tenant(
tenant_id=tenant_id,
conf=many_small_layers_tenant_config(),
)
@@ -89,9 +89,7 @@ def test_tenant_delete_smoke(
# create two timelines one being the parent of another
parent = None
for timeline in ["first", "second"]:
timeline_id = env.neon_cli.create_branch(
timeline, tenant_id=tenant_id, ancestor_branch_name=parent
)
timeline_id = env.create_branch(timeline, ancestor_branch_name=parent, tenant_id=tenant_id)
with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
@@ -339,7 +337,7 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, make_httpserver, neon_env_builder
ps_http = env.pageserver.http_client()
# create a tenant separate from the main tenant so that we have one remaining
# after we deleted it, as the scrubber treats empty buckets as an error.
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
(tenant_id, timeline_id) = env.create_tenant()
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
run_pg_bench_small(pg_bin, endpoint.connstr())

View File

@@ -72,7 +72,7 @@ def test_tenant_reattach(neon_env_builder: NeonEnvBuilder, mode: str):
pageserver_http = env.pageserver.http_client()
# create new tenant
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS)
@@ -241,7 +241,7 @@ def test_tenant_reattach_while_busy(
pageserver_http = env.pageserver.http_client()
# create new tenant
tenant_id, timeline_id = env.neon_cli.create_tenant(
tenant_id, timeline_id = env.create_tenant(
# Create layers aggressively
conf={"checkpoint_distance": "100000"}
)

View File

@@ -219,7 +219,7 @@ def test_tenant_relocation(
log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, env.initial_timeline)
env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id)
env.create_branch("test_tenant_relocation_main", tenant_id=tenant_id)
ep_main = env.endpoints.create_start(
branch_name="test_tenant_relocation_main", tenant_id=tenant_id
)
@@ -232,7 +232,7 @@ def test_tenant_relocation(
expected_sum=500500,
)
env.neon_cli.create_branch(
env.create_branch(
new_branch_name="test_tenant_relocation_second",
ancestor_branch_name="test_tenant_relocation_main",
ancestor_start_lsn=current_lsn_main,
@@ -404,7 +404,7 @@ def test_emergency_relocate_with_branches_slow_replay(
# - A logical replication message between the inserts, so that we can conveniently
# pause the WAL ingestion between the two inserts.
# - Child branch, created after the inserts
tenant_id, _ = env.neon_cli.create_tenant()
tenant_id, _ = env.create_tenant()
main_endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with main_endpoint.cursor() as cur:
@@ -417,7 +417,7 @@ def test_emergency_relocate_with_branches_slow_replay(
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
main_endpoint.stop()
env.neon_cli.create_branch("child", tenant_id=tenant_id, ancestor_start_lsn=current_lsn)
env.create_branch("child", tenant_id=tenant_id, ancestor_start_lsn=current_lsn)
# Now kill the pageserver, remove the tenant directory, and restart. This simulates
# the scenario that a pageserver dies unexpectedly and cannot be recovered, so we relocate
@@ -548,7 +548,7 @@ def test_emergency_relocate_with_branches_createdb(
pageserver_http = env.pageserver.http_client()
# create new tenant
tenant_id, _ = env.neon_cli.create_tenant()
tenant_id, _ = env.create_tenant()
main_endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
with main_endpoint.cursor() as cur:
@@ -556,7 +556,7 @@ def test_emergency_relocate_with_branches_createdb(
cur.execute("CREATE DATABASE neondb")
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
env.neon_cli.create_branch("child", tenant_id=tenant_id, ancestor_start_lsn=current_lsn)
env.create_branch("child", tenant_id=tenant_id, ancestor_start_lsn=current_lsn)
with main_endpoint.cursor(dbname="neondb") as cur:
cur.execute("CREATE TABLE test_migrate_one AS SELECT generate_series(1,100)")

View File

@@ -27,7 +27,7 @@ def test_empty_tenant_size(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_configs()
env.start()
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
(tenant_id, timeline_id) = env.create_tenant()
http_client = env.pageserver.http_client()
initial_size = http_client.tenant_size(tenant_id)
@@ -67,12 +67,12 @@ def test_branched_empty_timeline_size(neon_simple_env: NeonEnv, test_output_dir:
gc_horizon
"""
env = neon_simple_env
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
http_client = env.pageserver.http_client()
initial_size = http_client.tenant_size(tenant_id)
first_branch_timeline_id = env.neon_cli.create_branch("first-branch", tenant_id=tenant_id)
first_branch_timeline_id = env.create_branch("first-branch", tenant_id=tenant_id)
with env.endpoints.create_start("first-branch", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
@@ -104,13 +104,13 @@ def test_branched_from_many_empty_parents_size(neon_simple_env: NeonEnv, test_ou
nth_n: 10------------I--------100
"""
env = neon_simple_env
(tenant_id, _) = env.neon_cli.create_tenant()
(tenant_id, _) = env.create_tenant()
http_client = env.pageserver.http_client()
initial_size = http_client.tenant_size(tenant_id)
first_branch_name = "first"
env.neon_cli.create_branch(first_branch_name, tenant_id=tenant_id)
env.create_branch(first_branch_name, tenant_id=tenant_id)
size_after_branching = http_client.tenant_size(tenant_id)
@@ -123,7 +123,7 @@ def test_branched_from_many_empty_parents_size(neon_simple_env: NeonEnv, test_ou
for i in range(0, 4):
latest_branch_name = f"nth_{i}"
last_branch = env.neon_cli.create_branch(
last_branch = env.create_branch(
latest_branch_name, ancestor_branch_name=last_branch_name, tenant_id=tenant_id
)
last_branch_name = latest_branch_name
@@ -159,7 +159,7 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir:
env = neon_simple_env
gc_horizon = 20_000
(tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)})
(tenant_id, main_id) = env.create_tenant(conf={"gc_horizon": str(gc_horizon)})
http_client = env.pageserver.http_client()
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
@@ -172,9 +172,7 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir:
assert flushed_lsn.lsn_int - gc_horizon > initdb_lsn.lsn_int
branch_id = env.neon_cli.create_branch(
"branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn
)
branch_id = env.create_branch("branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn)
with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
@@ -201,7 +199,7 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path):
env = neon_simple_env
gc_horizon = 5_000
(tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": str(gc_horizon)})
(tenant_id, main_id) = env.create_tenant(conf={"gc_horizon": str(gc_horizon)})
http_client = env.pageserver.http_client()
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
@@ -220,9 +218,7 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path):
assert flushed_lsn.lsn_int - gc_horizon > initdb_lsn.lsn_int
branch_id = env.neon_cli.create_branch(
"branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn
)
branch_id = env.create_branch("branch", tenant_id=tenant_id, ancestor_start_lsn=flushed_lsn)
with env.endpoints.create_start("branch", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
@@ -248,13 +244,13 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
"""
env = neon_simple_env
(tenant_id, main_id) = env.neon_cli.create_tenant(conf={"gc_horizon": "1024"})
(tenant_id, main_id) = env.create_tenant(conf={"gc_horizon": "1024"})
http_client = env.pageserver.http_client()
initial_size = http_client.tenant_size(tenant_id)
first_id = env.neon_cli.create_branch("first", tenant_id=tenant_id)
second_id = env.neon_cli.create_branch("second", tenant_id=tenant_id)
first_id = env.create_branch("first", tenant_id=tenant_id)
second_id = env.create_branch("second", tenant_id=tenant_id)
ids = {"main": main_id, "first": first_id, "second": second_id}
@@ -530,8 +526,8 @@ def test_get_tenant_size_with_multiple_branches(
size_at_branch = http_client.tenant_size(tenant_id)
assert size_at_branch > 0
first_branch_timeline_id = env.neon_cli.create_branch(
"first-branch", main_branch_name, tenant_id
first_branch_timeline_id = env.create_branch(
"first-branch", ancestor_branch_name=main_branch_name, tenant_id=tenant_id
)
size_after_first_branch = http_client.tenant_size(tenant_id)
@@ -557,8 +553,8 @@ def test_get_tenant_size_with_multiple_branches(
size_after_continuing_on_main = http_client.tenant_size(tenant_id)
assert size_after_continuing_on_main > size_after_growing_first_branch
second_branch_timeline_id = env.neon_cli.create_branch(
"second-branch", main_branch_name, tenant_id
second_branch_timeline_id = env.create_branch(
"second-branch", ancestor_branch_name=main_branch_name, tenant_id=tenant_id
)
size_after_second_branch = http_client.tenant_size(tenant_id)
assert_size_approx_equal(size_after_second_branch, size_after_continuing_on_main)
@@ -633,8 +629,8 @@ def test_synthetic_size_while_deleting(neon_env_builder: NeonEnvBuilder):
orig_size = client.tenant_size(env.initial_tenant)
branch_id = env.neon_cli.create_branch(
tenant_id=env.initial_tenant, ancestor_branch_name="main", new_branch_name="branch"
branch_id = env.create_branch(
"branch", ancestor_branch_name="main", tenant_id=env.initial_tenant
)
client.configure_failpoints((failpoint, "pause"))
@@ -651,8 +647,8 @@ def test_synthetic_size_while_deleting(neon_env_builder: NeonEnvBuilder):
assert_size_approx_equal(orig_size, size)
branch_id = env.neon_cli.create_branch(
tenant_id=env.initial_tenant, ancestor_branch_name="main", new_branch_name="branch2"
branch_id = env.create_branch(
"branch2", ancestor_branch_name="main", tenant_id=env.initial_tenant
)
client.configure_failpoints((failpoint, "pause"))
@@ -749,7 +745,7 @@ def test_lsn_lease_size(neon_env_builder: NeonEnvBuilder, test_output_dir: Path,
env, env.initial_tenant, env.initial_timeline, test_output_dir, action="branch"
)
tenant, timeline = env.neon_cli.create_tenant(conf=conf)
tenant, timeline = env.create_tenant(conf=conf)
lease_res = insert_with_action(env, tenant, timeline, test_output_dir, action="lease")
assert_size_approx_equal_for_lease_test(lease_res, ro_branch_res)
@@ -793,8 +789,8 @@ def insert_with_action(
res = client.timeline_lsn_lease(tenant, timeline, last_flush_lsn)
log.info(f"result from lsn_lease api: {res}")
elif action == "branch":
ro_branch = env.neon_cli.create_branch(
"ro_branch", tenant_id=tenant, ancestor_start_lsn=last_flush_lsn
ro_branch = env.create_branch(
"ro_branch", ancestor_start_lsn=last_flush_lsn, tenant_id=tenant
)
log.info(f"{ro_branch=} created")
else:

View File

@@ -31,8 +31,8 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
timeline_delete_wait_completed(client, tenant, t)
# Create tenant, start compute
tenant, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline(name, tenant_id=tenant)
tenant, _ = env.create_tenant()
env.create_timeline(name, tenant_id=tenant)
endpoint = env.endpoints.create_start(name, tenant_id=tenant)
assert_tenant_state(
client,

View File

@@ -32,7 +32,7 @@ from prometheus_client.samples import Sample
def test_tenant_creation_fails(neon_simple_env: NeonEnv):
tenants_dir = neon_simple_env.pageserver.tenant_dir()
initial_tenants = sorted(
map(lambda t: t.split()[0], neon_simple_env.neon_cli.list_tenants().stdout.splitlines())
map(lambda t: t.split()[0], neon_simple_env.neon_cli.tenant_list().stdout.splitlines())
)
[d for d in tenants_dir.iterdir()]
@@ -59,11 +59,11 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv):
# an empty tenant dir with no config in it.
neon_simple_env.pageserver.allowed_errors.append(".*Failed to load tenant config.*")
new_tenants = sorted(
map(lambda t: t.split()[0], neon_simple_env.neon_cli.list_tenants().stdout.splitlines())
map(lambda t: t.split()[0], neon_simple_env.neon_cli.tenant_list().stdout.splitlines())
)
assert initial_tenants == new_tenants, "should not create new tenants"
neon_simple_env.neon_cli.create_tenant()
neon_simple_env.create_tenant()
def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder):
@@ -71,11 +71,11 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
"""Tests tenants with and without wal acceptors"""
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
tenant_1, _ = env.create_tenant()
tenant_2, _ = env.create_tenant()
env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_1)
env.neon_cli.create_timeline("test_tenants_normal_work", tenant_id=tenant_2)
env.create_timeline("test_tenants_normal_work", tenant_id=tenant_1)
env.create_timeline("test_tenants_normal_work", tenant_id=tenant_2)
endpoint_tenant1 = env.endpoints.create_start(
"test_tenants_normal_work",
@@ -102,11 +102,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "availability_zone='test_ps_az'"
env = neon_env_builder.init_start()
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
tenant_1, _ = env.create_tenant()
tenant_2, _ = env.create_tenant()
timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1)
timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2)
timeline_1 = env.create_timeline("test_metrics_normal_work", tenant_id=tenant_1)
timeline_2 = env.create_timeline("test_metrics_normal_work", tenant_id=tenant_2)
endpoint_tenant1 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_1)
endpoint_tenant2 = env.endpoints.create_start("test_metrics_normal_work", tenant_id=tenant_2)
@@ -250,11 +250,11 @@ def test_pageserver_metrics_removed_after_detach(neon_env_builder: NeonEnvBuilde
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
tenant_1, _ = env.create_tenant()
tenant_2, _ = env.create_tenant()
env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_1)
env.neon_cli.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_2)
env.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_1)
env.create_timeline("test_metrics_removed_after_detach", tenant_id=tenant_2)
endpoint_tenant1 = env.endpoints.create_start(
"test_metrics_removed_after_detach", tenant_id=tenant_1

View File

@@ -66,7 +66,7 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder):
for _ in range(1, 5):
# Use a tiny checkpoint distance, to create a lot of layers quickly
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
"checkpoint_distance": "5000000",
}

View File

@@ -46,10 +46,11 @@ def test_timeline_archive(neon_env_builder: NeonEnvBuilder, shard_count: int):
# construct a pair of branches to validate that pageserver prohibits
# archival of ancestor timelines when they have non-archived child branches
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_archive_parent")
parent_timeline_id = env.create_branch("test_ancestor_branch_archive_parent")
leaf_timeline_id = env.neon_cli.create_branch(
"test_ancestor_branch_archive_branch1", "test_ancestor_branch_archive_parent"
leaf_timeline_id = env.create_branch(
"test_ancestor_branch_archive_branch1",
ancestor_branch_name="test_ancestor_branch_archive_parent",
)
with pytest.raises(

View File

@@ -68,12 +68,12 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
# construct pair of branches to validate that pageserver prohibits
# deletion of ancestor timelines when they have child branches
parent_timeline_id = env.neon_cli.create_branch(
new_branch_name="test_ancestor_branch_delete_parent", ancestor_branch_name="main"
parent_timeline_id = env.create_branch(
"test_ancestor_branch_delete_parent", ancestor_branch_name="main"
)
leaf_timeline_id = env.neon_cli.create_branch(
new_branch_name="test_ancestor_branch_delete_branch1",
leaf_timeline_id = env.create_branch(
"test_ancestor_branch_delete_branch1",
ancestor_branch_name="test_ancestor_branch_delete_parent",
)
@@ -184,7 +184,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
ps_http = env.pageserver.http_client()
timeline_id = env.neon_cli.create_timeline("delete")
timeline_id = env.create_timeline("delete")
with env.endpoints.create_start("delete") as endpoint:
# generate enough layers
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -334,7 +334,7 @@ def test_timeline_resurrection_on_attach(
wait_for_upload(ps_http, tenant_id, main_timeline_id, current_lsn)
log.info("upload of checkpoint is done")
branch_timeline_id = env.neon_cli.create_branch("new", "main")
branch_timeline_id = env.create_branch("new", ancestor_branch_name="main")
# Two variants of this test:
# - In fill_branch=True, the deleted branch has layer files.
@@ -409,13 +409,11 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
ps_http.configure_failpoints(("timeline-delete-before-rm", "return"))
# construct pair of branches
intermediate_timeline_id = env.neon_cli.create_branch(
"test_timeline_delete_fail_before_local_delete"
)
intermediate_timeline_id = env.create_branch("test_timeline_delete_fail_before_local_delete")
leaf_timeline_id = env.neon_cli.create_branch(
leaf_timeline_id = env.create_branch(
"test_timeline_delete_fail_before_local_delete1",
"test_timeline_delete_fail_before_local_delete",
ancestor_branch_name="test_timeline_delete_fail_before_local_delete",
)
leaf_timeline_path = env.pageserver.timeline_dir(env.initial_tenant, leaf_timeline_id)
@@ -514,7 +512,7 @@ def test_concurrent_timeline_delete_stuck_on(
env = neon_env_builder.init_start()
child_timeline_id = env.neon_cli.create_branch("child", "main")
child_timeline_id = env.create_branch("child", ancestor_branch_name="main")
ps_http = env.pageserver.http_client()
@@ -591,7 +589,7 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
child_timeline_id = env.neon_cli.create_branch("child", "main")
child_timeline_id = env.create_branch("child", ancestor_branch_name="main")
ps_http = env.pageserver.http_client(retries=Retry(0, read=False))
@@ -656,7 +654,7 @@ def test_timeline_delete_works_for_remote_smoke(
timeline_ids = [env.initial_timeline]
for i in range(2):
branch_timeline_id = env.neon_cli.create_branch(f"new{i}", "main")
branch_timeline_id = env.create_branch(f"new{i}", ancestor_branch_name="main")
with env.endpoints.create_start(f"new{i}") as pg, pg.cursor() as cur:
cur.execute("CREATE TABLE f (i integer);")
cur.execute("INSERT INTO f VALUES (generate_series(1,1000));")
@@ -733,7 +731,7 @@ def test_delete_orphaned_objects(
ps_http = env.pageserver.http_client()
timeline_id = env.neon_cli.create_timeline("delete")
timeline_id = env.create_timeline("delete")
with env.endpoints.create_start("delete") as endpoint:
# generate enough layers
run_pg_bench_small(pg_bin, endpoint.connstr())
@@ -791,7 +789,7 @@ def test_timeline_delete_resumed_on_attach(
ps_http = env.pageserver.http_client()
timeline_id = env.neon_cli.create_timeline("delete")
timeline_id = env.create_timeline("delete")
with env.endpoints.create_start("delete") as endpoint:
# generate enough layers
run_pg_bench_small(pg_bin, endpoint.connstr())

View File

@@ -133,9 +133,7 @@ def test_ancestor_detach_branched_from(
name = "new main"
timeline_id = env.neon_cli.create_branch(
name, "main", env.initial_tenant, ancestor_start_lsn=branch_at
)
timeline_id = env.create_branch(name, ancestor_branch_name="main", ancestor_start_lsn=branch_at)
recorded = Lsn(client.timeline_detail(env.initial_tenant, timeline_id)["ancestor_lsn"])
if branch_at is None:
@@ -262,19 +260,19 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder):
wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
# as this only gets reparented, we don't need to write to it like new main
reparented = env.neon_cli.create_branch(
"reparented", "main", env.initial_tenant, ancestor_start_lsn=branchpoint_pipe
reparented = env.create_branch(
"reparented", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe
)
same_branchpoint = env.neon_cli.create_branch(
"same_branchpoint", "main", env.initial_tenant, ancestor_start_lsn=branchpoint_x
same_branchpoint = env.create_branch(
"same_branchpoint", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_x
)
timeline_id = env.neon_cli.create_branch(
"new main", "main", env.initial_tenant, ancestor_start_lsn=branchpoint_x
timeline_id = env.create_branch(
"new main", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_x
)
after = env.neon_cli.create_branch("after", "main", env.initial_tenant, ancestor_start_lsn=None)
after = env.create_branch("after", ancestor_branch_name="main", ancestor_start_lsn=None)
all_reparented = client.detach_ancestor(env.initial_tenant, timeline_id)
assert set(all_reparented) == {reparented, same_branchpoint}
@@ -365,8 +363,8 @@ def test_detached_receives_flushes_while_being_detached(neon_env_builder: NeonEn
branchpoint = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
timeline_id = env.neon_cli.create_branch(
"new main", "main", tenant_id=env.initial_tenant, ancestor_start_lsn=branchpoint
timeline_id = env.create_branch(
"new main", ancestor_branch_name="main", ancestor_start_lsn=branchpoint
)
log.info("starting the new main endpoint")
@@ -479,10 +477,9 @@ def test_compaction_induced_by_detaches_in_history(
for num in more_good_numbers:
branch_name = f"br-{len(branches)}"
branch_timeline_id = env.neon_cli.create_branch(
branch_timeline_id = env.create_branch(
branch_name,
ancestor_branch_name=branches[-1][0],
tenant_id=env.initial_tenant,
ancestor_start_lsn=branch_lsn,
)
branches.append((branch_name, branch_timeline_id))
@@ -599,15 +596,15 @@ def test_timeline_ancestor_detach_idempotent_success(
else:
client = env.pageserver.http_client()
first_branch = env.neon_cli.create_branch("first_branch")
first_branch = env.create_branch("first_branch")
_ = env.neon_cli.create_branch("second_branch", ancestor_branch_name="first_branch")
_ = env.create_branch("second_branch", ancestor_branch_name="first_branch")
# these two will be reparented, and they should be returned in stable order
# from pageservers OR otherwise there will be an `error!` logging from
# storage controller
reparented1 = env.neon_cli.create_branch("first_reparented", ancestor_branch_name="main")
reparented2 = env.neon_cli.create_branch("second_reparented", ancestor_branch_name="main")
reparented1 = env.create_branch("first_reparented", ancestor_branch_name="main")
reparented2 = env.create_branch("second_reparented", ancestor_branch_name="main")
first_reparenting_response = client.detach_ancestor(env.initial_tenant, first_branch)
assert set(first_reparenting_response) == {reparented1, reparented2}
@@ -658,9 +655,9 @@ def test_timeline_ancestor_detach_errors(neon_env_builder: NeonEnvBuilder, shard
client.detach_ancestor(env.initial_tenant, env.initial_timeline)
assert info.value.status_code == 409
_ = env.neon_cli.create_branch("first_branch")
_ = env.create_branch("first_branch")
second_branch = env.neon_cli.create_branch("second_branch", ancestor_branch_name="first_branch")
second_branch = env.create_branch("second_branch", ancestor_branch_name="first_branch")
# funnily enough this does not have a prefix
with pytest.raises(PageserverApiException, match="too many ancestors") as info:
@@ -697,7 +694,7 @@ def test_sharded_timeline_detach_ancestor(neon_env_builder: NeonEnvBuilder):
utilized_pageservers = {x["node_id"] for x in shards}
assert len(utilized_pageservers) > 1, "all shards got placed on single pageserver?"
branch_timeline_id = env.neon_cli.create_branch(branch_name, tenant_id=env.initial_tenant)
branch_timeline_id = env.create_branch(branch_name)
with env.endpoints.create_start(branch_name, tenant_id=env.initial_tenant) as ep:
ep.safe_psql(
@@ -849,7 +846,7 @@ def test_timeline_detach_ancestor_interrupted_by_deletion(
pageservers = dict((int(p.id), p) for p in env.pageservers)
detached_timeline = env.neon_cli.create_branch("detached soon", "main")
detached_timeline = env.create_branch("detached soon", ancestor_branch_name="main")
pausepoint = "timeline-detach-ancestor::before_starting_after_locking-pausable"
@@ -993,7 +990,7 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv
ps.http_client().timeline_checkpoint(shard_id, env.initial_timeline)
def create_reparentable_timeline() -> TimelineId:
return env.neon_cli.create_branch(
return env.create_branch(
"first_branch", ancestor_branch_name="main", ancestor_start_lsn=first_branch_lsn
)
@@ -1002,7 +999,7 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv
else:
first_branch = None
detached_branch = env.neon_cli.create_branch(
detached_branch = env.create_branch(
"detached_branch", ancestor_branch_name="main", ancestor_start_lsn=detached_branch_lsn
)
@@ -1169,7 +1166,7 @@ def test_retryable_500_hit_through_storcon_during_timeline_detach_ancestor(
shards = env.storage_controller.locate(env.initial_tenant)
assert len(set(x["node_id"] for x in shards)) == shard_count
detached_branch = env.neon_cli.create_branch("detached_branch", ancestor_branch_name="main")
detached_branch = env.create_branch("detached_branch", ancestor_branch_name="main")
pausepoint = "timeline-detach-ancestor::before_starting_after_locking-pausable"
failpoint = "timeline-detach-ancestor::before_starting_after_locking"
@@ -1294,8 +1291,8 @@ def test_retried_detach_ancestor_after_failed_reparenting(neon_env_builder: Neon
)
branch_lsn = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
http.timeline_checkpoint(env.initial_tenant, env.initial_timeline)
branch = env.neon_cli.create_branch(
f"branch_{counter}", "main", ancestor_start_lsn=branch_lsn
branch = env.create_branch(
f"branch_{counter}", ancestor_branch_name="main", ancestor_start_lsn=branch_lsn
)
timelines.append(branch)
@@ -1432,7 +1429,7 @@ def test_timeline_is_deleted_before_timeline_detach_ancestor_completes(
http = env.pageserver.http_client()
detached = env.neon_cli.create_branch("detached")
detached = env.create_branch("detached")
failpoint = "timeline-detach-ancestor::after_activating_before_finding-pausable"

View File

@@ -28,7 +28,7 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool
pss = ManyPageservers(list(map(lambda ps: ScrollableLog(ps, None), env.pageservers)))
foo_branch = env.neon_cli.create_branch("foo", "main", env.initial_tenant)
foo_branch = env.create_branch("foo", ancestor_branch_name="main", tenant_id=env.initial_tenant)
gc_active_line = ".* gc_loop.*: [12] timelines need GC"
gc_skipped_line = ".* gc_loop.*: Skipping GC: .*"

View File

@@ -36,7 +36,7 @@ from fixtures.utils import get_timeline_dir_size, wait_until
def test_timeline_size(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "main")
new_timeline_id = env.create_branch("test_timeline_size", ancestor_branch_name="main")
client = env.pageserver.http_client()
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
@@ -68,7 +68,9 @@ def test_timeline_size(neon_simple_env: NeonEnv):
def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "main")
new_timeline_id = env.create_branch(
"test_timeline_size_createdropdb", ancestor_branch_name="main"
)
client = env.pageserver.http_client()
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
@@ -148,7 +150,7 @@ def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, tim
def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota_on_startup")
new_timeline_id = env.create_branch("test_timeline_size_quota_on_startup")
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
@@ -236,7 +238,7 @@ def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder):
def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota")
new_timeline_id = env.create_branch("test_timeline_size_quota")
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
@@ -373,7 +375,7 @@ def test_timeline_physical_size_init(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init")
new_timeline_id = env.create_branch("test_timeline_physical_size_init")
endpoint = env.endpoints.create_start("test_timeline_physical_size_init")
endpoint.safe_psql_many(
@@ -410,7 +412,7 @@ def test_timeline_physical_size_post_checkpoint(neon_env_builder: NeonEnvBuilder
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint")
new_timeline_id = env.create_branch("test_timeline_physical_size_post_checkpoint")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_checkpoint")
endpoint.safe_psql_many(
@@ -446,7 +448,7 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder
)
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction")
new_timeline_id = env.create_branch("test_timeline_physical_size_post_compaction")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_compaction")
# We don't want autovacuum to run on the table, while we are calculating the
@@ -496,7 +498,7 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
)
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc")
new_timeline_id = env.create_branch("test_timeline_physical_size_post_gc")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_gc")
# Like in test_timeline_physical_size_post_compaction, disable autovacuum
@@ -543,7 +545,7 @@ def test_timeline_size_metrics(
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics")
new_timeline_id = env.create_branch("test_timeline_size_metrics")
endpoint = env.endpoints.create_start("test_timeline_size_metrics")
endpoint.safe_psql_many(
@@ -620,7 +622,7 @@ def test_tenant_physical_size(neon_env_builder: NeonEnvBuilder):
pageserver_http = env.pageserver.http_client()
client = env.pageserver.http_client()
tenant, timeline = env.neon_cli.create_tenant()
tenant, timeline = env.create_tenant()
def get_timeline_resident_physical_size(timeline: TimelineId):
sizes = get_physical_size_values(env, tenant, timeline)
@@ -631,7 +633,7 @@ def test_tenant_physical_size(neon_env_builder: NeonEnvBuilder):
for i in range(10):
n_rows = random.randint(100, 1000)
timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant)
timeline = env.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant)
endpoint = env.endpoints.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant)
endpoint.safe_psql_many(
@@ -743,7 +745,7 @@ def test_ondemand_activation(neon_env_builder: NeonEnvBuilder):
tenant_ids = {env.initial_tenant}
for _i in range(0, n_tenants - 1):
tenant_id = TenantId.generate()
env.neon_cli.create_tenant(tenant_id)
env.create_tenant(tenant_id)
tenant_ids.add(tenant_id)
# Restart pageserver with logical size calculations paused
@@ -990,8 +992,8 @@ def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
# the supporting_second does nothing except queue behind env.initial_tenant
# for purposes of showing that eager_tenant breezes past the queue
supporting_second, _ = env.neon_cli.create_tenant()
eager_tenant, _ = env.neon_cli.create_tenant()
supporting_second, _ = env.create_tenant()
eager_tenant, _ = env.create_tenant()
client = env.pageserver.http_client()
client.tenant_location_conf(
@@ -1067,7 +1069,7 @@ def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_met
env = neon_env_builder.init_start()
# because this returns (also elsewhere in this file), we know that SpawnMode::Create skips the queue
lazy_tenant, _ = env.neon_cli.create_tenant()
lazy_tenant, _ = env.create_tenant()
client = env.pageserver.http_client()
client.tenant_location_conf(
@@ -1131,7 +1133,7 @@ def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_met
# starting up the endpoint should make it jump the queue
wait_until(10, 1, lazy_tenant_is_active)
elif activation_method == "branch":
env.neon_cli.create_timeline("second_branch", lazy_tenant)
env.create_timeline("second_branch", lazy_tenant)
wait_until(10, 1, lazy_tenant_is_active)
elif activation_method == "delete":
delete_lazy_activating(lazy_tenant, env.pageserver, expect_attaching=True)

View File

@@ -13,7 +13,7 @@ def test_truncate(neon_env_builder: NeonEnvBuilder, zenbenchmark):
# Problems with FSM/VM forks truncation are most frequently detected during page reconstruction triggered
# by image layer generation. So adjust default parameters to make it happen more frequently.
tenant, _ = env.neon_cli.create_tenant(
tenant, _ = env.create_tenant(
conf={
# disable automatic GC
"gc_period": "0s",

View File

@@ -96,7 +96,7 @@ def test_twophase(neon_simple_env: NeonEnv):
Test branching, when a transaction is in prepared state
"""
env = neon_simple_env
env.neon_cli.create_branch("test_twophase")
env.create_branch("test_twophase")
twophase_test_on_timeline(env)
@@ -147,7 +147,7 @@ def test_twophase_at_wal_segment_start(neon_simple_env: NeonEnv):
very first page of a WAL segment and the server was started up at that first page.
"""
env = neon_simple_env
timeline_id = env.neon_cli.create_branch("test_twophase", "main")
timeline_id = env.create_branch("test_twophase", ancestor_branch_name="main")
endpoint = env.endpoints.create_start(
"test_twophase", config_lines=["max_prepared_transactions=5"]

View File

@@ -247,7 +247,7 @@ def test_vm_bit_clear_on_heap_lock_blackbox(neon_env_builder: NeonEnvBuilder):
# in a "clean" way. Our neon extension will write a full-page image of the VM
# page, and we want to avoid that. A clean shutdown will also not do, for the
# same reason.
endpoint.stop(mode="immediate")
endpoint.stop(mode="immediate", sks_wait_walreceiver_gone=(env.safekeepers, timeline_id))
endpoint.start()
pg_conn = endpoint.connect()

View File

@@ -47,7 +47,7 @@ from fixtures.remote_storage import (
s3_storage,
)
from fixtures.safekeeper.http import SafekeeperHttpClient
from fixtures.safekeeper.utils import are_walreceivers_absent
from fixtures.safekeeper.utils import wait_walreceivers_absent
from fixtures.utils import (
PropagatingThread,
get_dir_size,
@@ -146,7 +146,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
# start postgres on each timeline
endpoints = []
for branch_name in branch_names:
new_timeline_id = env.neon_cli.create_branch(branch_name)
new_timeline_id = env.create_branch(branch_name)
endpoints.append(env.endpoints.create_start(branch_name))
branch_names_to_timeline_ids[branch_name] = new_timeline_id
@@ -284,7 +284,7 @@ def test_restarts(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = n_acceptors
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_restarts")
env.create_branch("test_safekeepers_restarts")
endpoint = env.endpoints.create_start("test_safekeepers_restarts")
# we rely upon autocommit after each statement
@@ -314,7 +314,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_broker", "main")
timeline_id = env.create_branch("test_broker", ancestor_branch_name="main")
endpoint = env.endpoints.create_start("test_broker")
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
@@ -374,7 +374,7 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_safekeepers_wal_removal")
timeline_id = env.create_branch("test_safekeepers_wal_removal")
endpoint = env.endpoints.create_start("test_safekeepers_wal_removal")
# Note: it is important to insert at least two segments, as currently
@@ -504,7 +504,7 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder):
)
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_safekeepers_wal_backup")
timeline_id = env.create_branch("test_safekeepers_wal_backup")
endpoint = env.endpoints.create_start("test_safekeepers_wal_backup")
pg_conn = endpoint.connect()
@@ -561,7 +561,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_s3_wal_replay")
timeline_id = env.create_branch("test_s3_wal_replay")
endpoint = env.endpoints.create_start("test_s3_wal_replay")
@@ -849,7 +849,7 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_timeline_status")
timeline_id = env.create_branch("test_timeline_status")
endpoint = env.endpoints.create_start("test_timeline_status")
wa = env.safekeepers[0]
@@ -894,6 +894,13 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
assert debug_dump_0["timelines"][0]["timeline_id"] == str(timeline_id)
assert debug_dump_0["timelines"][0]["wal_last_modified"] != ""
# a debug dump for a non-existent tenant should return no timelines.
debug_dump_non_existent = wa_http_cli_debug.debug_dump(
{"tenant_id": "deadbeefdeadbeefdeadbeefdeadbeef"}
)
log.info(f"debug_dump_non_existend: {debug_dump_non_existent}")
assert len(debug_dump_non_existent["timelines"]) == 0
endpoint.safe_psql("create table t(i int)")
# ensure epoch goes up after reboot
@@ -941,7 +948,7 @@ def test_start_replication_term(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_start_replication_term")
timeline_id = env.create_branch("test_start_replication_term")
endpoint = env.endpoints.create_start("test_start_replication_term")
endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)")
@@ -973,7 +980,7 @@ def test_sk_auth(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_sk_auth")
timeline_id = env.create_branch("test_sk_auth")
env.endpoints.create_start("test_sk_auth")
sk = env.safekeepers[0]
@@ -1034,7 +1041,7 @@ def test_restart_endpoint(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_sk_auth_restart_endpoint")
env.create_branch("test_sk_auth_restart_endpoint")
endpoint = env.endpoints.create_start("test_sk_auth_restart_endpoint")
with closing(endpoint.connect()) as conn:
@@ -1061,6 +1068,7 @@ def test_restart_endpoint(neon_env_builder: NeonEnvBuilder):
# https://github.com/neondatabase/neon/issues/8911
def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
timeline_id = env.initial_timeline
endpoint = env.endpoints.create_start("main")
@@ -1070,7 +1078,7 @@ def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder):
# we want immediate shutdown to have endpoint restart on xlog switch record,
# so prevent shutdown checkpoint.
endpoint.stop(mode="immediate")
endpoint.stop(mode="immediate", sks_wait_walreceiver_gone=(env.safekeepers, timeline_id))
endpoint = env.endpoints.create_start("main")
endpoint.safe_psql("SELECT 'works'")
@@ -1117,7 +1125,7 @@ def test_late_init(neon_env_builder: NeonEnvBuilder):
sk1.stop()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_late_init")
timeline_id = env.create_branch("test_late_init")
endpoint = env.endpoints.create_start("test_late_init")
# create and insert smth while safekeeper is down...
endpoint.safe_psql("create table t(key int, value text)")
@@ -1222,10 +1230,7 @@ def wait_flush_lsn_align_by_ep(env, branch, tenant_id, timeline_id, ep, sks):
# Even if there is no compute, there might be some in flight data; ensure
# all walreceivers die before rechecking.
for sk_http_cli in sk_http_clis:
wait(
partial(are_walreceivers_absent, sk_http_cli, tenant_id, timeline_id),
"walreceivers to be gone",
)
wait_walreceivers_absent(sk_http_cli, tenant_id, timeline_id)
# Now recheck again flush_lsn and exit if it is good
if is_flush_lsn_aligned(sk_http_clis, tenant_id, timeline_id):
return
@@ -1256,7 +1261,7 @@ def test_lagging_sk(neon_env_builder: NeonEnvBuilder):
# create and insert smth while safekeeper is down...
sk1.stop()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_lagging_sk")
timeline_id = env.create_branch("test_lagging_sk")
ep = env.endpoints.create_start("test_lagging_sk")
ep.safe_psql("create table t(key int, value text)")
# make small insert to be on the same segment
@@ -1343,7 +1348,7 @@ def test_peer_recovery(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_peer_recovery")
timeline_id = env.create_branch("test_peer_recovery")
endpoint = env.endpoints.create_start("test_peer_recovery")
endpoint.safe_psql("create table t(key int, value text)")
@@ -1407,7 +1412,7 @@ def test_wp_graceful_shutdown(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_wp_graceful_shutdown")
timeline_id = env.create_branch("test_wp_graceful_shutdown")
ep = env.endpoints.create_start("test_wp_graceful_shutdown")
ep.safe_psql("create table t(key int, value text)")
ep.stop()
@@ -1600,7 +1605,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 4
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_replace_safekeeper")
timeline_id = env.create_branch("test_replace_safekeeper")
log.info("Use only first 3 safekeepers")
env.safekeepers[3].stop()
@@ -1667,12 +1672,12 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
# Create two tenants: one will be deleted, other should be preserved.
tenant_id = env.initial_tenant
timeline_id_1 = env.neon_cli.create_branch("br1") # Active, delete explicitly
timeline_id_2 = env.neon_cli.create_branch("br2") # Inactive, delete explicitly
timeline_id_3 = env.neon_cli.create_branch("br3") # Active, delete with the tenant
timeline_id_4 = env.neon_cli.create_branch("br4") # Inactive, delete with the tenant
timeline_id_1 = env.create_branch("br1") # Active, delete explicitly
timeline_id_2 = env.create_branch("br2") # Inactive, delete explicitly
timeline_id_3 = env.create_branch("br3") # Active, delete with the tenant
timeline_id_4 = env.create_branch("br4") # Inactive, delete with the tenant
tenant_id_other, timeline_id_other = env.neon_cli.create_tenant()
tenant_id_other, timeline_id_other = env.create_tenant()
# Populate branches
endpoint_1 = env.endpoints.create_start("br1")
@@ -2004,7 +2009,7 @@ def test_idle_reconnections(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
timeline_id = env.neon_cli.create_branch("test_idle_reconnections")
timeline_id = env.create_branch("test_idle_reconnections")
def collect_stats() -> Dict[str, float]:
# we need to collect safekeeper_pg_queries_received_total metric from all safekeepers
@@ -2239,7 +2244,7 @@ def test_broker_discovery(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_safekeeper_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_broker_discovery")
env.create_branch("test_broker_discovery")
endpoint = env.endpoints.create_start(
"test_broker_discovery",
@@ -2320,7 +2325,7 @@ def test_s3_eviction(
# start postgres on each timeline
endpoints: list[Endpoint] = []
for branch_name in branch_names:
timeline_id = env.neon_cli.create_branch(branch_name)
timeline_id = env.create_branch(branch_name)
timelines.append(timeline_id)
endpoints.append(env.endpoints.create_start(branch_name))

View File

@@ -218,7 +218,7 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_safekeeper_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_restarts_under_load")
env.create_branch("test_safekeepers_restarts_under_load")
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
endpoint = env.endpoints.create_start(
"test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"]
@@ -234,7 +234,7 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_restarts_frequent_checkpoints")
env.create_branch("test_restarts_frequent_checkpoints")
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
endpoint = env.endpoints.create_start(
"test_restarts_frequent_checkpoints",
@@ -325,7 +325,7 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_compute_restarts")
env.create_branch("test_compute_restarts")
asyncio.run(run_compute_restarts(env))
@@ -435,7 +435,7 @@ def test_concurrent_computes(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_concurrent_computes")
env.create_branch("test_concurrent_computes")
asyncio.run(run_concurrent_computes(env))
@@ -484,7 +484,7 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 2
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_unavailability")
env.create_branch("test_safekeepers_unavailability")
endpoint = env.endpoints.create_start("test_safekeepers_unavailability")
asyncio.run(run_unavailability(env, endpoint))
@@ -493,7 +493,7 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder):
async def run_recovery_uncommitted(env: NeonEnv):
(sk1, sk2, _) = env.safekeepers
env.neon_cli.create_branch("test_recovery_uncommitted")
env.create_branch("test_recovery_uncommitted")
ep = env.endpoints.create_start("test_recovery_uncommitted")
ep.safe_psql("create table t(key int, value text)")
ep.safe_psql("insert into t select generate_series(1, 100), 'payload'")
@@ -589,7 +589,7 @@ def test_wal_truncation(neon_env_builder: NeonEnvBuilder):
async def run_segment_init_failure(env: NeonEnv):
env.neon_cli.create_branch("test_segment_init_failure")
env.create_branch("test_segment_init_failure")
ep = env.endpoints.create_start("test_segment_init_failure")
ep.safe_psql("create table t(key int, value text)")
ep.safe_psql("insert into t select generate_series(1, 100), 'payload'")
@@ -684,7 +684,7 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_safekeepers_race_conditions")
env.create_branch("test_safekeepers_race_conditions")
endpoint = env.endpoints.create_start("test_safekeepers_race_conditions")
asyncio.run(run_race_conditions(env, endpoint))
@@ -761,7 +761,7 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder, test_output_dir: Path, bu
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_lagging")
env.create_branch("test_wal_lagging")
endpoint = env.endpoints.create_start("test_wal_lagging")
asyncio.run(run_wal_lagging(env, endpoint, test_output_dir))

View File

@@ -14,7 +14,7 @@ def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
expected_timeout_error = f"Timed out while waiting for WAL record at LSN {future_lsn} to arrive"
env.pageserver.allowed_errors.append(f".*{expected_timeout_error}.*")
@@ -57,7 +57,7 @@ def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuil
env = neon_env_builder.init_start()
env.pageserver.http_client()
tenant_id, timeline_id = env.neon_cli.create_tenant()
tenant_id, timeline_id = env.create_tenant()
elements_to_insert = 1_000_000
expected_timeout_error = f"Timed out while waiting for WAL record at LSN {future_lsn} to arrive"

View File

@@ -38,7 +38,7 @@ def test_wal_restore(
pg_distrib_dir: Path,
):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_restore")
env.create_branch("test_wal_restore")
endpoint = env.endpoints.create_start("test_wal_restore")
endpoint.safe_psql("create table t as select generate_series(1,300000)")
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])

View File

@@ -40,7 +40,7 @@ def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
pageserver_http.tenant_status(tenant_id)
# create new tenant
tenant_id, _ = env.neon_cli.create_tenant()
tenant_id, _ = env.create_tenant()
# assert tenant exists on disk
assert (env.pageserver.tenant_dir(tenant_id)).exists()