neon/test_runner/regress/test_tenant_tasks.py
Heikki Linnakangas 53f438a8a8 Rename "Postgres nodes" in control_plane to endpoints.
We use the term "endpoint" for compute Postgres nodes in the web UI
and user-facing documentation now. Adjust the nomenclature in the code.

This changes the name of the "neon_local pg" command to "neon_local
endpoint". Also adjust names of classes, variables etc. in the python
tests accordingly.
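
For example, a compute is now created through the "endpoints" fixture. A
minimal test in the new spelling (it simply mirrors the calls made by the
test file below; the test name itself is only illustrative):

    from fixtures.neon_fixtures import NeonEnvBuilder

    def test_endpoint_naming(neon_env_builder: NeonEnvBuilder):
        # Computes are "endpoints" now, both in neon_local and in the
        # Python test fixtures.
        env = neon_env_builder.init_start()
        tenant, _ = env.neon_cli.create_tenant()
        env.neon_cli.create_timeline("main", tenant_id=tenant)
        endpoint = env.endpoints.create_start("main", tenant_id=tenant)
        endpoint.stop()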

This also changes the directory structure so that endpoints are now
stored in:

    .neon/endpoints/<endpoint id>

instead of:

    .neon/pgdatadirs/tenants/<tenant_id>/<endpoint (node) name>

The tenant ID is no longer part of the path. That means that you
cannot have two endpoints with the same name/ID in two different
tenants anymore. That's consistent with how we treat endpoints in the
real control plane and proxy: the endpoint ID must be globally unique.
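
As a rough sketch of the layout change in code (the helper names here are
hypothetical; only the path shapes come from the description above):

    from pathlib import Path

    def endpoint_path(repo_dir: Path, endpoint_id: str) -> Path:
        # New layout: keyed by endpoint id alone, so the id must be
        # globally unique across tenants.
        return repo_dir / "endpoints" / endpoint_id

    def old_endpoint_path(repo_dir: Path, tenant_id: str, node_name: str) -> Path:
        # Old layout: nested under the tenant, so the same node name could
        # exist in two different tenants.
        return repo_dir / "pgdatadirs" / "tenants" / tenant_id / node_name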
2023-04-13 14:34:29 +03:00


from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.pageserver.utils import assert_tenant_state, wait_until_tenant_active
from fixtures.types import TenantId, TimelineId
from fixtures.utils import wait_until


def get_only_element(l):  # noqa: E741
    assert len(l) == 1
    return l[0]


# Test that gc and compaction tenant tasks start and stop correctly
def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
    name = "test_tenant_tasks"
    env = neon_env_builder.init_start()
    client = env.pageserver.http_client()

    def get_state(tenant):
        all_states = client.tenant_list()
        matching = [t for t in all_states if TenantId(t["id"]) == tenant]
        return get_only_element(matching)["state"]

    def delete_all_timelines(tenant: TenantId):
        timelines = [TimelineId(t["timeline_id"]) for t in client.timeline_list(tenant)]
        for t in timelines:
            client.timeline_delete(tenant, t)

    # Create tenant, start compute
    tenant, _ = env.neon_cli.create_tenant()
    env.neon_cli.create_timeline(name, tenant_id=tenant)
    endpoint = env.endpoints.create_start(name, tenant_id=tenant)
    assert_tenant_state(
        client,
        tenant,
        expected_state="Active",
        message="Pageserver should activate a tenant and start background jobs if timelines are loaded",
    )

    # Stop compute
    endpoint.stop()

    # Delete all timelines on all tenants.
    #
    # FIXME: we used to check that the background jobs are stopped when all timelines
    # are removed, but we don't stop them anymore. Not sure if this test still makes sense
    # or we should just remove it.
    for tenant_info in client.tenant_list():
        tenant_id = TenantId(tenant_info["id"])
        delete_all_timelines(tenant_id)
        wait_until_tenant_active(client, tenant_id, iterations=10, period=0.2)

    # Assert that all tasks finish quickly after tenant is detached
    task_starts = client.get_metric_value("pageserver_tenant_task_events_total", {"event": "start"})
    assert task_starts is not None
    assert int(task_starts) > 0
    client.tenant_detach(tenant)
    client.tenant_detach(env.initial_tenant)

    def assert_tasks_finish():
        tasks_started = client.get_metric_value(
            "pageserver_tenant_task_events_total", {"event": "start"}
        )
        tasks_ended = client.get_metric_value(
            "pageserver_tenant_task_events_total", {"event": "stop"}
        )
        tasks_panicked = client.get_metric_value(
            "pageserver_tenant_task_events_total", {"event": "panic"}
        )
        log.info(f"started {tasks_started}, ended {tasks_ended}, panicked {tasks_panicked}")
        assert tasks_started == tasks_ended
        assert tasks_panicked is None or int(tasks_panicked) == 0

    wait_until(10, 0.2, assert_tasks_finish)
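
The final wait_until(10, 0.2, assert_tasks_finish) call polls the assertion
until it stops raising. A rough sketch of such a retry helper, assuming the
(iterations, interval, func) calling convention used above (the real helper
lives in fixtures.utils):

    import time

    def wait_until(number_of_iterations: int, interval: float, func):
        # Call func repeatedly until it returns without raising, sleeping
        # `interval` seconds between attempts; give up after
        # `number_of_iterations` tries by re-raising the last failure.
        last_exception = None
        for _ in range(number_of_iterations):
            try:
                return func()
            except Exception as e:
                last_exception = e
                time.sleep(interval)
        raise last_exception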