mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 05:22:56 +00:00
Decrease the number of pageserver threads by running gc and compaction in a blocking tokio thread pool
71 lines
2.7 KiB
Python
71 lines
2.7 KiB
Python
from fixtures.neon_fixtures import NeonEnvBuilder, wait_until
|
|
from uuid import UUID
|
|
import time
|
|
|
|
|
|
def get_only_element(l):
    """Return the single element of ``l``.

    Raises AssertionError if ``l`` does not contain exactly one element; the
    message reports the actual length and contents to make failures easier
    to diagnose.
    """
    assert len(l) == 1, f"expected exactly one element, got {len(l)}: {l!r}"
    return l[0]
|
|
|
|
|
|
# Test that gc and compaction tenant tasks start and stop correctly
|
|
def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
    """Check that per-tenant gc and compaction tasks start and stop correctly.

    The test starts a pageserver with short gc/compaction periods, creates a
    tenant with a timeline and a compute node, stops the compute, detaches
    every timeline of every tenant, and then waits until each tenant reports
    the "Idle" state and the task start/stop counters exposed in the
    pageserver metrics are equal, with no panics recorded.
    """
    # The gc and compaction loops don't bother to watch for tenant state
    # changes while sleeping, so we use small periods to make this test
    # run faster. With default settings we'd have to wait longer for tasks
    # to notice state changes and shut down.
    # TODO fix this behavior in the pageserver
    tenant_config = "{gc_period = '1 s', compaction_period = '1 s'}"
    neon_env_builder.pageserver_config_override = f"tenant_config={tenant_config}"
    name = "test_tenant_tasks"
    env = neon_env_builder.init_start()
    client = env.pageserver.http_client()

    def get_state(tenant):
        """Return the "state" field the pageserver reports for `tenant`."""
        all_states = client.tenant_list()
        # Tenant ids in the listing are compared against the UUID's hex form.
        matching = [t for t in all_states if t["id"] == tenant.hex]
        return get_only_element(matching)["state"]

    def get_metric_value(name):
        """Return the integer value of the metric line starting with `name`.

        Returns 0 when no line of the metrics output starts with `name`.
        """
        metrics = client.get_metrics()
        relevant = [line for line in metrics.splitlines() if line.startswith(name)]
        if not relevant:
            return 0
        line = get_only_element(relevant)
        # Drop the metric name as a prefix. str.lstrip(name) would instead
        # strip any leading characters that occur anywhere in `name`, which
        # could also eat into the value.
        value = line[len(name):].strip()
        return int(value)

    def detach_all_timelines(tenant):
        """Detach every timeline the pageserver lists for `tenant`."""
        timelines = [UUID(t["timeline_id"]) for t in client.timeline_list(tenant)]
        for t in timelines:
            client.timeline_detach(tenant, t)

    def assert_idle(tenant):
        """Assert that `tenant` is currently in the "Idle" state."""
        assert get_state(tenant) == "Idle"

    # Create tenant, start compute
    tenant, _ = env.neon_cli.create_tenant()
    env.neon_cli.create_timeline(name, tenant_id=tenant)
    pg = env.postgres.create_start(name, tenant_id=tenant)
    assert get_state(tenant) == "Active"

    # Stop compute
    pg.stop()

    # Detach all timelines of every tenant and wait for each tenant to go idle
    # TODO they should be already idle since there are no active computes
    for tenant_info in client.tenant_list():
        tenant_id = UUID(tenant_info["id"])
        detach_all_timelines(tenant_id)
        # The lambda is invoked by wait_until before the loop advances, so
        # capturing the loop variable here is safe.
        # NOTE(review): presumably wait_until retries the callable (10
        # attempts, 0.2 s apart) until it stops raising -- confirm in fixtures.
        wait_until(10, 0.2, lambda: assert_idle(tenant_id))

    # Assert that all tasks finish quickly after tenants go idle
    def assert_tasks_finish():
        """Assert every started tenant task has stopped and none panicked."""
        tasks_started = get_metric_value('pageserver_tenant_task_events{event="start"}')
        tasks_ended = get_metric_value('pageserver_tenant_task_events{event="stop"}')
        tasks_panicked = get_metric_value('pageserver_tenant_task_events{event="panic"}')
        assert tasks_started == tasks_ended
        assert tasks_panicked == 0

    wait_until(10, 0.2, assert_tasks_finish)
|