from __future__ import annotations
import concurrent.futures
import math
import random
import time
from collections import defaultdict
from contextlib import closing
from typing import TYPE_CHECKING
import psycopg2.errors
import psycopg2.extras
import pytest
from fixtures.common_types import TenantId, TimelineId
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
NeonPageserver,
PgBin,
VanillaPostgres,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.utils import (
assert_tenant_state,
timeline_delete_wait_completed,
wait_for_upload_queue_empty,
wait_until_tenant_active,
)
from fixtures.remote_storage import RemoteStorageKind
from fixtures.utils import get_timeline_dir_size, wait_until
if TYPE_CHECKING:
from pathlib import Path
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
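# Check that the incrementally maintained logical size matches the non-incremental
# calculation on a fresh branch, after some inserts, and after a TRUNCATE.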
def test_timeline_size(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.create_branch("test_timeline_size", ancestor_branch_name="main")
client = env.pageserver.http_client()
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
endpoint_main = env.endpoints.create_start("test_timeline_size")
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10) g
"""
)
res = client.timeline_detail(
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
)
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
cur.execute("TRUNCATE foo")
res = client.timeline_detail(
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
)
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
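# Check that the logical size stays consistent with the non-incremental calculation
# across CREATE DATABASE, writes into the new database, and DROP DATABASE.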
def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.create_branch(
"test_timeline_size_createdropdb", ancestor_branch_name="main"
)
client = env.pageserver.http_client()
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
timeline_details = client.timeline_detail(
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
)
endpoint_main = env.endpoints.create_start("test_timeline_size_createdropdb")
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
res = client.timeline_detail(
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
)
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
assert (
timeline_details["current_logical_size_non_incremental"]
== res["current_logical_size_non_incremental"]
), "no writes should not change the incremental logical size"
cur.execute("CREATE DATABASE foodb")
with closing(endpoint_main.connect(dbname="foodb")) as conn:
with conn.cursor() as cur2:
cur2.execute("CREATE TABLE foo (t text)")
cur2.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10) g
"""
)
res = client.timeline_detail(
env.initial_tenant,
new_timeline_id,
include_non_incremental_logical_size=True,
)
assert (
res["current_logical_size"] == res["current_logical_size_non_incremental"]
)
cur.execute("DROP DATABASE foodb")
res = client.timeline_detail(
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
)
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
# wait until received_lsn_lag is 0
def wait_for_pageserver_catchup(endpoint_main: Endpoint, polling_interval=1, timeout=60):
started_at = time.time()
received_lsn_lag = 1
while received_lsn_lag > 0:
elapsed = time.time() - started_at
if elapsed > timeout:
raise RuntimeError(
"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()"
)
res = endpoint_main.safe_psql(
"""
SELECT
pg_size_pretty(neon.pg_cluster_size()),
pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn) as received_lsn_lag
FROM neon.backpressure_lsns();
""",
dbname="postgres",
)[0]
log.info(f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}")
received_lsn_lag = res[1]
time.sleep(polling_interval)
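# Verify that neon.max_cluster_size is enforced: once the logical size exceeds the
# limit, further inserts must fail with DiskFull, the endpoint must still restart
# cleanly, and the limit must keep being enforced after the restart.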
def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
new_timeline_id = env.create_branch("test_timeline_size_quota_on_startup")
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
size_limit_mb = 30
endpoint_main = env.endpoints.create(
"test_timeline_size_quota_on_startup",
# Set small limit for the test
config_lines=[f"neon.max_cluster_size={size_limit_mb}MB"],
)
endpoint_main.start()
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
# Insert many rows. This query must fail because of the space limit
try:
def write_rows(count):
for _i in range(count):
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100) g
"""
)
# Write some data that exceeds the limit, then let the pageserver ingest it to guarantee that some feedback has made it to
# the safekeeper, then try to write some more. We expect either the initial writes or the ones after
# the wait_for_last_flush_lsn to generate an exception.
#
# Without the wait_for_last_flush_lsn, the size limit sometimes isn't enforced (see https://github.com/neondatabase/neon/issues/6562)
write_rows(2500)
wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
logical_size = env.pageserver.http_client().timeline_detail(
env.initial_tenant, new_timeline_id
)["current_logical_size"]
assert logical_size > size_limit_mb * 1024 * 1024
write_rows(2500)
# If we get here, the timeline size limit failed. Find out from the pageserver how large it
# thinks the timeline is.
wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
logical_size = env.pageserver.http_client().timeline_detail(
env.initial_tenant, new_timeline_id
)["current_logical_size"]
log.error(
f"Query unexpectedly succeeded, pageserver logical size is {logical_size}"
)
raise AssertionError()
except psycopg2.errors.DiskFull as err:
log.info(f"Query expectedly failed with: {err}")
# Restart endpoint that reached the limit to ensure that it doesn't fail on startup
# i.e. the size limit is not enforced during startup.
endpoint_main.stop()
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
# which is needed for neon.pg_cluster_size() to work
endpoint_main.respec(skip_pg_catalog_updates=False)
endpoint_main.start()
# ensure that the limit is enforced after startup
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
# This query must fail because of the space limit
try:
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
"""
)
# If we get here, the timeline size limit failed
log.error("Query unexpectedly succeeded")
raise AssertionError()
except psycopg2.errors.DiskFull as err:
log.info(f"Query expectedly failed with: {err}")
def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
new_timeline_id = env.create_branch("test_timeline_size_quota")
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
endpoint_main = env.endpoints.create(
"test_timeline_size_quota",
# Set small limit for the test
config_lines=["neon.max_cluster_size=30MB"],
)
# don't skip pg_catalog updates - it runs CREATE EXTENSION neon
# which is needed for neon.pg_cluster_size() to work
endpoint_main.respec(skip_pg_catalog_updates=False)
endpoint_main.start()
with closing(endpoint_main.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
wait_for_pageserver_catchup(endpoint_main)
# Insert many rows. This query must fail because of the space limit
try:
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
"""
)
wait_for_pageserver_catchup(endpoint_main)
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 500000) g
"""
)
# If we get here, the timeline size limit failed
log.error("Query unexpectedly succeeded")
raise AssertionError()
except psycopg2.errors.DiskFull as err:
log.info(f"Query expectedly failed with: {err}")
# drop table to free space
cur.execute("DROP TABLE foo")
wait_for_pageserver_catchup(endpoint_main)
# create it again and insert some rows. This query must succeed
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10000) g
"""
)
wait_for_pageserver_catchup(endpoint_main)
cur.execute("SELECT * from pg_size_pretty(neon.pg_cluster_size())")
pg_cluster_size = cur.fetchone()
log.info(f"pg_cluster_size = {pg_cluster_size}")
new_res = client.timeline_detail(
env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
)
assert new_res["current_logical_size"] == new_res["current_logical_size_non_incremental"], (
"after the WAL is streamed, current_logical_size is expected to be calculated and to be equal its non-incremental value"
)
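# Pause the initial logical size calculation with a failpoint, then delete the
# timeline (or detach the tenant) and check that the stuck size computation task is
# cancelled so the operation can complete.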
@pytest.mark.parametrize("deletion_method", ["tenant_detach", "timeline_delete"])
def test_timeline_initial_logical_size_calculation_cancellation(
neon_env_builder: NeonEnvBuilder, deletion_method: str
):
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# load in some data
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
endpoint.safe_psql_many(
[
"CREATE TABLE foo (x INTEGER)",
"INSERT INTO foo SELECT g FROM generate_series(1, 10000) g",
]
)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
endpoint.stop()
# restart with a failpoint inside the initial size calculation task
env.pageserver.stop()
env.pageserver.start(
extra_env_vars={"FAILPOINTS": "timeline-calculate-logical-size-pause=pause"}
)
wait_until_tenant_active(client, tenant_id)
# kick off initial size calculation task (the response we get here is the estimated size)
def assert_size_calculation_not_done():
details = client.timeline_detail(
tenant_id, timeline_id, include_non_incremental_logical_size=True
)
assert details["current_logical_size"] != details["current_logical_size_non_incremental"]
assert_size_calculation_not_done()
# ensure we're really stuck
time.sleep(5)
assert_size_calculation_not_done()
log.info(
f"delete the timeline using {deletion_method}, this should cancel size computation tasks and wait for them to finish"
)
if deletion_method == "tenant_detach":
client.tenant_detach(tenant_id)
elif deletion_method == "timeline_delete":
timeline_delete_wait_completed(client, tenant_id, timeline_id)
else:
raise RuntimeError(deletion_method)
# timeline-calculate-logical-size-pause is still paused, but it doesn't
# matter because it's a pausable_failpoint, which can be cancelled by drop.
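# Check the physical size invariants for the initial physical size that is
# calculated when the pageserver restarts.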
def test_timeline_physical_size_init(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
new_timeline_id = env.create_branch("test_timeline_physical_size_init")
endpoint = env.endpoints.create_start("test_timeline_physical_size_init")
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 1000) g""",
]
)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
# restart the pageserver to force recalculating the timeline's initial physical size
env.pageserver.stop()
env.pageserver.start()
# Wait for the tenant to be loaded
client = env.pageserver.http_client()
wait_until(lambda: assert_tenant_state(client, env.initial_tenant, "Active"))
assert_physical_size_invariants(
get_physical_size_values(env, env.initial_tenant, new_timeline_id),
)
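# Check the physical size invariants after an explicit timeline checkpoint.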
def test_timeline_physical_size_post_checkpoint(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.create_branch("test_timeline_physical_size_post_checkpoint")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_checkpoint")
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 1000) g""",
]
)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
def check():
assert_physical_size_invariants(
get_physical_size_values(env, env.initial_tenant, new_timeline_id),
)
wait_until(check)
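# Check the physical size invariants after a checkpoint followed by compaction.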
def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
# Disable background compaction as we don't want it to happen after the `get_physical_size` request
# and before checking the expected size on disk, which would make the assertion fail
env = neon_env_builder.init_start(
initial_tenant_conf={
"checkpoint_distance": "100000",
"compaction_period": "0s",
}
)
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.create_branch("test_timeline_physical_size_post_compaction")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_compaction")
# We don't want autovacuum to run on the table, while we are calculating the
# physical size, because that could cause a new layer to be created and a
# mismatch between the incremental and non-incremental size. (If that still
# happens, because of some other background activity or autovacuum on other
# tables, we could simply retry the size calculations. It's unlikely that
# that would happen more than once.)
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g""",
]
)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
# shutdown safekeepers to prevent new data from coming in
endpoint.stop() # We can't gracefully stop after safekeepers die
for sk in env.safekeepers:
sk.stop()
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
pageserver_http.timeline_compact(env.initial_tenant, new_timeline_id)
wait_for_upload_queue_empty(pageserver_http, env.initial_tenant, new_timeline_id)
assert_physical_size_invariants(
get_physical_size_values(env, env.initial_tenant, new_timeline_id),
)
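# Check the physical size invariants after a checkpoint followed by GC.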
def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
# Disable background compaction and GC as we don't want them to happen after the `get_physical_size` request
# and before checking the expected size on disk, which would make the assertion fail
env = neon_env_builder.init_start(
initial_tenant_conf={
"checkpoint_distance": "100000",
"compaction_period": "0s",
"gc_period": "0s",
"pitr_interval": "1s",
}
)
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.create_branch("test_timeline_physical_size_post_gc")
endpoint = env.endpoints.create_start("test_timeline_physical_size_post_gc")
# Like in test_timeline_physical_size_post_compaction, disable autovacuum
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g""",
]
)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
endpoint.safe_psql(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
"""
)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
pageserver_http.timeline_gc(env.initial_tenant, new_timeline_id, gc_horizon=None)
wait_for_upload_queue_empty(pageserver_http, env.initial_tenant, new_timeline_id)
assert_physical_size_invariants(
get_physical_size_values(env, env.initial_tenant, new_timeline_id),
)
# The timeline logical and physical sizes are also exposed as prometheus metrics.
# Test the metrics.
def test_timeline_size_metrics(
neon_simple_env: NeonEnv,
test_output_dir: Path,
port_distributor: PortDistributor,
pg_distrib_dir: Path,
pg_version: PgVersion,
):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
new_timeline_id = env.create_branch("test_timeline_size_metrics")
endpoint = env.endpoints.create_start("test_timeline_size_metrics")
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g""",
]
)
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_timeline_id)
pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
# get the metrics and parse the metric for the current timeline's physical size
metrics = env.pageserver.http_client().get_metrics()
tl_physical_size_metric = metrics.query_one(
name="pageserver_resident_physical_size",
filter={
"tenant_id": str(env.initial_tenant),
"timeline_id": str(new_timeline_id),
},
).value
# assert that the physical size metric matches the actual physical size on disk
timeline_path = env.pageserver.timeline_dir(env.initial_tenant, new_timeline_id)
assert tl_physical_size_metric == get_timeline_dir_size(timeline_path)
# Check that the logical size metric is sane, and roughly matches the actual database size
tl_logical_size_metric = metrics.query_one(
name="pageserver_current_logical_size",
filter={
"tenant_id": str(env.initial_tenant),
"timeline_id": str(new_timeline_id),
},
).value
pgdatadir = test_output_dir / "pgdata-vanilla"
pg_bin = PgBin(test_output_dir, pg_distrib_dir, pg_version)
port = port_distributor.get_port()
with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
vanilla_pg.start()
# Load the same data into the vanilla cluster so its size can be compared against the logical size metric
vanilla_pg.safe_psql("CREATE TABLE foo (t text)")
vanilla_pg.safe_psql(
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g"""
)
vanilla_size_sum = vanilla_pg.safe_psql(
"select sum(pg_database_size(oid)) from pg_database"
)[0][0]
# Compare the size with Vanilla postgres.
# Allow some slack, because the logical size metric includes some things like
# the SLRUs that are not included in pg_database_size().
assert math.isclose(tl_logical_size_metric, vanilla_size_sum, abs_tol=2 * 1024 * 1024)
# The sum of the sizes of all databases, as seen by pg_database_size(), should also
# be close. Again, allow some slack: the logical size metric includes some things,
# like the SLRUs, that are not included in pg_database_size().
dbsize_sum = endpoint.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0]
assert math.isclose(dbsize_sum, tl_logical_size_metric, abs_tol=2 * 1024 * 1024)
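# Create several branches with data and check that the tenant-level
# current_physical_size equals the sum of the per-timeline physical sizes.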
def test_tenant_physical_size(neon_env_builder: NeonEnvBuilder):
random.seed(100)
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
client = env.pageserver.http_client()
tenant, timeline = env.create_tenant()
def get_timeline_resident_physical_size(timeline: TimelineId):
sizes = get_physical_size_values(env, tenant, timeline)
assert_physical_size_invariants(sizes)
return sizes.prometheus_resident_physical
timeline_total_resident_physical_size = get_timeline_resident_physical_size(timeline)
for i in range(10):
n_rows = random.randint(100, 1000)
timeline = env.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant)
endpoint = env.endpoints.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant)
endpoint.safe_psql_many(
[
"CREATE TABLE foo (t text)",
f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g",
]
)
wait_for_last_flush_lsn(env, endpoint, tenant, timeline)
pageserver_http.timeline_checkpoint(tenant, timeline)
wait_for_upload_queue_empty(pageserver_http, tenant, timeline)
timeline_total_resident_physical_size += get_timeline_resident_physical_size(timeline)
endpoint.stop()
# ensure that the tenant_status current_physical_size equals the sum of the timelines' current_physical_size values
tenant_current_physical_size = int(
client.tenant_status(tenant_id=tenant)["current_physical_size"]
)
assert tenant_current_physical_size == sum(
[tl["current_physical_size"] for tl in client.timeline_list(tenant_id=tenant)]
)
# since we don't do layer eviction, current_physical_size is identical to resident physical size
assert timeline_total_resident_physical_size == tenant_current_physical_size
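# Physical size of a single timeline as reported by the different sources we can
# query: the HTTP API, prometheus metrics, the layer map, and the files on disk.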
class TimelinePhysicalSizeValues:
api_current_physical: int
prometheus_resident_physical: float
prometheus_remote_physical: float | None = None
python_timelinedir_layerfiles_physical: int
layer_map_file_size_sum: int
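# Collect the physical size of a timeline from all available sources, so that
# assert_physical_size_invariants can cross-check them.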
def get_physical_size_values(
env: NeonEnv,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> TimelinePhysicalSizeValues:
res = TimelinePhysicalSizeValues()
client = env.pageserver.http_client()
res.layer_map_file_size_sum = sum(
layer.layer_file_size
for layer in client.layer_map_info(tenant_id, timeline_id).historic_layers
)
metrics = client.get_metrics()
metrics_filter = {"tenant_id": str(tenant_id), "timeline_id": str(timeline_id)}
res.prometheus_resident_physical = metrics.query_one(
"pageserver_resident_physical_size", metrics_filter
).value
res.prometheus_remote_physical = metrics.query_one(
"pageserver_remote_physical_size", metrics_filter
).value
detail = client.timeline_detail(
tenant_id, timeline_id, include_timeline_dir_layer_file_size_sum=True
)
res.api_current_physical = detail["current_physical_size"]
timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
res.python_timelinedir_layerfiles_physical = get_timeline_dir_size(timeline_path)
return res
def assert_physical_size_invariants(sizes: TimelinePhysicalSizeValues):
# resident physical size is defined as the total size of the layer files present in the timeline directory on local disk
assert sizes.python_timelinedir_layerfiles_physical == sizes.prometheus_resident_physical
assert sizes.python_timelinedir_layerfiles_physical == sizes.layer_map_file_size_sum
# we don't do layer eviction, so, all layers are resident
assert sizes.api_current_physical == sizes.prometheus_resident_physical
assert sizes.prometheus_resident_physical == sizes.prometheus_remote_physical
# XXX would be nice to assert layer file physical storage utilization here as well, but we can only do that for LocalFS
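# Wait until the pageserver reports that `count` tenants have finished warmup.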
def wait_for_tenant_startup_completions(client: PageserverHttpClient, count: int):
def condition():
assert client.get_metric_value("pageserver_tenant_startup_complete_total") == count
wait_until(condition)
def test_ondemand_activation(neon_env_builder: NeonEnvBuilder):
"""
Tenants warming up opportunistically will wait for one another's logical size calculations to complete
before proceeding. However, they skip this if a client is actively trying to access them.
This test is not purely about logical sizes, but logical size calculation is the phase that we
use as a proxy for "warming up" in this test: it happens within the semaphore guard used
to limit concurrent tenant warm-up.
"""
# We will run with the limit set to 1, so that once we have one tenant stuck
# in a pausable failpoint, the rest are prevented from proceeding through warmup.
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = 1"
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
# Create some tenants
n_tenants = 10
tenant_ids = {env.initial_tenant}
for _i in range(0, n_tenants - 1):
tenant_id = TenantId.generate()
env.create_tenant(tenant_id)
tenant_ids.add(tenant_id)
# Restart pageserver with logical size calculations paused
env.pageserver.stop()
env.pageserver.start(
extra_env_vars={"FAILPOINTS": "timeline-calculate-logical-size-pause=pause"}
)
def get_tenant_states():
states = {}
log.info(f"Tenant ids: {tenant_ids}")
for tenant_id in tenant_ids:
tenant = pageserver_http.tenant_status(tenant_id=tenant_id)
states[tenant_id] = tenant["state"]["slug"]
log.info(f"Tenant states: {states}")
return states
def at_least_one_active():
assert "Active" in set(get_tenant_states().values())
# One tenant should activate, then get stuck in their logical size calculation
wait_until(at_least_one_active)
# Wait some walltime to gain confidence that other tenants really are stuck and not proceeding to activate
time.sleep(5)
# We should see one tenant win the activation race, and enter logical size calculation. The rest
# will stay in Attaching state, waiting for the "warmup_limit" semaphore
expect_activated = 1
states = get_tenant_states()
assert len([s for s in states.values() if s == "Active"]) == expect_activated
assert len([s for s in states.values() if s == "Attaching"]) == n_tenants - expect_activated
assert (
pageserver_http.get_metric_value("pageserver_tenant_startup_scheduled_total") == n_tenants
)
# This is zero, and subsequent checks are expect_activated - 1, because this counter does not
# count how many tenants are Active; it counts how many have finished warmup. The first tenant
# that reached Active is still stuck in its logical size calculation, and has therefore not finished warmup.
assert pageserver_http.get_metric_value("pageserver_tenant_startup_complete_total") == 0
# If a client accesses one of the blocked tenants, it should skip waiting for warmup and
# go active as fast as it can.
stuck_tenant_id = list(
[(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"]
)[0][0]
endpoint = env.endpoints.create_start(branch_name="main", tenant_id=stuck_tenant_id)
endpoint.safe_psql_many(
[
"CREATE TABLE foo (x INTEGER)",
"INSERT INTO foo SELECT g FROM generate_series(1, 10) g",
]
)
endpoint.stop()
# The one that we successfully accessed is now Active
expect_activated += 1
assert pageserver_http.tenant_status(tenant_id=stuck_tenant_id)["state"]["slug"] == "Active"
wait_for_tenant_startup_completions(pageserver_http, count=expect_activated - 1)
# The ones we didn't touch are still in Attaching
assert (
len([s for s in get_tenant_states().values() if s == "Attaching"])
== n_tenants - expect_activated
)
# Timeline creation operations also wake up Attaching tenants
stuck_tenant_id = list(
[(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"]
)[0][0]
pageserver_http.timeline_create(env.pg_version, stuck_tenant_id, TimelineId.generate())
expect_activated += 1
assert pageserver_http.tenant_status(tenant_id=stuck_tenant_id)["state"]["slug"] == "Active"
assert (
len([s for s in get_tenant_states().values() if s == "Attaching"])
== n_tenants - expect_activated
)
wait_for_tenant_startup_completions(pageserver_http, count=expect_activated - 1)
# When we unblock logical size calculation, all tenants should proceed to active state via
# the warmup route.
pageserver_http.configure_failpoints(("timeline-calculate-logical-size-pause", "off"))
def all_active():
assert all(s == "Active" for s in get_tenant_states().values())
wait_until(all_active)
# Final control check: restarting with no failpoints at all results in all tenants coming active
# without being prompted by client I/O
env.pageserver.stop()
env.pageserver.start()
wait_until(all_active)
assert (
pageserver_http.get_metric_value("pageserver_tenant_startup_scheduled_total") == n_tenants
)
wait_for_tenant_startup_completions(pageserver_http, count=n_tenants)
# Check that tenant deletion/detach proactively wakes tenants: this is done separately from the main
# body of the test because it disrupts the tenant counts
env.pageserver.stop()
env.pageserver.start(
extra_env_vars={"FAILPOINTS": "timeline-calculate-logical-size-pause=pause"}
)
wait_until(at_least_one_active)
detach_tenant_id = list(
[(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"]
)[0][0]
delete_tenant_id = list(
[(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"]
)[1][0]
# Detaching a stuck tenant should proceed promptly
# (reproducer for https://github.com/neondatabase/neon/pull/6430)
env.pageserver.http_client().tenant_detach(detach_tenant_id)
tenant_ids.remove(detach_tenant_id)
# FIXME: currently the mechanism for cancelling attach is to set state to broken, which is reported spuriously at error level
env.pageserver.allowed_errors.append(
".*attach failed, setting tenant state to Broken: Shut down while Attaching"
)
# Deleting a stuck tenant should prompt it to go active; in some cases it has already
# been activated because it is queued behind the detach above
delete_lazy_activating(delete_tenant_id, env.pageserver, expect_attaching=False)
tenant_ids.remove(delete_tenant_id)
# Check that all the stuck tenants proceed to active (apart from the one that deletes, and the one
# we detached)
wait_until(all_active)
assert len(get_tenant_states()) == n_tenants - 2
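# Delete a tenant whose activation is blocked by the logical size failpoint: start
# the deletion in the background, wait until it is shutting the tenant down, then
# clear the failpoint so the deletion can complete.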
def delete_lazy_activating(
delete_tenant_id: TenantId, pageserver: NeonPageserver, expect_attaching: bool
):
pageserver_http = pageserver.http_client()
if expect_attaching:
assert pageserver_http.tenant_status(delete_tenant_id)["state"]["slug"] == "Attaching"
with concurrent.futures.ThreadPoolExecutor() as executor:
log.info("Starting background delete")
def shutting_down():
assert pageserver.log_contains(".*Waiting for timelines.*") is not None
def delete_tenant():
pageserver_http.tenant_delete(delete_tenant_id)
background_delete = executor.submit(delete_tenant)
# We expect deletion to enter shutdown of the tenant even though it's in the attaching state
try:
# Deletion will get to the point in shutdown where it's waiting for timeline shutdown, then
# hang because of our failpoint blocking activation.
wait_until(shutting_down)
finally:
log.info("Clearing failpoint")
pageserver_http.configure_failpoints(("timeline-calculate-logical-size-pause", "off"))
# Deletion should complete successfully now that failpoint is unblocked and shutdown can complete
log.info("Joining background delete")
background_delete.result(timeout=10)
def test_timeline_logical_size_task_priority(neon_env_builder: NeonEnvBuilder):
"""
/v1/tenant/:tenant_shard_id/timeline and /v1/tenant/:tenant_shard_id
should not bump the priority of the initial logical size computation
background task, unless the force-await-initial-logical-size query param
is set to true.
This test verifies the invariant stated above. A couple of tricks are involved:
1. Detach the tenant and re-attach it after the page server is restarted. This circumvents
the warm-up which forces the initial logical size calculation.
2. A fail point (initial-size-calculation-permit-pause) is used to block the initial
computation of the logical size until forced.
3. A fail point (walreceiver-after-ingest) is used to pause the walreceiver since
otherwise it would force the logical size computation.
"""
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# just make sure this doesn't hit an assertion
client.timeline_detail(tenant_id, timeline_id, force_await_initial_logical_size=True)
# load in some data
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
endpoint.safe_psql_many(
[
"CREATE TABLE foo (x INTEGER)",
"INSERT INTO foo SELECT g FROM generate_series(1, 10000) g",
]
)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
# restart with a failpoint inside the initial size calculation task
log.info(f"Detaching tenant {tenant_id} and stopping pageserver...")
endpoint.stop()
env.pageserver.tenant_detach(tenant_id)
env.pageserver.stop()
env.pageserver.start(
extra_env_vars={
"FAILPOINTS": "initial-size-calculation-permit-pause=pause;walreceiver-after-ingest=pause"
}
)
log.info(f"Re-attaching tenant {tenant_id}...")
env.pageserver.tenant_attach(tenant_id)
# kick off initial size calculation task (the response we get here is the estimated size)
def assert_initial_logical_size_not_prioritised():
details = client.timeline_detail(tenant_id, timeline_id)
assert details["current_logical_size_is_accurate"] is False
assert_initial_logical_size_not_prioritised()
# ensure that's actually the case
time.sleep(2)
assert_initial_logical_size_not_prioritised()
details = client.timeline_detail(tenant_id, timeline_id, force_await_initial_logical_size=True)
assert details["current_logical_size_is_accurate"] is True
client.configure_failpoints(
[("initial-size-calculation-permit-pause", "off"), ("walreceiver-after-ingest", "off")]
)
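# A tenant attached with lazy=False should activate immediately instead of queueing
# behind tenants stuck in warmup on the concurrent_tenant_warmup limit.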
def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = 1"
env = neon_env_builder.init_start()
# the supporting_second does nothing except queue behind env.initial_tenant
# for purposes of showing that eager_tenant breezes past the queue
supporting_second, _ = env.create_tenant()
eager_tenant, _ = env.create_tenant()
client = env.pageserver.http_client()
client.tenant_location_conf(
eager_tenant,
{
"mode": "Detached",
"secondary_conf": None,
"tenant_conf": {},
"generation": None,
},
)
env.pageserver.stop()
# pause at logical size calculation; also pause the walreceiver before it can send feedback, since otherwise it would force the logical size calculation
env.pageserver.start(
extra_env_vars={
"FAILPOINTS": "timeline-calculate-logical-size-pause=pause;walreceiver-after-ingest=pause"
}
)
tenant_ids = [env.initial_tenant, supporting_second]
def get_tenant_states() -> dict[str, list[TenantId]]:
states = defaultdict(list)
for id in tenant_ids:
state = client.tenant_status(id)["state"]["slug"]
states[state].append(id)
return dict(states)
def one_is_active():
states = get_tenant_states()
log.info(f"{states}")
assert len(states["Active"]) == 1
wait_until(one_is_active)
def other_is_attaching():
states = get_tenant_states()
assert len(states["Attaching"]) == 1
wait_until(other_is_attaching)
def eager_tenant_is_active():
resp = client.tenant_status(eager_tenant)
assert resp["state"]["slug"] == "Active"
gen = env.storage_controller.attach_hook_issue(eager_tenant, env.pageserver.id)
client.tenant_location_conf(
eager_tenant,
{
"mode": "AttachedSingle",
"secondary_conf": None,
"tenant_conf": {},
"generation": gen,
},
lazy=False,
)
wait_until(eager_tenant_is_active)
other_is_attaching()
client.configure_failpoints(
[("timeline-calculate-logical-size-pause", "off"), ("walreceiver-after-ingest", "off")]
)
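# A tenant attached with lazy=True stays in Attaching while the single warmup permit
# is held, and only activates when something needs it: an endpoint start, a timeline
# creation, or a deletion (parametrized below as activation_method).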
@pytest.mark.parametrize("activation_method", ["endpoint", "branch", "delete"])
def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_method: str):
# env.initial_tenant will take up this permit when attaching with lazy because of a failpoint activated after restart
neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = 1"
env = neon_env_builder.init_start()
# because this call returns (here and elsewhere in this file), we know that SpawnMode::Create skips the warmup queue
lazy_tenant, _ = env.create_tenant()
client = env.pageserver.http_client()
client.tenant_location_conf(
lazy_tenant,
{
"mode": "Detached",
"secondary_conf": None,
"tenant_conf": {},
"generation": None,
},
)
env.pageserver.stop()
# pause at logical size calculation; also pause the walreceiver before it can send feedback, since otherwise it would force the logical size calculation
env.pageserver.start(
extra_env_vars={
"FAILPOINTS": "timeline-calculate-logical-size-pause=pause;walreceiver-after-ingest=pause"
}
)
def initial_tenant_is_active():
resp = client.tenant_status(env.initial_tenant)
assert resp["state"]["slug"] == "Active"
wait_until(initial_tenant_is_active)
# Even though the initial tenant is now active, because it was attached at startup
# it still consumes the only warmup permit, since its logical size calculation
# is paused.
gen = env.storage_controller.attach_hook_issue(lazy_tenant, env.pageserver.id)
client.tenant_location_conf(
lazy_tenant,
{
"mode": "AttachedSingle",
"secondary_conf": None,
"tenant_conf": {},
"generation": gen,
},
lazy=True,
)
def lazy_tenant_is_attaching():
resp = client.tenant_status(lazy_tenant)
assert resp["state"]["slug"] == "Attaching"
# the paused logical size calculation of env.initial_tenant keeps the lazy tenant in Attaching
wait_until(lazy_tenant_is_attaching)
for _ in range(5):
lazy_tenant_is_attaching()
time.sleep(0.5)
def lazy_tenant_is_active():
resp = client.tenant_status(lazy_tenant)
assert resp["state"]["slug"] == "Active"
if activation_method == "endpoint":
with env.endpoints.create_start("main", tenant_id=lazy_tenant):
# starting up the endpoint should make it jump the queue
wait_until(lazy_tenant_is_active)
elif activation_method == "branch":
env.create_timeline("second_branch", lazy_tenant)
wait_until(lazy_tenant_is_active)
elif activation_method == "delete":
delete_lazy_activating(lazy_tenant, env.pageserver, expect_attaching=True)
else:
raise RuntimeError(activation_method)
client.configure_failpoints(
[
("timeline-calculate-logical-size-pause", "off"),
("walreceiver-after-ingest", "off"),
]
)