mirror of https://github.com/neondatabase/neon.git
synced 2026-05-15 12:10:37 +00:00
Merge branch 'main' into problame/2024-02-walredo-work/prespawn/split-code
@@ -4388,10 +4388,6 @@ impl Timeline {

        guard.finish_gc_timeline(&gc_layers);

        if result.layers_removed != 0 {
            fail_point!("after-timeline-gc-removed-layers");
        }

        #[cfg(feature = "testing")]
        {
            result.doomed_layers = gc_layers;

@@ -831,3 +831,16 @@ class PageserverHttpClient(requests.Session):
        self.put(
            f"http://localhost:{self.port}/v1/deletion_queue/flush?execute={'true' if execute else 'false'}"
        ).raise_for_status()

    def timeline_wait_logical_size(self, tenant_id: TenantId, timeline_id: TimelineId) -> int:
        detail = self.timeline_detail(
            tenant_id,
            timeline_id,
            include_non_incremental_logical_size=True,
            force_await_initial_logical_size=True,
        )
        current_logical_size = detail["current_logical_size"]
        non_incremental = detail["current_logical_size_non_incremental"]
        assert current_logical_size == non_incremental
        assert isinstance(current_logical_size, int)
        return current_logical_size

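For orientation, here is a minimal usage sketch of the new helper. It is not part of the diff; `env` and `new_timeline_id` are assumed to come from the usual test fixtures, as in the test_timeline_size tests updated further down. The point of the helper is that `force_await_initial_logical_size=True` presumably makes the pageserver itself wait for the initial logical size, so callers no longer need a client-side retry loop like the `wait_for_timeline_size_init` helper removed at the end of this diff.

# Hypothetical usage sketch (assumed fixture names, not part of the diff).
client = env.pageserver.http_client()
logical_size = client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
assert logical_size >= 0  # illustration only; real tests compare against expected sizes
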
@@ -155,6 +155,15 @@ class EvictionEnv:
        mock_behavior,
        eviction_order: EvictionOrder,
    ):
        """
        Starts the pageserver with the mocked statvfs setup. Startup is problematic
        because the initial logical size calculations, which require layers on disk,
        compete with the disk-usage-based eviction task.

        Returns after initial logical sizes are complete, but the phase of the disk
        usage eviction task is unknown; it might need to run one more iteration
        before assertions can be made.
        """
        disk_usage_config = {
            "period": period,
            "max_usage_pct": max_usage_pct,

@@ -183,9 +192,15 @@ class EvictionEnv:
            ),
        )

        # we now do initial logical size calculation on startup, which on debug builds can fight with disk usage based eviction
        for tenant_id, timeline_id in self.timelines:
            pageserver_http = self.neon_env.get_tenant_pageserver(tenant_id).http_client()
            pageserver_http.timeline_wait_logical_size(tenant_id, timeline_id)

        def statvfs_called():
            assert pageserver.log_contains(".*running mocked statvfs.*")

        # we most likely have already completed multiple runs
        wait_until(10, 1, statvfs_called)

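The `wait_until(retries, interval, fn)` calls above poll a condition that signals "not yet" by raising (here, the assert on `log_contains`). Below is a minimal sketch of that polling pattern; it is an assumption about how the fixtures' helper behaves, not its actual implementation, included only to make the startup sequencing described in the docstring easier to follow.

# Sketch of the assumed wait_until behaviour: call fn up to `retries` times,
# sleeping `interval` seconds between attempts, treating an exception as
# "condition not met yet".
import time

def wait_until_sketch(retries: int, interval: float, fn):
    last_exc = None
    for _ in range(retries):
        try:
            return fn()
        except Exception as e:
            last_exc = e
            time.sleep(interval)
    raise AssertionError(f"condition not reached after {retries} attempts") from last_exc
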
@@ -789,9 +804,11 @@ def test_statvfs_pressure_usage(eviction_env: EvictionEnv):

    wait_until(10, 1, relieved_log_message)

    post_eviction_total_size, _, _ = env.timelines_du(env.pageserver)
    def less_than_max_usage_pct():
        post_eviction_total_size, _, _ = env.timelines_du(env.pageserver)
        assert post_eviction_total_size < 0.33 * total_size, "we requested max 33% usage"

    assert post_eviction_total_size <= 0.33 * total_size, "we requested max 33% usage"
    wait_until(2, 2, less_than_max_usage_pct)


def test_statvfs_pressure_min_avail_bytes(eviction_env: EvictionEnv):

@@ -831,11 +848,13 @@ def test_statvfs_pressure_min_avail_bytes(eviction_env: EvictionEnv):

    wait_until(10, 1, relieved_log_message)

    post_eviction_total_size, _, _ = env.timelines_du(env.pageserver)
    def more_than_min_avail_bytes_freed():
        post_eviction_total_size, _, _ = env.timelines_du(env.pageserver)
        assert (
            total_size - post_eviction_total_size >= min_avail_bytes
        ), f"we requested at least {min_avail_bytes} worth of free space"

    assert (
        total_size - post_eviction_total_size >= min_avail_bytes
    ), "we requested at least min_avail_bytes worth of free space"
    wait_until(2, 2, more_than_min_avail_bytes_freed)


def test_secondary_mode_eviction(eviction_env_ha: EvictionEnv):

@@ -1,47 +0,0 @@
import subprocess

import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin


# Test gc_cutoff
#
# This test sets a fail point at the end of GC, and checks that the pageserver
# restarts normally after it. Also, there should be GC ERRORs in the log,
# but the fixture checks the log for any unexpected ERRORs after every
# test anyway, so it doesn't need any special attention here.
@pytest.mark.timeout(600)
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
    env = neon_env_builder.init_start(
        initial_tenant_conf={
            "gc_period": "10 s",
            "gc_horizon": f"{1024 ** 2}",
            "checkpoint_distance": f"{1024 ** 2}",
            "compaction_period": "5 s",
            # set PITR interval to be small, so we can do GC
            "pitr_interval": "1 s",
            "compaction_threshold": "3",
            "image_creation_threshold": "2",
        }
    )

    pageserver_http = env.pageserver.http_client()

    # Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
    tenant_id = env.initial_tenant
    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
    connstr = endpoint.connstr(options="-csynchronous_commit=off")
    pg_bin.run_capture(["pgbench", "-i", "-s10", connstr])

    pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))

    # Because this test does a rapid series of restarts of the same node, it's possible that
    # we are restarted again before we can clean up deletion lists from the previous generation,
    # resulting in a subsequent startup logging a warning.
    env.pageserver.allowed_errors.append(".*Dropping stale deletions for tenant.*")

    for _ in range(5):
        with pytest.raises(subprocess.SubprocessError):
            pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
        env.pageserver.stop()
        env.pageserver.start(extra_env_vars={"FAILPOINTS": "after-timeline-gc-removed-layers=exit"})

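The deleted test armed the `after-timeline-gc-removed-layers` fail point in the two ways visible above: at runtime through the pageserver HTTP API, and at process startup through the `FAILPOINTS` environment variable. A condensed sketch of that pattern follows, using only calls that appear in the deleted file; the `env` and `pageserver_http` names are assumed from the fixtures.

# Runtime: arm the fail point via the pageserver HTTP API (as in the deleted test).
pageserver_http = env.pageserver.http_client()
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))

# Startup: arm the same fail point before the process runs, via the FAILPOINTS env var.
env.pageserver.stop()
env.pageserver.start(extra_env_vars={"FAILPOINTS": "after-timeline-gc-removed-layers=exit"})
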
@@ -20,7 +20,7 @@ from fixtures.neon_fixtures import (
    VanillaPostgres,
    wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
from fixtures.pageserver.http import PageserverApiException
from fixtures.pageserver.utils import (
    assert_tenant_state,
    timeline_delete_wait_completed,

@@ -40,7 +40,7 @@ def test_timeline_size(neon_simple_env: NeonEnv):
    new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty")

    client = env.pageserver.http_client()
    wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
    client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)

    endpoint_main = env.endpoints.create_start("test_timeline_size")
    log.info("postgres is running on 'test_timeline_size' branch")

@@ -73,7 +73,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
    new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "empty")

    client = env.pageserver.http_client()
    wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
    client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
    timeline_details = client.timeline_detail(
        env.initial_tenant, new_timeline_id, include_non_incremental_logical_size=True
    )

@@ -153,7 +153,7 @@ def test_timeline_size_quota_on_startup(neon_env_builder: NeonEnvBuilder):
    client = env.pageserver.http_client()
    new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota_on_startup")

    wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
    client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)

    endpoint_main = env.endpoints.create(
        "test_timeline_size_quota_on_startup",

@@ -219,7 +219,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
    client = env.pageserver.http_client()
    new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota")

    wait_for_timeline_size_init(client, tenant=env.initial_tenant, timeline=new_timeline_id)
    client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)

    endpoint_main = env.endpoints.create(
        "test_timeline_size_quota",

@@ -715,28 +715,6 @@ def assert_physical_size_invariants(sizes: TimelinePhysicalSizeValues):
    # XXX would be nice to assert layer file physical storage utilization here as well, but we can only do that for LocalFS


# Timeline logical size initialization is an asynchronous background task that runs once,
# try a few times to ensure it's activated properly
def wait_for_timeline_size_init(
    client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
):
    for i in range(10):
        timeline_details = client.timeline_detail(
            tenant, timeline, include_non_incremental_logical_size=True
        )
        current_logical_size = timeline_details["current_logical_size"]
        non_incremental = timeline_details["current_logical_size_non_incremental"]
        if current_logical_size == non_incremental:
            return
        log.info(
            f"waiting for current_logical_size of a timeline to be calculated, iteration {i}: {current_logical_size} vs {non_incremental}"
        )
        time.sleep(1)
    raise Exception(
        f"timed out while waiting for current_logical_size of a timeline to reach its non-incremental value, details: {timeline_details}"
    )


def test_ondemand_activation(neon_env_builder: NeonEnvBuilder):
    """
    Tenants warming up opportunistically will wait for one another's logical size calculations to complete