From 66a5159511ec80e032824d85b6eafca9f01b31a6 Mon Sep 17 00:00:00 2001
From: Christian Schwarz
Date: Thu, 2 Mar 2023 19:04:54 +0100
Subject: [PATCH] fix: compaction: no index upload scheduled if no on-demand
 downloads

Commit 0cf7fd0fb82b082d02dfadd9d6a488a7f799d72f,

    Compaction with on-demand download (#3598)

introduced a subtle bug: if we don't need to do any on-demand downloads, we
only take one ROUND in fn compact() and exit early. Thereby, we miss
scheduling the index part upload for any layers created by fn compact_inner().
Before that commit, we didn't have this problem. This patch fixes it.

Since no regression test caught this, I extended the timeline size tests to
assert that, if remote storage is configured,

1. the pageserver_remote_physical_size metric matches the other physical size
   metrics, and
2. the file sizes reported by the layer map info endpoint match the other
   physical size metrics.

Without the pageserver fix, the regression test fails at the physical size
assertion, complaining that the resident physical size metrics do not match
the remote physical size metric:

    50790400.0 != 18399232.0

I found the root cause by comparing the remote storage directory with the
local timeline directory: the image layer present in the local directory was
missing on the remote side, and its size was exactly the difference
50790400.0 - 18399232.0 = 32391168.0.

fixes https://github.com/neondatabase/neon/issues/3738
---
 pageserver/src/tenant/timeline.rs          |   4 +-
 test_runner/fixtures/neon_fixtures.py      |  20 +++
 test_runner/regress/test_layer_eviction.py |  20 ++-
 test_runner/regress/test_timeline_size.py  | 134 ++++++++++++++++++---
 4 files changed, 145 insertions(+), 33 deletions(-)

diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 101b27bb97..8b24fd6ecd 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -662,8 +662,8 @@ impl Timeline {
             // update the index file on next flush iteration too. But it
             // could take a while until that happens.
             //
-            // Additionally, only do this on the terminal round before sleeping.
-            if last_round {
+            // Additionally, only do this once before we return from this function.
+            if last_round || res.is_ok() {
                 if let Some(remote_client) = &self.remote_client {
                     remote_client.schedule_index_upload_for_file_changes()?;
                 }
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 94ee1d50f7..ba98563693 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -3542,3 +3542,23 @@ def wait_for_sk_commit_lsn_to_reach_remote_storage(
     ps_http.timeline_checkpoint(tenant_id, timeline_id)
     wait_for_upload(ps_http, tenant_id, timeline_id, lsn)
     return lsn
+
+
+def wait_for_upload_queue_empty(
+    pageserver: NeonPageserver, tenant_id: TenantId, timeline_id: TimelineId
+):
+    ps_http = pageserver.http_client()
+    while True:
+        all_metrics = ps_http.get_metrics()
+        tl = all_metrics.query_all(
+            "pageserver_remote_timeline_client_calls_unfinished",
+            {
+                "tenant_id": str(tenant_id),
+                "timeline_id": str(timeline_id),
+            },
+        )
+        assert len(tl) > 0
+        log.info(f"upload queue for {tenant_id}/{timeline_id}: {tl}")
+        if all(m.value == 0 for m in tl):
+            return
+        time.sleep(0.2)
diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py
index 404bd67050..e7c9713f98 100644
--- a/test_runner/regress/test_layer_eviction.py
+++ b/test_runner/regress/test_layer_eviction.py
@@ -268,18 +268,14 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
             [layer.layer_file_size or 0 for layer in info.historic_layers if not layer.remote]
         )

-        # TODO: the following would be nice to assert, but for some reason, the commented-out
-        # assert below fails with 113401856.0 != 140427264
-        # => https://github.com/neondatabase/neon/issues/3738
-        #
-        # log.info("ensure that remote_physical_size metric matches layer map")
-        # remote_physical_size_metric = ps_http.get_timeline_metric(
-        #     tenant_id, timeline_id, "pageserver_remote_physical_size"
-        # )
-        # log.info("ensure that remote_physical_size metric corresponds to layer map dump")
-        # assert remote_physical_size_metric == sum(
-        #     [layer.layer_file_size or 0 for layer in info.historic_layers if layer.remote]
-        # )
+        log.info("ensure that remote_physical_size metric matches layer map")
+        remote_physical_size_metric = ps_http.get_timeline_metric(
+            tenant_id, timeline_id, "pageserver_remote_physical_size"
+        )
+        log.info("ensure that remote_physical_size metric corresponds to layer map dump")
+        assert remote_physical_size_metric == sum(
+            layer.layer_file_size or 0 for layer in info.historic_layers if layer.remote
+        )

     log.info("before runnning GC, ensure that remote_physical size is zero")
     ensure_resident_and_remote_size_metrics()
diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py
index ca4f32fff9..ea4b65c9a8 100644
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -5,6 +5,7 @@ import threading
 import time
 from contextlib import closing
 from pathlib import Path
+from typing import Optional

 import psycopg2.errors
 import psycopg2.extras
@@ -18,9 +19,11 @@ from fixtures.neon_fixtures import (
     PgBin,
     PortDistributor,
     Postgres,
+    RemoteStorageKind,
     VanillaPostgres,
     assert_tenant_status,
     wait_for_last_flush_lsn,
+    wait_for_upload_queue_empty,
     wait_until,
 )
 from fixtures.types import TenantId, TimelineId
@@ -301,8 +304,18 @@ def test_timeline_initial_logical_size_calculation_cancellation(
     # message emitted by the code behind failpoint "timeline-calculate-logical-size-check-dir-exists"


-def test_timeline_physical_size_init(neon_simple_env: NeonEnv):
-    env = neon_simple_env
+@pytest.mark.parametrize("remote_storage_kind", [None, RemoteStorageKind.LOCAL_FS])
+def test_timeline_physical_size_init(
+    neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
+):
+
+    if remote_storage_kind is not None:
+        neon_env_builder.enable_remote_storage(
+            remote_storage_kind, "test_timeline_physical_size_init"
+        )
+
+    env = neon_env_builder.init_start()
+
     new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init")
     pg = env.postgres.create_start("test_timeline_physical_size_init")

@@ -330,12 +343,22 @@ def test_timeline_physical_size_init(neon_simple_env: NeonEnv):
     )

     assert_physical_size_invariants(
-        get_physical_size_values(env, env.initial_tenant, new_timeline_id)
+        get_physical_size_values(env, env.initial_tenant, new_timeline_id, remote_storage_kind),
+        remote_storage_kind,
     )


-def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv):
-    env = neon_simple_env
+@pytest.mark.parametrize("remote_storage_kind", [None, RemoteStorageKind.LOCAL_FS])
+def test_timeline_physical_size_post_checkpoint(
+    neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
+):
+    if remote_storage_kind is not None:
+        neon_env_builder.enable_remote_storage(
+            remote_storage_kind, "test_timeline_physical_size_init"
+        )
+
+    env = neon_env_builder.init_start()
+
     pageserver_http = env.pageserver.http_client()
     new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint")
     pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint")
@@ -353,11 +376,21 @@ def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv):
     pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)

     assert_physical_size_invariants(
-        get_physical_size_values(env, env.initial_tenant, new_timeline_id)
+        get_physical_size_values(env, env.initial_tenant, new_timeline_id, remote_storage_kind),
+        remote_storage_kind,
     )


-def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder):
+@pytest.mark.parametrize("remote_storage_kind", [None, RemoteStorageKind.LOCAL_FS])
+def test_timeline_physical_size_post_compaction(
+    neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
+):
+
+    if remote_storage_kind is not None:
+        neon_env_builder.enable_remote_storage(
+            remote_storage_kind, "test_timeline_physical_size_init"
+        )
+
     # Disable background compaction as we don't want it to happen after `get_physical_size` request
     # and before checking the expected size on disk, which makes the assertion failed
     neon_env_builder.pageserver_config_override = (
@@ -386,15 +419,33 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder
     )

     wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
+
+    # shutdown safekeepers to prevent new data from coming in
+    for sk in env.safekeepers:
+        sk.stop()
+
     pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
     pageserver_http.timeline_compact(env.initial_tenant, new_timeline_id)

+    if remote_storage_kind is not None:
+        wait_for_upload_queue_empty(env.pageserver, env.initial_tenant, new_timeline_id)
+
     assert_physical_size_invariants(
-        get_physical_size_values(env, env.initial_tenant, new_timeline_id)
+        get_physical_size_values(env, env.initial_tenant, new_timeline_id, remote_storage_kind),
+        remote_storage_kind,
     )


-def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
+@pytest.mark.parametrize("remote_storage_kind", [None, RemoteStorageKind.LOCAL_FS])
+def test_timeline_physical_size_post_gc(
+    neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
+):
+
+    if remote_storage_kind is not None:
+        neon_env_builder.enable_remote_storage(
+            remote_storage_kind, "test_timeline_physical_size_init"
+        )
+
     # Disable background compaction and GC as we don't want it to happen after `get_physical_size` request
     # and before checking the expected size on disk, which makes the assertion failed
     neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='0s', gc_period='0s', pitr_interval='1s'}"
@@ -430,8 +481,12 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
     pageserver_http.timeline_checkpoint(env.initial_tenant, new_timeline_id)
     pageserver_http.timeline_gc(env.initial_tenant, new_timeline_id, gc_horizon=None)

+    if remote_storage_kind is not None:
+        wait_for_upload_queue_empty(env.pageserver, env.initial_tenant, new_timeline_id)
+
     assert_physical_size_invariants(
-        get_physical_size_values(env, env.initial_tenant, new_timeline_id)
+        get_physical_size_values(env, env.initial_tenant, new_timeline_id, remote_storage_kind),
+        remote_storage_kind,
     )


@@ -515,18 +570,29 @@ def test_timeline_size_metrics(
     assert math.isclose(dbsize_sum, tl_logical_size_metric, abs_tol=2 * 1024 * 1024)


-def test_tenant_physical_size(neon_simple_env: NeonEnv):
+@pytest.mark.parametrize("remote_storage_kind", [None, RemoteStorageKind.LOCAL_FS])
+def test_tenant_physical_size(
+    neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
+):
     random.seed(100)

-    env = neon_simple_env
+    if remote_storage_kind is not None:
+        neon_env_builder.enable_remote_storage(
+            remote_storage_kind, "test_timeline_physical_size_init"
+        )
+
+    env = neon_env_builder.init_start()
+
     pageserver_http = env.pageserver.http_client()
     client = env.pageserver.http_client()

     tenant, timeline = env.neon_cli.create_tenant()
+    if remote_storage_kind is not None:
+        wait_for_upload_queue_empty(env.pageserver, tenant, timeline)

     def get_timeline_resident_physical_size(timeline: TimelineId):
-        sizes = get_physical_size_values(env, tenant, timeline)
-        assert_physical_size_invariants(sizes)
+        sizes = get_physical_size_values(env, tenant, timeline, remote_storage_kind)
+        assert_physical_size_invariants(sizes, remote_storage_kind)
         return sizes.prometheus_resident_physical

     timeline_total_resident_physical_size = get_timeline_resident_physical_size(timeline)
@@ -546,6 +612,9 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv):
         wait_for_last_flush_lsn(env, pg, tenant, timeline)
         pageserver_http.timeline_checkpoint(tenant, timeline)

+        if remote_storage_kind is not None:
+            wait_for_upload_queue_empty(env.pageserver, tenant, timeline)
+
         timeline_total_resident_physical_size += get_timeline_resident_physical_size(timeline)

     pg.stop()
@@ -563,21 +632,39 @@ class TimelinePhysicalSizeValues:
     api_current_physical: int
-    prometheus_resident_physical: int
+    prometheus_resident_physical: float
+    prometheus_remote_physical: Optional[float] = None
     python_timelinedir_layerfiles_physical: int
+    layer_map_file_size_sum: int


 def get_physical_size_values(
-    env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId
+    env: NeonEnv,
+    tenant_id: TenantId,
+    timeline_id: TimelineId,
+    remote_storage_kind: Optional[RemoteStorageKind],
 ) -> TimelinePhysicalSizeValues:
     res = TimelinePhysicalSizeValues()

     client = env.pageserver.http_client()

-    res.prometheus_resident_physical = int(
-        client.get_timeline_metric(tenant_id, timeline_id, "pageserver_resident_physical_size")
+    res.layer_map_file_size_sum = sum(
+        layer.layer_file_size or 0
+        for layer in client.layer_map_info(tenant_id, timeline_id).historic_layers
     )

+    metrics = client.get_metrics()
+    metrics_filter = {"tenant_id": str(tenant_id), "timeline_id": str(timeline_id)}
+    res.prometheus_resident_physical = metrics.query_one(
+        "pageserver_resident_physical_size", metrics_filter
+    ).value
+    if remote_storage_kind is not None:
+        res.prometheus_remote_physical = metrics.query_one(
+            "pageserver_remote_physical_size", metrics_filter
+        ).value
+    else:
+        res.prometheus_remote_physical = None
+
     detail = client.timeline_detail(
         tenant_id, timeline_id, include_timeline_dir_layer_file_size_sum=True
     )
@@ -589,11 +676,20 @@ def get_physical_size_values(
     return res


-def assert_physical_size_invariants(sizes: TimelinePhysicalSizeValues):
+def assert_physical_size_invariants(
+    sizes: TimelinePhysicalSizeValues, remote_storage_kind: Optional[RemoteStorageKind]
+):
     # resident phyiscal size is defined as
     assert sizes.python_timelinedir_layerfiles_physical == sizes.prometheus_resident_physical
+    assert sizes.python_timelinedir_layerfiles_physical == sizes.layer_map_file_size_sum
+
     # we don't do layer eviction, so, all layers are resident
     assert sizes.api_current_physical == sizes.prometheus_resident_physical
+    if remote_storage_kind is not None:
+        assert sizes.prometheus_resident_physical == sizes.prometheus_remote_physical
+        # XXX would be nice to assert layer file physical storage utilization here as well, but we can only do that for LocalFS
+    else:
+        assert sizes.prometheus_remote_physical is None


 # Timeline logical size initialization is an asynchronous background task that runs once,
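
For illustration, here is a minimal, self-contained sketch of the round logic
the fix changes. It is not the actual pageserver code: the retry loop, the
needs_download flag, and schedule_index_upload() are simplified stand-ins for
fn compact(), the on-demand download path, and
schedule_index_upload_for_file_changes().

#[derive(Debug)]
struct NeedsDownload;

fn compact(rounds: usize, mut needs_download: bool) -> Result<(), NeedsDownload> {
    for round in 0..rounds {
        let last_round = round == rounds - 1;

        // Stand-in for compact_inner(): fails if layers still have to be downloaded.
        let res: Result<(), NeedsDownload> = if needs_download {
            needs_download = false; // pretend the on-demand download happens here
            Err(NeedsDownload)
        } else {
            Ok(())
        };

        // Before the fix this was `if last_round { ... }`: when round 0 succeeds
        // (no downloads needed), the function returns below without ever
        // scheduling the index part upload for the newly created layers.
        if last_round || res.is_ok() {
            schedule_index_upload();
        }

        match res {
            Ok(()) => return Ok(()), // early exit after a successful round
            Err(e) if last_round => return Err(e),
            Err(_) => continue, // retry now that the download is done
        }
    }
    unreachable!("rounds is expected to be > 0")
}

fn schedule_index_upload() {
    println!("scheduling index part upload");
}

fn main() {
    // No on-demand downloads needed: we exit in round 0, upload still scheduled.
    compact(2, false).unwrap();
    // Downloads needed: round 0 fails, round 1 succeeds and schedules the upload.
    compact(2, true).unwrap();
}

The point of the fix is that the index part upload must be scheduled on every
path that leaves the loop, not only on the terminal round.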