From dd7b3934ba106d0e3628755f5833e90be84e3368 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 27 Sep 2024 16:50:23 +0100 Subject: [PATCH] tests: add test_image_layer_reads --- test_runner/fixtures/workload.py | 28 +++++++---- test_runner/regress/test_compaction.py | 65 ++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 8 deletions(-) diff --git a/test_runner/fixtures/workload.py b/test_runner/fixtures/workload.py index e869c43185..b7905558a6 100644 --- a/test_runner/fixtures/workload.py +++ b/test_runner/fixtures/workload.py @@ -61,20 +61,32 @@ class Workload: with ENDPOINT_LOCK: self._endpoint.reconfigure() - def endpoint(self, pageserver_id: Optional[int] = None) -> Endpoint: + def go_readonly(self): + self.stop() + self._endpoint = self.make_endpoint(readonly=True, pageserver_id=None) + self._endpoint.start(pageserver_id=None) + + def make_endpoint(self, readonly: bool, pageserver_id: Optional[int] = None) -> Endpoint: # We may be running alongside other Workloads for different tenants. Full TTID is # obnoxiously long for use here, but a cut-down version is still unique enough for tests. endpoint_id = f"ep-workload-{str(self.tenant_id)[0:4]}-{str(self.timeline_id)[0:4]}" + if readonly: + self._endpoint_opts["hot_standby"] = True + + return self.env.endpoints.create( + self.branch_name, + tenant_id=self.tenant_id, + pageserver_id=pageserver_id, + endpoint_id=endpoint_id, + **self._endpoint_opts, + ) + + def endpoint(self, pageserver_id: Optional[int] = None) -> Endpoint: with ENDPOINT_LOCK: if self._endpoint is None: - self._endpoint = self.env.endpoints.create( - self.branch_name, - tenant_id=self.tenant_id, - pageserver_id=pageserver_id, - endpoint_id=endpoint_id, - **self._endpoint_opts, - ) + self._endpoint = self.make_endpoint(pageserver_id=pageserver_id, readonly=False) + self._endpoint.start(pageserver_id=pageserver_id) else: self._endpoint.reconfigure(pageserver_id=pageserver_id) diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 420055ac3a..b678eebe90 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -12,7 +12,9 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, generate_uploads_and_deletions, ) +from fixtures.pageserver.common_types import ImageLayerName, parse_layer_file_name from fixtures.pageserver.http import PageserverApiException +from fixtures.pageserver.utils import wait_for_last_record_lsn from fixtures.utils import wait_until from fixtures.workload import Workload @@ -421,3 +423,66 @@ def test_image_layer_compression(neon_env_builder: NeonEnvBuilder, enabled: bool f"SELECT count(*) FROM foo WHERE id={v} and val=repeat('abcde{v:0>3}', 500)" ) assert res[0][0] == 1 + + +def test_image_layer_reads(neon_env_builder: NeonEnvBuilder): + """ + Verify that after generating some image layers, these are used in the read path to avoid having + to read + reconstruct deltas. + """ + env = neon_env_builder.init_start() + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + env.pageserver.http_client().set_tenant_config( + tenant_id, + { + "compaction_period": "0s", + }, + ) + + workload = Workload(env, tenant_id, timeline_id) + workload.init() + workload.write_rows(256) + workload.validate() + + workload.go_readonly() + + commit_lsn = env.safekeepers[0].http_client().get_commit_lsn(tenant_id, timeline_id) + wait_for_last_record_lsn(env.pageserver.http_client(), tenant_id, timeline_id, commit_lsn) + log.info(f"Ingested up to commit_lsn {commit_lsn}") + + env.pageserver.http_client().timeline_compact( + tenant_id, timeline_id, force_image_layer_creation=True, force_repartition=True + ) + + # Uncomment this checkpoint, and the logs will show getpage requests hitting the image layers we + # just created. However, without the checkpoint, getpage requests will hit one InMemoryLayer and + # one persistent delta layer. + # env.pageserver.http_client().timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True, compact=False) + + # Evict everything: we can then look at which layers were downloaded across a read, to determine + # which layers the pageserver consulted to service that read. + layers = env.pageserver.http_client().layer_map_info(tenant_id, timeline_id) + for layer in layers.historic_layers: + log.infof(f"Evicting layer {layer.layer_file_name}") + env.pageserver.http_client().evict_layer(tenant_id, timeline_id, layer.layer_file_name) + + for layer_path in env.pageserver.list_layers(tenant_id, timeline_id): + log.info(f"After eviction: local layer: {layer_path}") + + # This should send getpage requests at the same LSN where we just created image layers + workload.validate() + + # Nothing should have written in the meantime + assert commit_lsn == env.safekeepers[0].http_client().get_commit_lsn(tenant_id, timeline_id) + + # The only historic layer touched should be our image layer + getpage_touched_layers = env.pageserver.list_layers(tenant_id, timeline_id) + for layer_path in getpage_touched_layers: + log.info(f"Layer used in getpage request: {layer_path}") + + assert len(getpage_touched_layers) == 1 + touched_layer = parse_layer_file_name(str(getpage_touched_layers[0])) + assert isinstance(touched_layer, ImageLayerName) + assert touched_layer.lsn == commit_lsn