From 011c0a175f91d3f636acb11dc8c7613b64ccb6cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 7 Nov 2024 01:53:58 +0100 Subject: [PATCH] Support copying layers in detach_ancestor from before shard splits (#9669) We need to use the shard associated with the layer file, not the shard associated with our current tenant shard ID. Due to shard splits, the shard IDs can refer to older files. close https://github.com/neondatabase/neon/issues/9667 --- .../src/tenant/remote_timeline_client.rs | 6 +-- .../src/tenant/timeline/detach_ancestor.rs | 14 +++++-- .../regress/test_timeline_detach_ancestor.py | 41 ++++++++++++++++--- 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 03ec18c882..0aa8d61036 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1445,7 +1445,7 @@ impl RemoteTimelineClient { let remote_path = remote_layer_path( &self.tenant_shard_id.tenant_id, &self.timeline_id, - self.tenant_shard_id.to_index(), + uploaded.metadata().shard, &uploaded.layer_desc().layer_name(), uploaded.metadata().generation, ); @@ -1486,7 +1486,7 @@ impl RemoteTimelineClient { &adopted .get_timeline_id() .expect("Source timeline should be alive"), - self.tenant_shard_id.to_index(), + adopted.metadata().shard, &adopted.layer_desc().layer_name(), adopted.metadata().generation, ); @@ -1494,7 +1494,7 @@ impl RemoteTimelineClient { let target_remote_path = remote_layer_path( &self.tenant_shard_id.tenant_id, &self.timeline_id, - self.tenant_shard_id.to_index(), + adopted_as.metadata().shard, &adopted_as.layer_desc().layer_name(), adopted_as.metadata().generation, ); diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index b4c0ab0329..f8bc4352e2 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -12,7 +12,7 @@ use crate::{ virtual_file::{MaybeFatalIo, VirtualFile}, }; use anyhow::Context; -use pageserver_api::models::detach_ancestor::AncestorDetached; +use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity}; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; use tracing::Instrument; @@ -376,8 +376,14 @@ pub(super) async fn prepare( tasks.spawn( async move { let _permit = limiter.acquire().await; - let owned = - remote_copy(&adopted, &timeline, timeline.generation, &timeline.cancel).await?; + let owned = remote_copy( + &adopted, + &timeline, + timeline.generation, + timeline.shard_identity, + &timeline.cancel, + ) + .await?; tracing::info!(layer=%owned, "remote copied"); Ok(owned) } @@ -629,6 +635,7 @@ async fn remote_copy( adopted: &Layer, adoptee: &Arc, generation: Generation, + shard_identity: ShardIdentity, cancel: &CancellationToken, ) -> Result { // depending if Layer::keep_resident we could hardlink @@ -636,6 +643,7 @@ async fn remote_copy( let mut metadata = adopted.metadata(); debug_assert!(metadata.generation <= generation); metadata.generation = generation; + metadata.shard = shard_identity.shard_index(); let owned = crate::tenant::storage_layer::Layer::for_evicted( adoptee.conf, diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index ed47f9432b..0e8519e07b 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -16,6 +16,7 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, PgBin, flush_ep_to_pageserver, + last_flush_lsn_upload, wait_for_last_flush_lsn, ) from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException @@ -576,27 +577,49 @@ def test_compaction_induced_by_detaches_in_history( assert_pageserver_backups_equal(fullbackup_before, fullbackup_after, set()) -@pytest.mark.parametrize("sharded", [True, False]) +@pytest.mark.parametrize("shards_initial_after", [(1, 1), (2, 2), (1, 4)]) def test_timeline_ancestor_detach_idempotent_success( - neon_env_builder: NeonEnvBuilder, sharded: bool + neon_env_builder: NeonEnvBuilder, shards_initial_after: tuple[int, int] ): - shards = 2 if sharded else 1 + shards_initial = shards_initial_after[0] + shards_after = shards_initial_after[1] - neon_env_builder.num_pageservers = shards - env = neon_env_builder.init_start(initial_tenant_shard_count=shards if sharded else None) + neon_env_builder.num_pageservers = shards_after + env = neon_env_builder.init_start( + initial_tenant_shard_count=shards_initial if shards_initial > 1 else None, + initial_tenant_conf={ + # small checkpointing and compaction targets to ensure we generate many upload operations + "checkpoint_distance": 512 * 1024, + "compaction_threshold": 1, + "compaction_target_size": 512 * 1024, + # disable background compaction and GC. We invoke it manually when we want it to happen. + "gc_period": "0s", + "compaction_period": "0s", + }, + ) pageservers = dict((int(p.id), p) for p in env.pageservers) for ps in pageservers.values(): ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS) - if sharded: + if shards_after > 1: # FIXME: should this be in the neon_env_builder.init_start? env.storage_controller.reconcile_until_idle() client = env.storage_controller.pageserver_api() else: client = env.pageserver.http_client() + # Write some data so that we have some layers to copy + with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as endpoint: + endpoint.safe_psql_many( + [ + "CREATE TABLE foo(key serial primary key, t text default 'data_content')", + "INSERT INTO foo SELECT FROM generate_series(1,1024)", + ] + ) + last_flush_lsn_upload(env, endpoint, env.initial_tenant, env.initial_timeline) + first_branch = env.create_branch("first_branch") _ = env.create_branch("second_branch", ancestor_branch_name="first_branch") @@ -607,6 +630,12 @@ def test_timeline_ancestor_detach_idempotent_success( reparented1 = env.create_branch("first_reparented", ancestor_branch_name="main") reparented2 = env.create_branch("second_reparented", ancestor_branch_name="main") + if shards_after > shards_initial: + # Do a shard split + # This is a reproducer for https://github.com/neondatabase/neon/issues/9667 + env.storage_controller.tenant_shard_split(env.initial_tenant, shards_after) + env.storage_controller.reconcile_until_idle() + first_reparenting_response = client.detach_ancestor(env.initial_tenant, first_branch) assert set(first_reparenting_response) == {reparented1, reparented2}