Support copying layers in detach_ancestor from before shard splits (#9669)

We need to use the shard associated with the layer file, not the shard associated with our current tenant shard ID. Due to shard splits, the shard IDs can refer to older files. close https://github.com/neondatabase/neon/issues/9667
2026-06-05 06:20:37 +00:00 · 2024-11-07 01:53:58 +01:00
parent 2a95a51a0d
commit 011c0a175f
3 changed files with 49 additions and 12 deletions
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1445,7 +1445,7 @@ impl RemoteTimelineClient {
        let remote_path = remote_layer_path(
            &self.tenant_shard_id.tenant_id,
            &self.timeline_id,
-            self.tenant_shard_id.to_index(),
+            uploaded.metadata().shard,
            &uploaded.layer_desc().layer_name(),
            uploaded.metadata().generation,
        );
@@ -1486,7 +1486,7 @@ impl RemoteTimelineClient {
            &adopted
                .get_timeline_id()
                .expect("Source timeline should be alive"),
-            self.tenant_shard_id.to_index(),
+            adopted.metadata().shard,
            &adopted.layer_desc().layer_name(),
            adopted.metadata().generation,
        );
@@ -1494,7 +1494,7 @@ impl RemoteTimelineClient {
        let target_remote_path = remote_layer_path(
            &self.tenant_shard_id.tenant_id,
            &self.timeline_id,
-            self.tenant_shard_id.to_index(),
+            adopted_as.metadata().shard,
            &adopted_as.layer_desc().layer_name(),
            adopted_as.metadata().generation,
        );
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -12,7 +12,7 @@ use crate::{
    virtual_file::{MaybeFatalIo, VirtualFile},
 };
 use anyhow::Context;
-use pageserver_api::models::detach_ancestor::AncestorDetached;
+use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity};
 use tokio::sync::Semaphore;
 use tokio_util::sync::CancellationToken;
 use tracing::Instrument;
@@ -376,8 +376,14 @@ pub(super) async fn prepare(
        tasks.spawn(
            async move {
                let _permit = limiter.acquire().await;
-                let owned =
-                    remote_copy(&adopted, &timeline, timeline.generation, &timeline.cancel).await?;
+                let owned = remote_copy(
+                    &adopted,
+                    &timeline,
+                    timeline.generation,
+                    timeline.shard_identity,
+                    &timeline.cancel,
+                )
+                .await?;
                tracing::info!(layer=%owned, "remote copied");
                Ok(owned)
            }
@@ -629,6 +635,7 @@ async fn remote_copy(
    adopted: &Layer,
    adoptee: &Arc<Timeline>,
    generation: Generation,
+    shard_identity: ShardIdentity,
    cancel: &CancellationToken,
 ) -> Result<Layer, Error> {
    // depending if Layer::keep_resident we could hardlink
@@ -636,6 +643,7 @@ async fn remote_copy(
    let mut metadata = adopted.metadata();
    debug_assert!(metadata.generation <= generation);
    metadata.generation = generation;
+    metadata.shard = shard_identity.shard_index();

    let owned = crate::tenant::storage_layer::Layer::for_evicted(
        adoptee.conf,
--- a/test_runner/regress/test_timeline_detach_ancestor.py
+++ b/test_runner/regress/test_timeline_detach_ancestor.py
@@ -16,6 +16,7 @@ from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    PgBin,
    flush_ep_to_pageserver,
+    last_flush_lsn_upload,
    wait_for_last_flush_lsn,
 )
 from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
@@ -576,27 +577,49 @@ def test_compaction_induced_by_detaches_in_history(
    assert_pageserver_backups_equal(fullbackup_before, fullbackup_after, set())


-@pytest.mark.parametrize("sharded", [True, False])
+@pytest.mark.parametrize("shards_initial_after", [(1, 1), (2, 2), (1, 4)])
 def test_timeline_ancestor_detach_idempotent_success(
-    neon_env_builder: NeonEnvBuilder, sharded: bool
+    neon_env_builder: NeonEnvBuilder, shards_initial_after: tuple[int, int]
 ):
-    shards = 2 if sharded else 1
+    shards_initial = shards_initial_after[0]
+    shards_after = shards_initial_after[1]

-    neon_env_builder.num_pageservers = shards
-    env = neon_env_builder.init_start(initial_tenant_shard_count=shards if sharded else None)
+    neon_env_builder.num_pageservers = shards_after
+    env = neon_env_builder.init_start(
+        initial_tenant_shard_count=shards_initial if shards_initial > 1 else None,
+        initial_tenant_conf={
+            # small checkpointing and compaction targets to ensure we generate many upload operations
+            "checkpoint_distance": 512 * 1024,
+            "compaction_threshold": 1,
+            "compaction_target_size": 512 * 1024,
+            # disable background compaction and GC. We invoke it manually when we want it to happen.
+            "gc_period": "0s",
+            "compaction_period": "0s",
+        },
+    )

    pageservers = dict((int(p.id), p) for p in env.pageservers)

    for ps in pageservers.values():
        ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)

-    if sharded:
+    if shards_after > 1:
        # FIXME: should this be in the neon_env_builder.init_start?
        env.storage_controller.reconcile_until_idle()
        client = env.storage_controller.pageserver_api()
    else:
        client = env.pageserver.http_client()

+    # Write some data so that we have some layers to copy
+    with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as endpoint:
+        endpoint.safe_psql_many(
+            [
+                "CREATE TABLE foo(key serial primary key, t text default 'data_content')",
+                "INSERT INTO foo SELECT FROM generate_series(1,1024)",
+            ]
+        )
+        last_flush_lsn_upload(env, endpoint, env.initial_tenant, env.initial_timeline)
+
    first_branch = env.create_branch("first_branch")

    _ = env.create_branch("second_branch", ancestor_branch_name="first_branch")
@@ -607,6 +630,12 @@ def test_timeline_ancestor_detach_idempotent_success(
    reparented1 = env.create_branch("first_reparented", ancestor_branch_name="main")
    reparented2 = env.create_branch("second_reparented", ancestor_branch_name="main")

+    if shards_after > shards_initial:
+        # Do a shard split
+        # This is a reproducer for https://github.com/neondatabase/neon/issues/9667
+        env.storage_controller.tenant_shard_split(env.initial_tenant, shards_after)
+        env.storage_controller.reconcile_until_idle()
+
    first_reparenting_response = client.detach_ancestor(env.initial_tenant, first_branch)
    assert set(first_reparenting_response) == {reparented1, reparented2}