Support copying layers in detach_ancestor from before shard splits (#9669)

We need to use the shard associated with the layer file, not the shard
associated with our current tenant shard ID.

Due to shard splits, the shard IDs can refer to older files.

close https://github.com/neondatabase/neon/issues/9667
This commit is contained in:
Arpad Müller
2024-11-07 01:53:58 +01:00
committed by GitHub
parent 2a95a51a0d
commit 011c0a175f
3 changed files with 49 additions and 12 deletions

View File

@@ -1445,7 +1445,7 @@ impl RemoteTimelineClient {
let remote_path = remote_layer_path(
&self.tenant_shard_id.tenant_id,
&self.timeline_id,
self.tenant_shard_id.to_index(),
uploaded.metadata().shard,
&uploaded.layer_desc().layer_name(),
uploaded.metadata().generation,
);
@@ -1486,7 +1486,7 @@ impl RemoteTimelineClient {
&adopted
.get_timeline_id()
.expect("Source timeline should be alive"),
self.tenant_shard_id.to_index(),
adopted.metadata().shard,
&adopted.layer_desc().layer_name(),
adopted.metadata().generation,
);
@@ -1494,7 +1494,7 @@ impl RemoteTimelineClient {
let target_remote_path = remote_layer_path(
&self.tenant_shard_id.tenant_id,
&self.timeline_id,
self.tenant_shard_id.to_index(),
adopted_as.metadata().shard,
&adopted_as.layer_desc().layer_name(),
adopted_as.metadata().generation,
);

View File

@@ -12,7 +12,7 @@ use crate::{
virtual_file::{MaybeFatalIo, VirtualFile},
};
use anyhow::Context;
use pageserver_api::models::detach_ancestor::AncestorDetached;
use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity};
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
use tracing::Instrument;
@@ -376,8 +376,14 @@ pub(super) async fn prepare(
tasks.spawn(
async move {
let _permit = limiter.acquire().await;
let owned =
remote_copy(&adopted, &timeline, timeline.generation, &timeline.cancel).await?;
let owned = remote_copy(
&adopted,
&timeline,
timeline.generation,
timeline.shard_identity,
&timeline.cancel,
)
.await?;
tracing::info!(layer=%owned, "remote copied");
Ok(owned)
}
@@ -629,6 +635,7 @@ async fn remote_copy(
adopted: &Layer,
adoptee: &Arc<Timeline>,
generation: Generation,
shard_identity: ShardIdentity,
cancel: &CancellationToken,
) -> Result<Layer, Error> {
// depending if Layer::keep_resident we could hardlink
@@ -636,6 +643,7 @@ async fn remote_copy(
let mut metadata = adopted.metadata();
debug_assert!(metadata.generation <= generation);
metadata.generation = generation;
metadata.shard = shard_identity.shard_index();
let owned = crate::tenant::storage_layer::Layer::for_evicted(
adoptee.conf,

View File

@@ -16,6 +16,7 @@ from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
flush_ep_to_pageserver,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
@@ -576,27 +577,49 @@ def test_compaction_induced_by_detaches_in_history(
assert_pageserver_backups_equal(fullbackup_before, fullbackup_after, set())
@pytest.mark.parametrize("sharded", [True, False])
@pytest.mark.parametrize("shards_initial_after", [(1, 1), (2, 2), (1, 4)])
def test_timeline_ancestor_detach_idempotent_success(
neon_env_builder: NeonEnvBuilder, sharded: bool
neon_env_builder: NeonEnvBuilder, shards_initial_after: tuple[int, int]
):
shards = 2 if sharded else 1
shards_initial = shards_initial_after[0]
shards_after = shards_initial_after[1]
neon_env_builder.num_pageservers = shards
env = neon_env_builder.init_start(initial_tenant_shard_count=shards if sharded else None)
neon_env_builder.num_pageservers = shards_after
env = neon_env_builder.init_start(
initial_tenant_shard_count=shards_initial if shards_initial > 1 else None,
initial_tenant_conf={
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": 512 * 1024,
"compaction_threshold": 1,
"compaction_target_size": 512 * 1024,
# disable background compaction and GC. We invoke it manually when we want it to happen.
"gc_period": "0s",
"compaction_period": "0s",
},
)
pageservers = dict((int(p.id), p) for p in env.pageservers)
for ps in pageservers.values():
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
if sharded:
if shards_after > 1:
# FIXME: should this be in the neon_env_builder.init_start?
env.storage_controller.reconcile_until_idle()
client = env.storage_controller.pageserver_api()
else:
client = env.pageserver.http_client()
# Write some data so that we have some layers to copy
with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as endpoint:
endpoint.safe_psql_many(
[
"CREATE TABLE foo(key serial primary key, t text default 'data_content')",
"INSERT INTO foo SELECT FROM generate_series(1,1024)",
]
)
last_flush_lsn_upload(env, endpoint, env.initial_tenant, env.initial_timeline)
first_branch = env.create_branch("first_branch")
_ = env.create_branch("second_branch", ancestor_branch_name="first_branch")
@@ -607,6 +630,12 @@ def test_timeline_ancestor_detach_idempotent_success(
reparented1 = env.create_branch("first_reparented", ancestor_branch_name="main")
reparented2 = env.create_branch("second_reparented", ancestor_branch_name="main")
if shards_after > shards_initial:
# Do a shard split
# This is a reproducer for https://github.com/neondatabase/neon/issues/9667
env.storage_controller.tenant_shard_split(env.initial_tenant, shards_after)
env.storage_controller.reconcile_until_idle()
first_reparenting_response = client.detach_ancestor(env.initial_tenant, first_branch)
assert set(first_reparenting_response) == {reparented1, reparented2}