Activate timelines during unoffload (#9399)

The current code forgets to activate timelines during unoffload, which
makes it impossible to receive the basebackup because the timeline is
still in the Loading state.

```
  stderr:
    command failed: compute startup failed: failed to get basebackup@0/0 from pageserver postgresql://no_user@localhost:15014

    Caused by:
        0: db error: ERROR: Not found: Timeline 508546c79b2b16a84ab609fdf966e0d3/bfc18c24c4b837ecae5dbb5216c80fce is not active, state: Loading
        1: ERROR: Not found: Timeline 508546c79b2b16a84ab609fdf966e0d3/bfc18c24c4b837ecae5dbb5216c80fce is not active, state: Loading
```

Therefore, also activate the timeline during unoffloading.

Part of #8088
This commit is contained in:
Arpad Müller
2024-10-16 16:47:17 +02:00
committed by GitHub
parent 9668601f46
commit 55b246085e
3 changed files with 50 additions and 14 deletions

View File

@@ -720,7 +720,12 @@ async fn timeline_archival_config_handler(
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
tenant
.apply_timeline_archival_config(timeline_id, request_data.state, ctx)
.apply_timeline_archival_config(
timeline_id,
request_data.state,
state.broker_client.clone(),
ctx,
)
.await?;
Ok::<_, ApiError>(())
}

View File

@@ -1554,6 +1554,7 @@ impl Tenant {
async fn unoffload_timeline(
self: &Arc<Self>,
timeline_id: TimelineId,
broker_client: storage_broker::BrokerClientChannel,
ctx: RequestContext,
) -> Result<Arc<Timeline>, TimelineArchivalError> {
info!("unoffloading timeline");
@@ -1605,25 +1606,37 @@ impl Tenant {
})
.map_err(TimelineArchivalError::Other)?;
let timelines = self.timelines.lock().unwrap();
if let Some(timeline) = timelines.get(&timeline_id) {
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
if offloaded_timelines.remove(&timeline_id).is_none() {
warn!("timeline already removed from offloaded timelines");
}
info!("timeline unoffloading complete");
Ok(Arc::clone(timeline))
} else {
let Some(timeline) = timelines.get(&timeline_id) else {
warn!("timeline not available directly after attach");
Err(TimelineArchivalError::Other(anyhow::anyhow!(
return Err(TimelineArchivalError::Other(anyhow::anyhow!(
"timeline not available directly after attach"
)))
)));
};
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
if offloaded_timelines.remove(&timeline_id).is_none() {
warn!("timeline already removed from offloaded timelines");
}
// Activate the timeline (if it makes sense)
if !(timeline.is_broken() || timeline.is_stopping()) {
let background_jobs_can_start = None;
timeline.activate(
self.clone(),
broker_client.clone(),
background_jobs_can_start,
&ctx,
);
}
info!("timeline unoffloading complete");
Ok(Arc::clone(timeline))
}
pub(crate) async fn apply_timeline_archival_config(
self: &Arc<Self>,
timeline_id: TimelineId,
new_state: TimelineArchivalState,
broker_client: storage_broker::BrokerClientChannel,
ctx: RequestContext,
) -> Result<(), TimelineArchivalError> {
info!("setting timeline archival config");
@@ -1664,12 +1677,13 @@ impl Tenant {
Some(Arc::clone(timeline))
};
// Second part: unarchive timeline (if needed)
// Second part: unoffload timeline (if needed)
let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {
timeline
} else {
// Turn offloaded timeline into a non-offloaded one
self.unoffload_timeline(timeline_id, ctx).await?
self.unoffload_timeline(timeline_id, broker_client, ctx)
.await?
};
// Third part: upload new timeline archival state and block until it is present in S3
@@ -3354,7 +3368,7 @@ impl Tenant {
/// Populate all Timelines' `GcInfo` with information about their children. We do not set the
/// PITR cutoffs here, because that requires I/O: this is done later, before GC, by [`Self::refresh_gc_info_internal`]
///
/// Subsequently, parent-child relationships are updated incrementally during timeline creation/deletion.
/// Subsequently, parent-child relationships are updated incrementally inside [`Timeline::new`] and [`Timeline::drop`].
fn initialize_gc_info(
&self,
timelines: &std::sync::MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,

View File

@@ -136,6 +136,17 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b
"test_ancestor_branch_archive_branch1", tenant_id, "test_ancestor_branch_archive_parent"
)
with env.endpoints.create_start(
"test_ancestor_branch_archive_branch1", tenant_id=tenant_id
) as endpoint:
endpoint.safe_psql_many(
[
"CREATE TABLE foo(key serial primary key, t text default 'data_content')",
"INSERT INTO foo SELECT FROM generate_series(1,1000)",
]
)
sum = endpoint.safe_psql("SELECT sum(key) from foo where key > 50")
ps_http.timeline_archival_config(
tenant_id,
leaf_timeline_id,
@@ -197,4 +208,10 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b
)
assert leaf_detail["is_archived"] is False
with env.endpoints.create_start(
"test_ancestor_branch_archive_branch1", tenant_id=tenant_id
) as endpoint:
sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key > 50")
assert sum == sum_again
assert not timeline_offloaded(initial_timeline_id)