Activate timelines during unoffload (#9399)
The current code forgets to activate timelines during unoffload, which makes it impossible to receive a basebackup because the timeline is still in the `Loading` state.
```
stderr:
command failed: compute startup failed: failed to get basebackup@0/0 from pageserver postgresql://no_user@localhost:15014
Caused by:
0: db error: ERROR: Not found: Timeline 508546c79b2b16a84ab609fdf966e0d3/bfc18c24c4b837ecae5dbb5216c80fce is not active, state: Loading
1: ERROR: Not found: Timeline 508546c79b2b16a84ab609fdf966e0d3/bfc18c24c4b837ecae5dbb5216c80fce is not active, state: Loading
```
Therefore, also activate the timeline during unoffloading.
Part of #8088
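The gist of the fix, pulled out of the diff below for quick reading: `unoffload_timeline` now receives a broker client (threaded down from the HTTP handler via `apply_timeline_archival_config`) and activates the freshly unoffloaded timeline. A minimal sketch of the new activation step (simplified; the surrounding locking and error handling are elided):
```rust
// Simplified from Tenant::unoffload_timeline in the diff below.
// `timeline` is the Arc<Timeline> just re-inserted into the live
// timeline map; without this call it would stay in `Loading` state.
if !(timeline.is_broken() || timeline.is_stopping()) {
    // No startup barrier to wait on: background jobs may begin
    // as soon as the timeline becomes active.
    let background_jobs_can_start = None;
    timeline.activate(
        self.clone(),          // the owning Arc<Tenant>
        broker_client.clone(), // storage_broker::BrokerClientChannel
        background_jobs_can_start,
        &ctx,
    );
}
```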
```diff
@@ -720,7 +720,12 @@ async fn timeline_archival_config_handler(
         tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
 
         tenant
-            .apply_timeline_archival_config(timeline_id, request_data.state, ctx)
+            .apply_timeline_archival_config(
+                timeline_id,
+                request_data.state,
+                state.broker_client.clone(),
+                ctx,
+            )
             .await?;
         Ok::<_, ApiError>(())
     }
@@ -1554,6 +1554,7 @@ impl Tenant {
     async fn unoffload_timeline(
         self: &Arc<Self>,
         timeline_id: TimelineId,
+        broker_client: storage_broker::BrokerClientChannel,
         ctx: RequestContext,
     ) -> Result<Arc<Timeline>, TimelineArchivalError> {
         info!("unoffloading timeline");
@@ -1605,25 +1606,37 @@ impl Tenant {
             })
             .map_err(TimelineArchivalError::Other)?;
         let timelines = self.timelines.lock().unwrap();
-        if let Some(timeline) = timelines.get(&timeline_id) {
-            let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
-            if offloaded_timelines.remove(&timeline_id).is_none() {
-                warn!("timeline already removed from offloaded timelines");
-            }
-            info!("timeline unoffloading complete");
-            Ok(Arc::clone(timeline))
-        } else {
+        let Some(timeline) = timelines.get(&timeline_id) else {
             warn!("timeline not available directly after attach");
-            Err(TimelineArchivalError::Other(anyhow::anyhow!(
+            return Err(TimelineArchivalError::Other(anyhow::anyhow!(
                 "timeline not available directly after attach"
-            )))
+            )));
+        };
+        let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
+        if offloaded_timelines.remove(&timeline_id).is_none() {
+            warn!("timeline already removed from offloaded timelines");
+        }
+
+        // Activate the timeline (if it makes sense)
+        if !(timeline.is_broken() || timeline.is_stopping()) {
+            let background_jobs_can_start = None;
+            timeline.activate(
+                self.clone(),
+                broker_client.clone(),
+                background_jobs_can_start,
+                &ctx,
+            );
         }
+
+        info!("timeline unoffloading complete");
+        Ok(Arc::clone(timeline))
     }
 
     pub(crate) async fn apply_timeline_archival_config(
         self: &Arc<Self>,
         timeline_id: TimelineId,
         new_state: TimelineArchivalState,
+        broker_client: storage_broker::BrokerClientChannel,
         ctx: RequestContext,
     ) -> Result<(), TimelineArchivalError> {
         info!("setting timeline archival config");
@@ -1664,12 +1677,13 @@ impl Tenant {
             Some(Arc::clone(timeline))
         };
 
-        // Second part: unarchive timeline (if needed)
+        // Second part: unoffload timeline (if needed)
         let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {
             timeline
         } else {
             // Turn offloaded timeline into a non-offloaded one
-            self.unoffload_timeline(timeline_id, ctx).await?
+            self.unoffload_timeline(timeline_id, broker_client, ctx)
+                .await?
         };
 
         // Third part: upload new timeline archival state and block until it is present in S3
@@ -3354,7 +3368,7 @@ impl Tenant {
     /// Populate all Timelines' `GcInfo` with information about their children. We do not set the
     /// PITR cutoffs here, because that requires I/O: this is done later, before GC, by [`Self::refresh_gc_info_internal`]
     ///
-    /// Subsequently, parent-child relationships are updated incrementally during timeline creation/deletion.
+    /// Subsequently, parent-child relationships are updated incrementally inside [`Timeline::new`] and [`Timeline::drop`].
     fn initialize_gc_info(
         &self,
         timelines: &std::sync::MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,
@@ -136,6 +136,17 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b
         "test_ancestor_branch_archive_branch1", tenant_id, "test_ancestor_branch_archive_parent"
     )
 
+    with env.endpoints.create_start(
+        "test_ancestor_branch_archive_branch1", tenant_id=tenant_id
+    ) as endpoint:
+        endpoint.safe_psql_many(
+            [
+                "CREATE TABLE foo(key serial primary key, t text default 'data_content')",
+                "INSERT INTO foo SELECT FROM generate_series(1,1000)",
+            ]
+        )
+        sum = endpoint.safe_psql("SELECT sum(key) from foo where key > 50")
+
     ps_http.timeline_archival_config(
         tenant_id,
         leaf_timeline_id,
@@ -197,4 +208,10 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b
     )
     assert leaf_detail["is_archived"] is False
 
+    with env.endpoints.create_start(
+        "test_ancestor_branch_archive_branch1", tenant_id=tenant_id
+    ) as endpoint:
+        sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key > 50")
+        assert sum == sum_again
+
     assert not timeline_offloaded(initial_timeline_id)
```
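The test additions exercise the full round trip: write data on the branch, archive (and offload) it, unarchive it, then start a fresh endpoint and check that the same aggregate query returns the same result. That last step only succeeds if the compute can take a basebackup, i.e. if the unoffloaded timeline was actually activated.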