mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
pageserver: fix race cleaning up timeline files when shut down during bootstrap (#10532)
## Problem Timeline bootstrap starts a flush loop, but doesn't reliably shut down the timeline (incl. waiting for flush loop to exit) before destroying UninitializedTimeline, and that destructor tries to clean up local storage. If local storage is still being written to, then this is unsound. Currently the symptom is that we see a "Directory not empty" error log, e.g. https://neon-github-public-dev.s3.amazonaws.com/reports/main/12966756686/index.html#testresult/5523f7d15f46f7f7/retries ## Summary of changes - Move fallible IO part of bootstrap into a function (notably, this is fallible in the case of the tenant being shut down while creation is happening) - When that function returns an error, call shutdown() on the timeline
This commit is contained in:
@@ -3169,12 +3169,16 @@ async fn put_tenant_timeline_import_basebackup(
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
let span = info_span!("import_basebackup", tenant_id=%tenant_id, timeline_id=%timeline_id, base_lsn=%base_lsn, end_lsn=%end_lsn, pg_version=%pg_version);
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
let span = info_span!("import_basebackup",
|
||||
tenant_id=%tenant_id, timeline_id=%timeline_id, shard_id=%tenant_shard_id.shard_slug(),
|
||||
base_lsn=%base_lsn, end_lsn=%end_lsn, pg_version=%pg_version);
|
||||
async move {
|
||||
let state = get_state(&request);
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(TenantShardId::unsharded(tenant_id))?;
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
|
||||
let broker_client = state.broker_client.clone();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user