mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-21 23:20:40 +00:00
## Problem When shutting down a Tenant, it isn't just important to cause any background tasks to stop. It's also important to wait until they have stopped before declaring shutdown complete, in cases where we may re-use the tenant's local storage for something else, such as running in secondary mode, or creating a new tenant with the same ID. ## Summary of changes A `Gate` class is added, inspired by [seastar::gate](https://docs.seastar.io/master/classseastar_1_1gate.html). For types that have an important lifetime that corresponds to some physical resource, use of a Gate as well as a CancellationToken provides a robust pattern for async requests & shutdown: - Requests must always acquire the gate as long as they are using the object - Shutdown must set the cancellation token, and then `close()` the gate to wait for requests in progress before returning. This is not for memory safety: it's for expressing the difference between "Arc<Tenant> exists", and "This tenant's files on disk are eligible to be read/written". - Both Tenant and Timeline get a Gate & CancellationToken. - The Timeline gate is held during eviction of layers, and during page_service requests. - Existing cancellation support in page_service is refined to use the timeline-scope cancellation token instead of a process-scope cancellation token. This replaces the use of `task_mgr::associate_with`: tasks no longer change their tenant/timeline identity after being spawned. The Tenant's Gate is not yet used, but will be important for Tenant-scoped operations in secondary mode, where we must ensure that our secondary-mode downloads for a tenant are gated wrt the activity of an attached Tenant. 
This is part of a broader move away from using the global-state driven `task_mgr` shutdown tokens: - less global state where we rely on implicit knowledge of what task a given function is running in, and more explicit references to the cancellation token that a particular function/type will respect, making shutdown easier to reason about. - eventually avoid the big global TASKS mutex. --------- Co-authored-by: Joonas Koivunen <joonas@neon.tech>
620 lines
24 KiB
Rust
620 lines
24 KiB
Rust
use std::{
|
|
ops::{Deref, DerefMut},
|
|
sync::Arc,
|
|
};
|
|
|
|
use anyhow::Context;
|
|
use pageserver_api::models::TimelineState;
|
|
use tokio::sync::OwnedMutexGuard;
|
|
use tracing::{debug, error, info, instrument, warn, Instrument, Span};
|
|
use utils::{
|
|
crashsafe, fs_ext,
|
|
id::{TenantId, TimelineId},
|
|
};
|
|
|
|
use crate::{
|
|
config::PageServerConf,
|
|
deletion_queue::DeletionQueueClient,
|
|
task_mgr::{self, TaskKind},
|
|
tenant::{
|
|
debug_assert_current_span_has_tenant_and_timeline_id,
|
|
metadata::TimelineMetadata,
|
|
remote_timeline_client::{
|
|
self, PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
|
|
},
|
|
CreateTimelineCause, DeleteTimelineError, Tenant,
|
|
},
|
|
InitializationOrder,
|
|
};
|
|
|
|
use super::{Timeline, TimelineResources};
|
|
|
|
/// Now that the Timeline is in Stopping state, request all the related tasks to shut down.
|
|
async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
|
debug_assert_current_span_has_tenant_and_timeline_id();
|
|
// Notify any timeline work to drop out of loops/requests
|
|
tracing::debug!("Cancelling CancellationToken");
|
|
timeline.cancel.cancel();
|
|
|
|
// Stop the walreceiver first.
|
|
debug!("waiting for wal receiver to shutdown");
|
|
let maybe_started_walreceiver = { timeline.walreceiver.lock().unwrap().take() };
|
|
if let Some(walreceiver) = maybe_started_walreceiver {
|
|
walreceiver.stop().await;
|
|
}
|
|
debug!("wal receiver shutdown confirmed");
|
|
|
|
// Shut down the layer flush task before the remote client, as one depends on the other
|
|
task_mgr::shutdown_tasks(
|
|
Some(TaskKind::LayerFlushTask),
|
|
Some(timeline.tenant_id),
|
|
Some(timeline.timeline_id),
|
|
)
|
|
.await;
|
|
|
|
// Prevent new uploads from starting.
|
|
if let Some(remote_client) = timeline.remote_client.as_ref() {
|
|
let res = remote_client.stop();
|
|
match res {
|
|
Ok(()) => {}
|
|
Err(e) => match e {
|
|
remote_timeline_client::StopError::QueueUninitialized => {
|
|
// This case shouldn't happen currently because the
|
|
// load and attach code bails out if _any_ of the timeline fails to fetch its IndexPart.
|
|
// That is, before we declare the Tenant as Active.
|
|
// But we only allow calls to delete_timeline on Active tenants.
|
|
return Err(DeleteTimelineError::Other(anyhow::anyhow!("upload queue is uninitialized, likely the timeline was in Broken state prior to this call because it failed to fetch IndexPart during load or attach, check the logs")));
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
// Stop & wait for the remaining timeline tasks, including upload tasks.
|
|
// NB: This and other delete_timeline calls do not run as a task_mgr task,
|
|
// so, they are not affected by this shutdown_tasks() call.
|
|
info!("waiting for timeline tasks to shutdown");
|
|
task_mgr::shutdown_tasks(None, Some(timeline.tenant_id), Some(timeline.timeline_id)).await;
|
|
|
|
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
|
|
Err(anyhow::anyhow!(
|
|
"failpoint: timeline-delete-before-index-deleted-at"
|
|
))?
|
|
});
|
|
|
|
tracing::debug!("Waiting for gate...");
|
|
timeline.gate.close().await;
|
|
tracing::debug!("Shutdown complete");
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Mark timeline as deleted in S3 so we won't pick it up next time
|
|
/// during attach or pageserver restart.
|
|
/// See comment in persist_index_part_with_deleted_flag.
|
|
async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
|
if let Some(remote_client) = timeline.remote_client.as_ref() {
|
|
match remote_client.persist_index_part_with_deleted_flag().await {
|
|
// If we (now, or already) marked it successfully as deleted, we can proceed
|
|
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
|
|
// Bail out otherwise
|
|
//
|
|
// AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents
|
|
// two tasks from performing the deletion at the same time. The first task
|
|
// that starts deletion should run it to completion.
|
|
Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
|
|
| Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
|
|
return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
|
|
}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
// We delete local files first, so if pageserver restarts after local files deletion then remote deletion is not continued.
|
|
// This can be solved with inversion of these steps. But even if these steps are inverted then, when index_part.json
|
|
// gets deleted there is no way to distinguish between "this timeline is good, we just didnt upload it to remote"
|
|
// and "this timeline is deleted we should continue with removal of local state". So to avoid the ambiguity we use a mark file.
|
|
// After index part is deleted presence of this mark file indentifies that it was a deletion intention.
|
|
// So we can just remove the mark file.
|
|
async fn create_delete_mark(
|
|
conf: &PageServerConf,
|
|
tenant_id: TenantId,
|
|
timeline_id: TimelineId,
|
|
) -> Result<(), DeleteTimelineError> {
|
|
fail::fail_point!("timeline-delete-before-delete-mark", |_| {
|
|
Err(anyhow::anyhow!(
|
|
"failpoint: timeline-delete-before-delete-mark"
|
|
))?
|
|
});
|
|
let marker_path = conf.timeline_delete_mark_file_path(tenant_id, timeline_id);
|
|
|
|
// Note: we're ok to replace existing file.
|
|
let _ = std::fs::OpenOptions::new()
|
|
.write(true)
|
|
.create(true)
|
|
.open(&marker_path)
|
|
.with_context(|| format!("could not create delete marker file {marker_path:?}"))?;
|
|
|
|
crashsafe::fsync_file_and_parent(&marker_path).context("sync_mark")?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Grab the layer_removal_cs lock, and actually perform the deletion.
|
|
///
|
|
/// This lock prevents prevents GC or compaction from running at the same time.
|
|
/// The GC task doesn't register itself with the timeline it's operating on,
|
|
/// so it might still be running even though we called `shutdown_tasks`.
|
|
///
|
|
/// Note that there are still other race conditions between
|
|
/// GC, compaction and timeline deletion. See
|
|
/// <https://github.com/neondatabase/neon/issues/2671>
|
|
///
|
|
/// No timeout here, GC & Compaction should be responsive to the
|
|
/// `TimelineState::Stopping` change.
|
|
async fn delete_local_layer_files(
|
|
conf: &PageServerConf,
|
|
tenant_id: TenantId,
|
|
timeline: &Timeline,
|
|
) -> anyhow::Result<()> {
|
|
info!("waiting for layer_removal_cs.lock()");
|
|
let layer_removal_guard = timeline.layer_removal_cs.lock().await;
|
|
info!("got layer_removal_cs.lock(), deleting layer files");
|
|
|
|
// NB: storage_sync upload tasks that reference these layers have been cancelled
|
|
// by the caller.
|
|
|
|
let local_timeline_directory = conf.timeline_path(&tenant_id, &timeline.timeline_id);
|
|
|
|
fail::fail_point!("timeline-delete-before-rm", |_| {
|
|
Err(anyhow::anyhow!("failpoint: timeline-delete-before-rm"))?
|
|
});
|
|
|
|
// NB: This need not be atomic because the deleted flag in the IndexPart
|
|
// will be observed during tenant/timeline load. The deletion will be resumed there.
|
|
//
|
|
// For configurations without remote storage, we guarantee crash-safety by persising delete mark file.
|
|
//
|
|
// Note that here we do not bail out on std::io::ErrorKind::NotFound.
|
|
// This can happen if we're called a second time, e.g.,
|
|
// because of a previous failure/cancellation at/after
|
|
// failpoint timeline-delete-after-rm.
|
|
//
|
|
// It can also happen if we race with tenant detach, because,
|
|
// it doesn't grab the layer_removal_cs lock.
|
|
//
|
|
// For now, log and continue.
|
|
// warn! level is technically not appropriate for the
|
|
// first case because we should expect retries to happen.
|
|
// But the error is so rare, it seems better to get attention if it happens.
|
|
//
|
|
// Note that metadata removal is skipped, this is not technically needed,
|
|
// but allows to reuse timeline loading code during resumed deletion.
|
|
// (we always expect that metadata is in place when timeline is being loaded)
|
|
|
|
#[cfg(feature = "testing")]
|
|
let mut counter = 0;
|
|
|
|
// Timeline directory may not exist if we failed to delete mark file and request was retried.
|
|
if !local_timeline_directory.exists() {
|
|
return Ok(());
|
|
}
|
|
|
|
let metadata_path = conf.metadata_path(&tenant_id, &timeline.timeline_id);
|
|
|
|
for entry in walkdir::WalkDir::new(&local_timeline_directory).contents_first(true) {
|
|
#[cfg(feature = "testing")]
|
|
{
|
|
counter += 1;
|
|
if counter == 2 {
|
|
fail::fail_point!("timeline-delete-during-rm", |_| {
|
|
Err(anyhow::anyhow!("failpoint: timeline-delete-during-rm"))?
|
|
});
|
|
}
|
|
}
|
|
|
|
let entry = entry?;
|
|
if entry.path() == metadata_path {
|
|
debug!("found metadata, skipping");
|
|
continue;
|
|
}
|
|
|
|
if entry.path() == local_timeline_directory {
|
|
// Keeping directory because metedata file is still there
|
|
debug!("found timeline dir itself, skipping");
|
|
continue;
|
|
}
|
|
|
|
let metadata = match entry.metadata() {
|
|
Ok(metadata) => metadata,
|
|
Err(e) => {
|
|
if crate::is_walkdir_io_not_found(&e) {
|
|
warn!(
|
|
timeline_dir=?local_timeline_directory,
|
|
path=?entry.path().display(),
|
|
"got not found err while removing timeline dir, proceeding anyway"
|
|
);
|
|
continue;
|
|
}
|
|
anyhow::bail!(e);
|
|
}
|
|
};
|
|
|
|
if metadata.is_dir() {
|
|
warn!(path=%entry.path().display(), "unexpected directory under timeline dir");
|
|
tokio::fs::remove_dir(entry.path()).await
|
|
} else {
|
|
tokio::fs::remove_file(entry.path()).await
|
|
}
|
|
.with_context(|| format!("Failed to remove: {}", entry.path().display()))?;
|
|
}
|
|
|
|
info!("finished deleting layer files, releasing layer_removal_cs.lock()");
|
|
drop(layer_removal_guard);
|
|
|
|
fail::fail_point!("timeline-delete-after-rm", |_| {
|
|
Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))?
|
|
});
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Removes remote layers and an index file after them.
|
|
async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
|
|
if let Some(remote_client) = &timeline.remote_client {
|
|
remote_client.delete_all().await.context("delete_all")?
|
|
};
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// This function removs remaining traces of a timeline on disk.
|
|
// Namely: metadata file, timeline directory, delete mark.
|
|
// Note: io::ErrorKind::NotFound are ignored for metadata and timeline dir.
|
|
// delete mark should be present because it is the last step during deletion.
|
|
// (nothing can fail after its deletion)
|
|
async fn cleanup_remaining_timeline_fs_traces(
|
|
conf: &PageServerConf,
|
|
tenant_id: TenantId,
|
|
timeline_id: TimelineId,
|
|
) -> anyhow::Result<()> {
|
|
// Remove local metadata
|
|
tokio::fs::remove_file(conf.metadata_path(&tenant_id, &timeline_id))
|
|
.await
|
|
.or_else(fs_ext::ignore_not_found)
|
|
.context("remove metadata")?;
|
|
|
|
fail::fail_point!("timeline-delete-after-rm-metadata", |_| {
|
|
Err(anyhow::anyhow!(
|
|
"failpoint: timeline-delete-after-rm-metadata"
|
|
))?
|
|
});
|
|
|
|
// Remove timeline dir
|
|
tokio::fs::remove_dir(conf.timeline_path(&tenant_id, &timeline_id))
|
|
.await
|
|
.or_else(fs_ext::ignore_not_found)
|
|
.context("timeline dir")?;
|
|
|
|
fail::fail_point!("timeline-delete-after-rm-dir", |_| {
|
|
Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm-dir"))?
|
|
});
|
|
|
|
// Make sure previous deletions are ordered before mark removal.
|
|
// Otherwise there is no guarantee that they reach the disk before mark deletion.
|
|
// So its possible for mark to reach disk first and for other deletions
|
|
// to be reordered later and thus missed if a crash occurs.
|
|
// Note that we dont need to sync after mark file is removed
|
|
// because we can tolerate the case when mark file reappears on startup.
|
|
let timeline_path = conf.timelines_path(&tenant_id);
|
|
crashsafe::fsync_async(timeline_path)
|
|
.await
|
|
.context("fsync_pre_mark_remove")?;
|
|
|
|
// Remove delete mark
|
|
tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_id, timeline_id))
|
|
.await
|
|
.or_else(fs_ext::ignore_not_found)
|
|
.context("remove delete mark")
|
|
}
|
|
|
|
/// It is important that this gets called when DeletionGuard is being held.
|
|
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
|
|
async fn remove_timeline_from_tenant(
|
|
tenant: &Tenant,
|
|
timeline_id: TimelineId,
|
|
_: &DeletionGuard, // using it as a witness
|
|
) -> anyhow::Result<()> {
|
|
// Remove the timeline from the map.
|
|
let mut timelines = tenant.timelines.lock().unwrap();
|
|
let children_exist = timelines
|
|
.iter()
|
|
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id));
|
|
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
|
|
// We already deleted the layer files, so it's probably best to panic.
|
|
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
|
|
if children_exist {
|
|
panic!("Timeline grew children while we removed layer files");
|
|
}
|
|
|
|
timelines
|
|
.remove(&timeline_id)
|
|
.expect("timeline that we were deleting was concurrently removed from 'timelines' map");
|
|
|
|
drop(timelines);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Orchestrates timeline shut down of all timeline tasks, removes its in-memory structures,
/// and deletes its data from both disk and s3.
///
/// The sequence of steps:
/// 1. Set deleted_at in remote index part.
/// 2. Create local mark file.
/// 3. Delete local files except metadata (it is simpler this way, to be able to reuse timeline
///    initialization code that expects metadata)
/// 4. Delete remote layers
/// 5. Delete index part
/// 6. Delete meta, timeline directory
/// 7. Delete mark file
///
/// It is resumable from any step in case a crash/restart occurs.
///
/// There are three entrypoints to the process:
/// 1. [`DeleteTimelineFlow::run`] this is the main one called by a management api handler.
/// 2. [`DeleteTimelineFlow::resume_deletion`] is called during restarts when local metadata is
///    still present and we possibly need to continue deletion of remote files.
/// 3. [`DeleteTimelineFlow::cleanup_remaining_timeline_fs_traces`] is used when we deleted remote
///    index but still have local metadata, timeline directory and delete mark.
///
/// Note the only other place that messes around timeline delete mark is the logic that scans
/// directory with timelines during tenant load.
#[derive(Default)]
pub enum DeleteTimelineFlow {
    /// No deletion has been attempted yet; this is the default state.
    #[default]
    NotStarted,
    /// A deletion attempt has started (or is being retried) and has not finished.
    InProgress,
    /// The deletion ran to completion.
    Finished,
}
|
|
|
|
impl DeleteTimelineFlow {
|
|
// These steps are run in the context of management api request handler.
|
|
// Long running steps are continued to run in the background.
|
|
// NB: If this fails half-way through, and is retried, the retry will go through
|
|
// all the same steps again. Make sure the code here is idempotent, and don't
|
|
// error out if some of the shutdown tasks have already been completed!
|
|
#[instrument(skip(tenant), fields(tenant_id=%tenant.tenant_id))]
|
|
pub async fn run(
|
|
tenant: &Arc<Tenant>,
|
|
timeline_id: TimelineId,
|
|
inplace: bool,
|
|
) -> Result<(), DeleteTimelineError> {
|
|
let (timeline, mut guard) = Self::prepare(tenant, timeline_id)?;
|
|
|
|
guard.mark_in_progress()?;
|
|
|
|
stop_tasks(&timeline).await?;
|
|
|
|
set_deleted_in_remote_index(&timeline).await?;
|
|
|
|
create_delete_mark(tenant.conf, timeline.tenant_id, timeline.timeline_id).await?;
|
|
|
|
fail::fail_point!("timeline-delete-before-schedule", |_| {
|
|
Err(anyhow::anyhow!(
|
|
"failpoint: timeline-delete-before-schedule"
|
|
))?
|
|
});
|
|
|
|
if inplace {
|
|
Self::background(guard, tenant.conf, tenant, &timeline).await?
|
|
} else {
|
|
Self::schedule_background(guard, tenant.conf, Arc::clone(tenant), timeline);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn mark_in_progress(&mut self) -> anyhow::Result<()> {
|
|
match self {
|
|
Self::Finished => anyhow::bail!("Bug. Is in finished state"),
|
|
Self::InProgress { .. } => { /* We're in a retry */ }
|
|
Self::NotStarted => { /* Fresh start */ }
|
|
}
|
|
|
|
*self = Self::InProgress;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Shortcut to create Timeline in stopping state and spawn deletion task.
|
|
/// See corresponding parts of [`crate::tenant::delete::DeleteTenantFlow`]
|
|
#[instrument(skip_all, fields(%timeline_id))]
|
|
pub async fn resume_deletion(
|
|
tenant: Arc<Tenant>,
|
|
timeline_id: TimelineId,
|
|
local_metadata: &TimelineMetadata,
|
|
remote_client: Option<RemoteTimelineClient>,
|
|
deletion_queue_client: DeletionQueueClient,
|
|
init_order: Option<&InitializationOrder>,
|
|
) -> anyhow::Result<()> {
|
|
// Note: here we even skip populating layer map. Timeline is essentially uninitialized.
|
|
// RemoteTimelineClient is the only functioning part.
|
|
let timeline = tenant
|
|
.create_timeline_struct(
|
|
timeline_id,
|
|
local_metadata,
|
|
None, // Ancestor is not needed for deletion.
|
|
TimelineResources {
|
|
remote_client,
|
|
deletion_queue_client,
|
|
},
|
|
init_order,
|
|
// Important. We dont pass ancestor above because it can be missing.
|
|
// Thus we need to skip the validation here.
|
|
CreateTimelineCause::Delete,
|
|
)
|
|
.context("create_timeline_struct")?;
|
|
|
|
let mut guard = DeletionGuard(
|
|
Arc::clone(&timeline.delete_progress)
|
|
.try_lock_owned()
|
|
.expect("cannot happen because we're the only owner"),
|
|
);
|
|
|
|
// We meed to do this because when console retries delete request we shouldnt answer with 404
|
|
// because 404 means successful deletion.
|
|
{
|
|
let mut locked = tenant.timelines.lock().unwrap();
|
|
locked.insert(timeline_id, Arc::clone(&timeline));
|
|
}
|
|
|
|
guard.mark_in_progress()?;
|
|
|
|
// Note that delete mark can be missing on resume
|
|
// because we create delete mark after we set deleted_at in the index part.
|
|
create_delete_mark(tenant.conf, tenant.tenant_id, timeline_id).await?;
|
|
|
|
Self::schedule_background(guard, tenant.conf, tenant, timeline);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[instrument(skip_all, fields(%timeline_id))]
|
|
pub async fn cleanup_remaining_timeline_fs_traces(
|
|
tenant: &Tenant,
|
|
timeline_id: TimelineId,
|
|
) -> anyhow::Result<()> {
|
|
let r =
|
|
cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_id, timeline_id).await;
|
|
info!("Done");
|
|
r
|
|
}
|
|
|
|
fn prepare(
|
|
tenant: &Tenant,
|
|
timeline_id: TimelineId,
|
|
) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> {
|
|
// Note the interaction between this guard and deletion guard.
|
|
// Here we attempt to lock deletion guard when we're holding a lock on timelines.
|
|
// This is important because when you take into account `remove_timeline_from_tenant`
|
|
// we remove timeline from memory when we still hold the deletion guard.
|
|
// So here when timeline deletion is finished timeline wont be present in timelines map at all
|
|
// which makes the following sequence impossible:
|
|
// T1: get preempted right before the try_lock on `Timeline::delete_progress`
|
|
// T2: do a full deletion, acquire and drop `Timeline::delete_progress`
|
|
// T1: acquire deletion lock, do another `DeleteTimelineFlow::run`
|
|
// For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`
|
|
let timelines = tenant.timelines.lock().unwrap();
|
|
|
|
let timeline = match timelines.get(&timeline_id) {
|
|
Some(t) => t,
|
|
None => return Err(DeleteTimelineError::NotFound),
|
|
};
|
|
|
|
// Ensure that there are no child timelines **attached to that pageserver**,
|
|
// because detach removes files, which will break child branches
|
|
let children: Vec<TimelineId> = timelines
|
|
.iter()
|
|
.filter_map(|(id, entry)| {
|
|
if entry.get_ancestor_timeline_id() == Some(timeline_id) {
|
|
Some(*id)
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
if !children.is_empty() {
|
|
return Err(DeleteTimelineError::HasChildren(children));
|
|
}
|
|
|
|
// Note that using try_lock here is important to avoid a deadlock.
|
|
// Here we take lock on timelines and then the deletion guard.
|
|
// At the end of the operation we're holding the guard and need to lock timelines map
|
|
// to remove the timeline from it.
|
|
// Always if you have two locks that are taken in different order this can result in a deadlock.
|
|
|
|
let delete_progress = Arc::clone(&timeline.delete_progress);
|
|
let delete_lock_guard = match delete_progress.try_lock_owned() {
|
|
Ok(guard) => DeletionGuard(guard),
|
|
Err(_) => {
|
|
// Unfortunately if lock fails arc is consumed.
|
|
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
|
|
&timeline.delete_progress,
|
|
)));
|
|
}
|
|
};
|
|
|
|
timeline.set_state(TimelineState::Stopping);
|
|
|
|
Ok((Arc::clone(timeline), delete_lock_guard))
|
|
}
|
|
|
|
fn schedule_background(
|
|
guard: DeletionGuard,
|
|
conf: &'static PageServerConf,
|
|
tenant: Arc<Tenant>,
|
|
timeline: Arc<Timeline>,
|
|
) {
|
|
let tenant_id = timeline.tenant_id;
|
|
let timeline_id = timeline.timeline_id;
|
|
|
|
task_mgr::spawn(
|
|
task_mgr::BACKGROUND_RUNTIME.handle(),
|
|
TaskKind::TimelineDeletionWorker,
|
|
Some(tenant_id),
|
|
Some(timeline_id),
|
|
"timeline_delete",
|
|
false,
|
|
async move {
|
|
if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
|
|
error!("Error: {err:#}");
|
|
timeline.set_broken(format!("{err:#}"))
|
|
};
|
|
Ok(())
|
|
}
|
|
.instrument({
|
|
let span =
|
|
tracing::info_span!(parent: None, "delete_timeline", tenant_id=%tenant_id, timeline_id=%timeline_id);
|
|
span.follows_from(Span::current());
|
|
span
|
|
}),
|
|
);
|
|
}
|
|
|
|
async fn background(
|
|
mut guard: DeletionGuard,
|
|
conf: &PageServerConf,
|
|
tenant: &Tenant,
|
|
timeline: &Timeline,
|
|
) -> Result<(), DeleteTimelineError> {
|
|
delete_local_layer_files(conf, tenant.tenant_id, timeline).await?;
|
|
|
|
delete_remote_layers_and_index(timeline).await?;
|
|
|
|
pausable_failpoint!("in_progress_delete");
|
|
|
|
cleanup_remaining_timeline_fs_traces(conf, tenant.tenant_id, timeline.timeline_id).await?;
|
|
|
|
remove_timeline_from_tenant(tenant, timeline.timeline_id, &guard).await?;
|
|
|
|
*guard = Self::Finished;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub(crate) fn is_finished(&self) -> bool {
|
|
matches!(self, Self::Finished)
|
|
}
|
|
}
|
|
|
|
struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
|
|
|
|
impl Deref for DeletionGuard {
|
|
type Target = DeleteTimelineFlow;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl DerefMut for DeletionGuard {
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
&mut self.0
|
|
}
|
|
}
|