From 52c2c693510bc9975817354350108623d3fbef8e Mon Sep 17 00:00:00 2001 From: Dmitry Rodionov Date: Tue, 15 Aug 2023 19:24:23 +0300 Subject: [PATCH] fsync directory before mark file removal (#4986) ## Problem Deletions can be possibly reordered. Use fsync to avoid the case when mark file doesnt exist but other tenant/timeline files do. See added comments. resolves #4987 --- libs/utils/src/crashsafe.rs | 4 ++++ pageserver/src/tenant/delete.rs | 13 +++++++++++++ .../src/tenant/remote_timeline_client/download.rs | 8 ++------ pageserver/src/tenant/timeline/delete.rs | 11 +++++++++++ test_runner/fixtures/pageserver/utils.py | 2 +- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/libs/utils/src/crashsafe.rs b/libs/utils/src/crashsafe.rs index 2c7e6e20ab..fd20d2d2ed 100644 --- a/libs/utils/src/crashsafe.rs +++ b/libs/utils/src/crashsafe.rs @@ -111,6 +111,10 @@ pub fn fsync(path: &Path) -> io::Result<()> { .map_err(|e| io::Error::new(e.kind(), format!("Failed to fsync file {path:?}: {e}"))) } +pub async fn fsync_async(path: impl AsRef) -> Result<(), std::io::Error> { + tokio::fs::File::open(path).await?.sync_all().await +} + #[cfg(test)] mod tests { use tempfile::tempdir; diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 38fc31f69c..4f34f3c113 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -212,6 +212,19 @@ async fn cleanup_remaining_fs_traces( ))? }); + // Make sure previous deletions are ordered before mark removal. + // Otherwise there is no guarantee that they reach the disk before mark deletion. + // So its possible for mark to reach disk first and for other deletions + // to be reordered later and thus missed if a crash occurs. + // Note that we dont need to sync after mark file is removed + // because we can tolerate the case when mark file reappears on startup. + let tenant_path = &conf.tenant_path(tenant_id); + if tenant_path.exists() { + crashsafe::fsync_async(&conf.tenant_path(tenant_id)) + .await + .context("fsync_pre_mark_remove")?; + } + rm(conf.tenant_deleted_mark_file_path(tenant_id), false).await?; fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| { diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 0a6fd03887..7426ae10e9 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -11,7 +11,7 @@ use std::time::Duration; use anyhow::{anyhow, Context}; use tokio::fs; use tokio::io::AsyncWriteExt; -use utils::backoff; +use utils::{backoff, crashsafe}; use crate::config::PageServerConf; use crate::tenant::storage_layer::LayerFileName; @@ -23,10 +23,6 @@ use utils::id::{TenantId, TimelineId}; use super::index::{IndexPart, LayerFileMetadata}; use super::{FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES}; -async fn fsync_path(path: impl AsRef) -> Result<(), std::io::Error> { - fs::File::open(path).await?.sync_all().await -} - static MAX_DOWNLOAD_DURATION: Duration = Duration::from_secs(120); /// @@ -150,7 +146,7 @@ pub async fn download_layer_file<'a>( }) .map_err(DownloadError::Other)?; - fsync_path(&local_path) + crashsafe::fsync_async(&local_path) .await .with_context(|| format!("Could not fsync layer file {}", local_path.display(),)) .map_err(DownloadError::Other)?; diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index dba6475c27..d3d9c8a082 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -279,6 +279,17 @@ async fn cleanup_remaining_timeline_fs_traces( Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm-dir"))? }); + // Make sure previous deletions are ordered before mark removal. + // Otherwise there is no guarantee that they reach the disk before mark deletion. + // So its possible for mark to reach disk first and for other deletions + // to be reordered later and thus missed if a crash occurs. + // Note that we dont need to sync after mark file is removed + // because we can tolerate the case when mark file reappears on startup. + let timeline_path = conf.timelines_path(&tenant_id); + crashsafe::fsync_async(timeline_path) + .await + .context("fsync_pre_mark_remove")?; + // Remove delete mark tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_id, timeline_id)) .await diff --git a/test_runner/fixtures/pageserver/utils.py b/test_runner/fixtures/pageserver/utils.py index 6032ff5b68..3b95990a57 100644 --- a/test_runner/fixtures/pageserver/utils.py +++ b/test_runner/fixtures/pageserver/utils.py @@ -284,4 +284,4 @@ MANY_SMALL_LAYERS_TENANT_CONFIG = { def poll_for_remote_storage_iterations(remote_storage_kind: RemoteStorageKind) -> int: - return 20 if remote_storage_kind is RemoteStorageKind.REAL_S3 else 8 + return 30 if remote_storage_kind is RemoteStorageKind.REAL_S3 else 10