diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 38304eb16b..c00dad50ac 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1034,9 +1034,17 @@ async fn put_tenant_location_config_handler( // The `Detached` state is special, it doesn't upsert a tenant, it removes // its local disk content and drops it from memory. if let LocationConfigMode::Detached = request_data.config.mode { - mgr::detach_tenant(conf, tenant_id, true, &state.deletion_queue_client) + if let Err(e) = mgr::detach_tenant(conf, tenant_id, true, &state.deletion_queue_client) .instrument(info_span!("tenant_detach", %tenant_id)) - .await?; + .await + { + match e { + TenantStateError::NotFound(_) => { + // This API is idempotent: a NotFound on a detach is fine. + } + _ => return Err(e.into()), + } + } return json_response(StatusCode::OK, ()); } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 88b2423eec..185d7954ea 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -420,6 +420,12 @@ pub enum CreateTimelineError { Other(#[from] anyhow::Error), } +/// spawn_attach argument for whether the caller is using attachment markers +pub(super) enum AttachMarkerMode { + Expect, + Ignore, +} + struct TenantDirectoryScan { sorted_timelines_to_load: Vec<(TimelineId, TimelineMetadata)>, timelines_to_resume_deletion: Vec<(TimelineId, Option)>, @@ -567,6 +573,7 @@ impl Tenant { resources: TenantSharedResources, attached_conf: AttachedTenantConf, tenants: &'static tokio::sync::RwLock, + expect_marker: AttachMarkerMode, ctx: &RequestContext, ) -> anyhow::Result> { // TODO dedup with spawn_load @@ -648,7 +655,7 @@ impl Tenant { } } - match tenant_clone.attach(&ctx).await { + match tenant_clone.attach(&ctx, expect_marker).await { Ok(()) => { info!("attach finished, activating"); tenant_clone.activate(broker_client, None, &ctx); @@ -673,17 +680,23 @@ impl Tenant { /// /// No background tasks are started as part of this routine. /// - async fn attach(self: &Arc, ctx: &RequestContext) -> anyhow::Result<()> { + async fn attach( + self: &Arc, + ctx: &RequestContext, + expect_marker: AttachMarkerMode, + ) -> anyhow::Result<()> { span::debug_assert_current_span_has_tenant_id(); let marker_file = self.conf.tenant_attaching_mark_file_path(&self.tenant_id); - if !tokio::fs::try_exists(&marker_file) - .await - .context("check for existence of marker file")? - { - anyhow::bail!( - "implementation error: marker file should exist at beginning of this function" - ); + if let AttachMarkerMode::Expect = expect_marker { + if !tokio::fs::try_exists(&marker_file) + .await + .context("check for existence of marker file")? + { + anyhow::bail!( + "implementation error: marker file should exist at beginning of this function" + ); + } } // Get list of remote timelines @@ -805,10 +818,12 @@ impl Tenant { .map_err(LoadLocalTimelineError::ResumeDeletion)?; } - std::fs::remove_file(&marker_file) - .with_context(|| format!("unlink attach marker file {marker_file}"))?; - crashsafe::fsync(marker_file.parent().expect("marker file has parent dir")) - .context("fsync tenant directory after unlinking attach marker file")?; + if let AttachMarkerMode::Expect = expect_marker { + std::fs::remove_file(&marker_file) + .with_context(|| format!("unlink attach marker file {marker_file}"))?; + crashsafe::fsync(marker_file.parent().expect("marker file has parent dir")) + .context("fsync tenant directory after unlinking attach marker file")?; + } crate::failpoint_support::sleep_millis_async!("attach-before-activate"); diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 0db6213bbf..989f2cd779 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -458,7 +458,10 @@ impl DeleteTenantFlow { .await .expect("cant be stopping or broken"); - tenant.attach(ctx).await.context("attach")?; + tenant + .attach(ctx, super::AttachMarkerMode::Expect) + .await + .context("attach")?; Self::background( guard, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 83ba49e1f5..3b6039db93 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -27,7 +27,8 @@ use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt}; use crate::tenant::delete::DeleteTenantFlow; use crate::tenant::{ - create_tenant_files, AttachedTenantConf, CreateTenantFilesMode, Tenant, TenantState, + create_tenant_files, AttachMarkerMode, AttachedTenantConf, CreateTenantFilesMode, Tenant, + TenantState, }; use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX}; @@ -151,6 +152,49 @@ async fn safe_rename_tenant_dir(path: impl AsRef) -> std::io::Result> = Lazy::new(|| RwLock::new(TenantsMap::Initializing)); +/// Create a directory, including parents. This does no fsyncs and makes +/// no guarantees about the persistence of the resulting metadata: for +/// use when creating dirs for use as cache. +async fn unsafe_create_dir_all(path: &Utf8PathBuf) -> std::io::Result<()> { + let mut dirs_to_create = Vec::new(); + let mut path: &Utf8Path = path.as_ref(); + + // Figure out which directories we need to create. + loop { + let meta = tokio::fs::metadata(path).await; + match meta { + Ok(metadata) if metadata.is_dir() => break, + Ok(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::AlreadyExists, + format!("non-directory found in path: {path}"), + )); + } + Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => {} + Err(e) => return Err(e), + } + + dirs_to_create.push(path); + + match path.parent() { + Some(parent) => path = parent, + None => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("can't find parent of path '{path}'"), + )); + } + } + } + + // Create directories from parent to child. + for &path in dirs_to_create.iter().rev() { + tokio::fs::create_dir(path).await?; + } + + Ok(()) +} + fn emergency_generations( tenant_confs: &HashMap>, ) -> HashMap { @@ -446,7 +490,15 @@ pub(crate) fn schedule_local_tenant_processing( "attaching mark file present but no remote storage configured".to_string(), ) } else { - match Tenant::spawn_attach(conf, tenant_id, resources, location_conf, tenants, ctx) { + match Tenant::spawn_attach( + conf, + tenant_id, + resources, + location_conf, + tenants, + AttachMarkerMode::Expect, + ctx, + ) { Ok(tenant) => tenant, Err(e) => { error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}"); @@ -655,7 +707,7 @@ pub(crate) async fn set_new_tenant_config( Ok(()) } -#[instrument(skip_all, fields(tenant_id, new_location_config))] +#[instrument(skip_all, fields(%tenant_id))] pub(crate) async fn upsert_location( conf: &'static PageServerConf, tenant_id: TenantId, @@ -734,36 +786,61 @@ pub(crate) async fn upsert_location( } let new_slot = match &new_location_config.mode { - LocationMode::Secondary(_) => TenantSlot::Secondary, - LocationMode::Attached(_attach_config) => { - // Do a schedule_local_tenant_processing - // FIXME: should avoid doing this disk I/O inside the TenantsMap lock, - // we have the same problem in load_tenant/attach_tenant. Probably - // need a lock in TenantSlot to fix this. + LocationMode::Secondary(_) => { + let tenant_path = conf.tenant_path(&tenant_id); + // Directory doesn't need to be fsync'd because if we crash it can + // safely be recreated next time this tenant location is configured. + unsafe_create_dir_all(&tenant_path) + .await + .with_context(|| format!("Creating {tenant_path}"))?; + Tenant::persist_tenant_config(conf, &tenant_id, &new_location_config) .await .map_err(SetNewTenantConfigError::Persist)?; - let tenant_path = conf.tenant_path(&tenant_id); - let resources = TenantSharedResources { - broker_client, - remote_storage, - deletion_queue_client, - }; - let new_tenant = schedule_local_tenant_processing( + + TenantSlot::Secondary + } + LocationMode::Attached(_attach_config) => { + // FIXME: should avoid doing this disk I/O inside the TenantsMap lock, + // we have the same problem in load_tenant/attach_tenant. Probably + // need a lock in TenantSlot to fix this. + let timelines_path = conf.timelines_path(&tenant_id); + + // Directory doesn't need to be fsync'd because we do not depend on + // it to exist after crashes: it may be recreated when tenant is + // re-attached, see https://github.com/neondatabase/neon/issues/5550 + unsafe_create_dir_all(&timelines_path) + .await + .with_context(|| format!("Creating {timelines_path}"))?; + + Tenant::persist_tenant_config(conf, &tenant_id, &new_location_config) + .await + .map_err(SetNewTenantConfigError::Persist)?; + + let tenant = match Tenant::spawn_attach( conf, tenant_id, - &tenant_path, + TenantSharedResources { + broker_client, + remote_storage, + deletion_queue_client, + }, AttachedTenantConf::try_from(new_location_config)?, - resources, - None, &TENANTS, + // The LocationConf API does not use marker files, because we have Secondary + // locations where the directory's existence is not a signal that it contains + // all timelines. See https://github.com/neondatabase/neon/issues/5550 + AttachMarkerMode::Ignore, ctx, - ) - .with_context(|| { - format!("Failed to schedule tenant processing in path {tenant_path:?}") - })?; + ) { + Ok(tenant) => tenant, + Err(e) => { + error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}"); + Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}")) + } + }; - TenantSlot::Attached(new_tenant) + TenantSlot::Attached(tenant) } }; @@ -771,7 +848,6 @@ pub(crate) async fn upsert_location( }) .await?; } - Ok(()) }