From b06dffe3dc46330cd3a86a13b8ffd66cd75cdb49 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 17 Oct 2023 09:21:31 +0100 Subject: [PATCH] pageserver: fixes to `/location_config` API (#5548) ## Problem I found some issues with the `/location_config` API when writing new tests. ## Summary of changes - Calling the API with the "Detached" state is now idempotent. - `Tenant::spawn_attach` now takes a boolean to indicate whether to expect a marker file. Marker files are used in the old attach path, but not in the new location conf API. They aren't needed because in the New World, the choice of whether to attach via remote state ("attach") or to trust local state ("load") will be revised to cope with the transitions between secondary & attached (see https://github.com/neondatabase/neon/issues/5550). It is okay to merge this change ahead of that ticket, because the API is not used in the wild yet. - Instead of using `schedule_local_tenant_processing`, the location conf API handler does its own directory creation and calls `spawn_attach` directly. - A new `unsafe_create_dir_all` is added. This differs from crashsafe::create_dir_all in two ways: - It is intentionally not crashsafe, because in the location conf API we are no longer using directory or config existence as the signal for any important business logic. - It is async and uses `tokio::fs`. --- pageserver/src/http/routes.rs | 12 ++- pageserver/src/tenant.rs | 41 +++++++---- pageserver/src/tenant/delete.rs | 5 +- pageserver/src/tenant/mgr.rs | 126 +++++++++++++++++++++++++------- 4 files changed, 143 insertions(+), 41 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 38304eb16b..c00dad50ac 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1034,9 +1034,17 @@ async fn put_tenant_location_config_handler( // The `Detached` state is special, it doesn't upsert a tenant, it removes // its local disk content and drops it from memory. if let LocationConfigMode::Detached = request_data.config.mode { - mgr::detach_tenant(conf, tenant_id, true, &state.deletion_queue_client) + if let Err(e) = mgr::detach_tenant(conf, tenant_id, true, &state.deletion_queue_client) .instrument(info_span!("tenant_detach", %tenant_id)) - .await?; + .await + { + match e { + TenantStateError::NotFound(_) => { + // This API is idempotent: a NotFound on a detach is fine. + } + _ => return Err(e.into()), + } + } return json_response(StatusCode::OK, ()); } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 88b2423eec..185d7954ea 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -420,6 +420,12 @@ pub enum CreateTimelineError { Other(#[from] anyhow::Error), } +/// spawn_attach argument for whether the caller is using attachment markers +pub(super) enum AttachMarkerMode { + Expect, + Ignore, +} + struct TenantDirectoryScan { sorted_timelines_to_load: Vec<(TimelineId, TimelineMetadata)>, timelines_to_resume_deletion: Vec<(TimelineId, Option)>, @@ -567,6 +573,7 @@ impl Tenant { resources: TenantSharedResources, attached_conf: AttachedTenantConf, tenants: &'static tokio::sync::RwLock, + expect_marker: AttachMarkerMode, ctx: &RequestContext, ) -> anyhow::Result> { // TODO dedup with spawn_load @@ -648,7 +655,7 @@ impl Tenant { } } - match tenant_clone.attach(&ctx).await { + match tenant_clone.attach(&ctx, expect_marker).await { Ok(()) => { info!("attach finished, activating"); tenant_clone.activate(broker_client, None, &ctx); @@ -673,17 +680,23 @@ impl Tenant { /// /// No background tasks are started as part of this routine. /// - async fn attach(self: &Arc, ctx: &RequestContext) -> anyhow::Result<()> { + async fn attach( + self: &Arc, + ctx: &RequestContext, + expect_marker: AttachMarkerMode, + ) -> anyhow::Result<()> { span::debug_assert_current_span_has_tenant_id(); let marker_file = self.conf.tenant_attaching_mark_file_path(&self.tenant_id); - if !tokio::fs::try_exists(&marker_file) - .await - .context("check for existence of marker file")? - { - anyhow::bail!( - "implementation error: marker file should exist at beginning of this function" - ); + if let AttachMarkerMode::Expect = expect_marker { + if !tokio::fs::try_exists(&marker_file) + .await + .context("check for existence of marker file")? + { + anyhow::bail!( + "implementation error: marker file should exist at beginning of this function" + ); + } } // Get list of remote timelines @@ -805,10 +818,12 @@ impl Tenant { .map_err(LoadLocalTimelineError::ResumeDeletion)?; } - std::fs::remove_file(&marker_file) - .with_context(|| format!("unlink attach marker file {marker_file}"))?; - crashsafe::fsync(marker_file.parent().expect("marker file has parent dir")) - .context("fsync tenant directory after unlinking attach marker file")?; + if let AttachMarkerMode::Expect = expect_marker { + std::fs::remove_file(&marker_file) + .with_context(|| format!("unlink attach marker file {marker_file}"))?; + crashsafe::fsync(marker_file.parent().expect("marker file has parent dir")) + .context("fsync tenant directory after unlinking attach marker file")?; + } crate::failpoint_support::sleep_millis_async!("attach-before-activate"); diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 0db6213bbf..989f2cd779 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -458,7 +458,10 @@ impl DeleteTenantFlow { .await .expect("cant be stopping or broken"); - tenant.attach(ctx).await.context("attach")?; + tenant + .attach(ctx, super::AttachMarkerMode::Expect) + .await + .context("attach")?; Self::background( guard, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 83ba49e1f5..3b6039db93 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -27,7 +27,8 @@ use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt}; use crate::tenant::delete::DeleteTenantFlow; use crate::tenant::{ - create_tenant_files, AttachedTenantConf, CreateTenantFilesMode, Tenant, TenantState, + create_tenant_files, AttachMarkerMode, AttachedTenantConf, CreateTenantFilesMode, Tenant, + TenantState, }; use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX}; @@ -151,6 +152,49 @@ async fn safe_rename_tenant_dir(path: impl AsRef) -> std::io::Result> = Lazy::new(|| RwLock::new(TenantsMap::Initializing)); +/// Create a directory, including parents. This does no fsyncs and makes +/// no guarantees about the persistence of the resulting metadata: for +/// use when creating dirs for use as cache. +async fn unsafe_create_dir_all(path: &Utf8PathBuf) -> std::io::Result<()> { + let mut dirs_to_create = Vec::new(); + let mut path: &Utf8Path = path.as_ref(); + + // Figure out which directories we need to create. + loop { + let meta = tokio::fs::metadata(path).await; + match meta { + Ok(metadata) if metadata.is_dir() => break, + Ok(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::AlreadyExists, + format!("non-directory found in path: {path}"), + )); + } + Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => {} + Err(e) => return Err(e), + } + + dirs_to_create.push(path); + + match path.parent() { + Some(parent) => path = parent, + None => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("can't find parent of path '{path}'"), + )); + } + } + } + + // Create directories from parent to child. + for &path in dirs_to_create.iter().rev() { + tokio::fs::create_dir(path).await?; + } + + Ok(()) +} + fn emergency_generations( tenant_confs: &HashMap>, ) -> HashMap { @@ -446,7 +490,15 @@ pub(crate) fn schedule_local_tenant_processing( "attaching mark file present but no remote storage configured".to_string(), ) } else { - match Tenant::spawn_attach(conf, tenant_id, resources, location_conf, tenants, ctx) { + match Tenant::spawn_attach( + conf, + tenant_id, + resources, + location_conf, + tenants, + AttachMarkerMode::Expect, + ctx, + ) { Ok(tenant) => tenant, Err(e) => { error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}"); @@ -655,7 +707,7 @@ pub(crate) async fn set_new_tenant_config( Ok(()) } -#[instrument(skip_all, fields(tenant_id, new_location_config))] +#[instrument(skip_all, fields(%tenant_id))] pub(crate) async fn upsert_location( conf: &'static PageServerConf, tenant_id: TenantId, @@ -734,36 +786,61 @@ pub(crate) async fn upsert_location( } let new_slot = match &new_location_config.mode { - LocationMode::Secondary(_) => TenantSlot::Secondary, - LocationMode::Attached(_attach_config) => { - // Do a schedule_local_tenant_processing - // FIXME: should avoid doing this disk I/O inside the TenantsMap lock, - // we have the same problem in load_tenant/attach_tenant. Probably - // need a lock in TenantSlot to fix this. + LocationMode::Secondary(_) => { + let tenant_path = conf.tenant_path(&tenant_id); + // Directory doesn't need to be fsync'd because if we crash it can + // safely be recreated next time this tenant location is configured. + unsafe_create_dir_all(&tenant_path) + .await + .with_context(|| format!("Creating {tenant_path}"))?; + Tenant::persist_tenant_config(conf, &tenant_id, &new_location_config) .await .map_err(SetNewTenantConfigError::Persist)?; - let tenant_path = conf.tenant_path(&tenant_id); - let resources = TenantSharedResources { - broker_client, - remote_storage, - deletion_queue_client, - }; - let new_tenant = schedule_local_tenant_processing( + + TenantSlot::Secondary + } + LocationMode::Attached(_attach_config) => { + // FIXME: should avoid doing this disk I/O inside the TenantsMap lock, + // we have the same problem in load_tenant/attach_tenant. Probably + // need a lock in TenantSlot to fix this. + let timelines_path = conf.timelines_path(&tenant_id); + + // Directory doesn't need to be fsync'd because we do not depend on + // it to exist after crashes: it may be recreated when tenant is + // re-attached, see https://github.com/neondatabase/neon/issues/5550 + unsafe_create_dir_all(&timelines_path) + .await + .with_context(|| format!("Creating {timelines_path}"))?; + + Tenant::persist_tenant_config(conf, &tenant_id, &new_location_config) + .await + .map_err(SetNewTenantConfigError::Persist)?; + + let tenant = match Tenant::spawn_attach( conf, tenant_id, - &tenant_path, + TenantSharedResources { + broker_client, + remote_storage, + deletion_queue_client, + }, AttachedTenantConf::try_from(new_location_config)?, - resources, - None, &TENANTS, + // The LocationConf API does not use marker files, because we have Secondary + // locations where the directory's existence is not a signal that it contains + // all timelines. See https://github.com/neondatabase/neon/issues/5550 + AttachMarkerMode::Ignore, ctx, - ) - .with_context(|| { - format!("Failed to schedule tenant processing in path {tenant_path:?}") - })?; + ) { + Ok(tenant) => tenant, + Err(e) => { + error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}"); + Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}")) + } + }; - TenantSlot::Attached(new_tenant) + TenantSlot::Attached(tenant) } }; @@ -771,7 +848,6 @@ pub(crate) async fn upsert_location( }) .await?; } - Ok(()) }