From 32dcf6dafa110c9c8ce87de03ecf5cdae4a5ae35 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 19 Oct 2023 09:20:08 +0100 Subject: [PATCH] pageserver: unified spawn() method --- pageserver/src/tenant.rs | 126 +++++++++-------------------------- pageserver/src/tenant/mgr.rs | 16 +++-- 2 files changed, 40 insertions(+), 102 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 5f37d502cb..d52a2cc54e 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -195,6 +195,13 @@ pub(crate) struct TenantPreload { timelines: HashMap, } +/// When we spawn a tenant, there is a special mode for tenant creation that +/// avoids trying to read anything from remote storage. +pub(crate) enum SpawnMode { + Normal, + Create, +} + /// /// Tenant consists of multiple timelines. Keep them in a hash table. /// @@ -504,84 +511,6 @@ impl Tenant { Ok(()) } - /// Special case for starting a Tenant: immediately after tenant creation, a Tenant has no - /// timelines and `spawn_attach` will not work. - /// - /// This function can go away once we always create Tenants with an initial timeline - /// See: `` - pub(crate) fn spawn_create( - conf: &'static PageServerConf, - tenant_id: TenantId, - resources: TenantSharedResources, - attached_conf: AttachedTenantConf, - ctx: &RequestContext, - ) -> anyhow::Result> { - let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new( - conf, tenant_id, - ))); - - let TenantSharedResources { - broker_client, - remote_storage, - deletion_queue_client, - } = resources; - - let tenant = Arc::new(Tenant::new( - TenantState::Attaching, - conf, - attached_conf, - wal_redo_manager, - tenant_id, - remote_storage.clone(), - deletion_queue_client, - )); - - // Do all the hard work in the background - let tenant_clone = Arc::clone(&tenant); - - let ctx = ctx.detached_child(TaskKind::Attach, DownloadBehavior::Warn); - task_mgr::spawn( - &tokio::runtime::Handle::current(), - TaskKind::Attach, - Some(tenant_id), - None, - "create tenant", - false, - async move { - // Ideally we should use Tenant::set_broken_no_wait, but it is not supposed to be used when tenant is in loading state. - let make_broken = |t: &Tenant, err: anyhow::Error| { - error!("creation failed, setting tenant state to Broken: {err:?}"); - t.state.send_modify(|state| { - assert_eq!( - *state, - TenantState::Attaching, - "the attach task owns the tenant state until activation is complete" - ); - *state = TenantState::broken_from_reason(err.to_string()); - }); - }; - - match tenant_clone.load_local(None, &ctx).await { - Ok(()) => { - info!("attach finished, activating"); - tenant_clone.activate(broker_client, None, &ctx); - } - Err(e) => { - make_broken(&tenant_clone, anyhow::anyhow!(e)); - } - } - Ok(()) - } - .instrument({ - let span = tracing::info_span!(parent: None, "spawn_create", tenant_id=%tenant_id); - span.follows_from(Span::current()); - span - }), - ); - Ok(tenant) - } - - /// /// Attach a tenant that's available in cloud storage. /// /// This returns quickly, after just creating the in-memory object @@ -591,13 +520,15 @@ impl Tenant { /// finishes. You can use wait_until_active() to wait for the task to /// complete. /// - pub(crate) fn spawn_attach( + #[allow(clippy::too_many_arguments)] + pub(crate) fn spawn( conf: &'static PageServerConf, tenant_id: TenantId, resources: TenantSharedResources, attached_conf: AttachedTenantConf, init_order: Option, tenants: &'static tokio::sync::RwLock, + mode: SpawnMode, ctx: &RequestContext, ) -> anyhow::Result> { let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new( @@ -658,22 +589,27 @@ impl Tenant { .as_mut() .and_then(|x| x.initial_tenant_load_remote.take()); - let preload = match &remote_storage { - Some(remote_storage) => Some( - match tenant_clone - .preload(remote_storage, task_mgr::shutdown_token()) - .instrument( - tracing::info_span!(parent: None, "attach_preload", tenant_id=%tenant_id), - ) - .await { - Ok(p) => p, - Err(e) => { - make_broken(&tenant_clone, anyhow::anyhow!(e)); - return Ok(()); - } - }, - ), - None => None, + let preload = match mode { + SpawnMode::Create => {None}, + SpawnMode::Normal => { + match &remote_storage { + Some(remote_storage) => Some( + match tenant_clone + .preload(remote_storage, task_mgr::shutdown_token()) + .instrument( + tracing::info_span!(parent: None, "attach_preload", tenant_id=%tenant_id), + ) + .await { + Ok(p) => p, + Err(e) => { + make_broken(&tenant_clone, anyhow::anyhow!(e)); + return Ok(()); + } + }, + ), + None => None, + } + } }; // Remote preload is complete. diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 0b8a391d7e..9df119f6e8 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -26,7 +26,7 @@ use crate::deletion_queue::DeletionQueueClient; use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt}; use crate::tenant::delete::DeleteTenantFlow; -use crate::tenant::{create_tenant_files, AttachedTenantConf, Tenant, TenantState}; +use crate::tenant::{create_tenant_files, AttachedTenantConf, SpawnMode, Tenant, TenantState}; use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX}; use utils::crashsafe::path_with_suffix_extension; @@ -476,18 +476,19 @@ pub(crate) fn schedule_local_tenant_processing( ); info!("Attaching tenant {tenant_id}"); - let tenant = match Tenant::spawn_attach( + let tenant = match Tenant::spawn( conf, tenant_id, resources, location_conf, init_order, tenants, + SpawnMode::Normal, ctx, ) { Ok(tenant) => tenant, Err(e) => { - error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}"); + error!("Failed to spawn tenant {tenant_id}, reason: {e:#}"); Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}")) } }; @@ -638,8 +639,8 @@ pub(crate) async fn create_tenant( // TODO: tenant directory remains on disk if we bail out from here on. // See https://github.com/neondatabase/neon/issues/4233 - let created_tenant = Tenant::spawn_create(conf, tenant_id, resources, - AttachedTenantConf::try_from(location_conf)?, ctx)?; + let created_tenant = Tenant::spawn(conf, tenant_id, resources, + AttachedTenantConf::try_from(location_conf)?, None, &TENANTS, SpawnMode::Create, ctx)?; // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here. // See https://github.com/neondatabase/neon/issues/4233 @@ -790,7 +791,7 @@ pub(crate) async fn upsert_location( .await .map_err(SetNewTenantConfigError::Persist)?; - let tenant = match Tenant::spawn_attach( + let tenant = match Tenant::spawn( conf, tenant_id, TenantSharedResources { @@ -801,11 +802,12 @@ pub(crate) async fn upsert_location( AttachedTenantConf::try_from(new_location_config)?, None, &TENANTS, + SpawnMode::Normal, ctx, ) { Ok(tenant) => tenant, Err(e) => { - error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}"); + error!("Failed to spawn tenant {tenant_id}, reason: {e:#}"); Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}")) } };