From f28bf70596030e39be09b41f9a1e92b9229dccf0 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 24 Nov 2022 08:44:20 -0500 Subject: [PATCH] tenant creation: re-use load_local_tenant() --- pageserver/src/http/routes.rs | 28 +++++++++++++++++++---- pageserver/src/tenant.rs | 31 +++++--------------------- pageserver/src/tenant_mgr.rs | 42 ++++++++++++++++++++++++++--------- 3 files changed, 61 insertions(+), 40 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index e0a74ba2f0..0ca1d4aa04 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -558,7 +558,7 @@ async fn tenant_create_handler(mut request: Request) -> Result) -> Result) -> Result json_response(StatusCode::CREATED, TenantCreateResponse(id))?, + Ok(match new_tenant { + Some(tenant) => { + // We created the tenant. Existing API semantics are that the tenant + // is Active when this function returns. + match tenant.wait_to_become_active().await { + res @ Err(_) => { + // This shouldn't happen because we just created the tenant directory + // in tenant_mgr::create_tenat, and there aren't any remote timelines + // to load, so, nothing can really fail during load. + // Don't do cleanup because we don't know how we got here. + // Then tenant will likekly be in `Broken` state and subsequent + // calls will fail. + res.context("created tenant failed to become active") + .map_err(ApiError::InternalServerError)?; + } + Ok(_) => (), + } + json_response( + StatusCode::CREATED, + TenantCreateResponse(tenant.tenant_id()), + )? + } None => json_response(StatusCode::CONFLICT, ())?, }) } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 5a837dec70..effd4c0249 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -705,29 +705,6 @@ impl Tenant { .await } - pub fn create_tenant( - conf: &'static PageServerConf, - tenant_conf: TenantConfOpt, - tenant_id: TenantId, - remote_storage: Option, - ) -> anyhow::Result> { - let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id)); - create_tenant_files(conf, tenant_conf, tenant_id)?; - // create tenant in Loading state, and activate it immediately so that it is - // possible to issue create_timeline request on it. - let tenant = Arc::new(Tenant::new( - TenantState::Loading, - conf, - tenant_conf, - wal_redo_manager, - tenant_id, - remote_storage, - )); - tenant.activate()?; - - Ok(tenant) - } - /// Create a placeholder Tenant object for a broken tenant pub fn create_broken_tenant(conf: &'static PageServerConf, tenant_id: TenantId) -> Arc { let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id)); @@ -2234,11 +2211,11 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a Ok(()) } -fn create_tenant_files( +pub(crate) fn create_tenant_files( conf: &'static PageServerConf, tenant_conf: TenantConfOpt, tenant_id: TenantId, -) -> anyhow::Result<()> { +) -> anyhow::Result { let target_tenant_directory = conf.tenant_path(&tenant_id); anyhow::ensure!( !target_tenant_directory.exists(), @@ -2279,7 +2256,9 @@ fn create_tenant_files( } } - creation_result + creation_result?; + + Ok(target_tenant_directory) } fn try_create_target_tenant_dir( diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index 2965553848..cb1529bfc5 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -78,7 +78,10 @@ pub fn init_tenant_mgr( } } else { match load_local_tenant(conf, &tenant_dir_path, remote_storage.clone()) { - Ok(Some(_)) => number_of_tenants += 1, + Ok(Some(tenant)) => { + tenants_state::write_tenants().insert(tenant.tenant_id(), tenant); + number_of_tenants += 1; + } Ok(None) => { // This case happens if we crash during attach before creating the attach marker file if let Err(e) = std::fs::remove_dir(&tenant_dir_path) { @@ -121,7 +124,7 @@ fn load_local_tenant( conf: &'static PageServerConf, tenant_path: &Path, remote_storage: Option, -) -> anyhow::Result> { +) -> anyhow::Result>> { if !tenant_path.is_dir() { anyhow::bail!("tenant_path is not a directory: {tenant_path:?}") } @@ -154,8 +157,7 @@ fn load_local_tenant( // Start loading the tenant into memory. It will initially be in Loading state. Tenant::spawn_load(conf, tenant_id, remote_storage)? }; - tenants_state::write_tenants().insert(tenant_id, tenant); - Ok(Some(())) + Ok(Some(tenant)) } /// @@ -201,18 +203,38 @@ pub fn create_tenant( conf: &'static PageServerConf, tenant_conf: TenantConfOpt, tenant_id: TenantId, - remote_storage: Option<&GenericRemoteStorage>, -) -> anyhow::Result> { + remote_storage: Option, +) -> anyhow::Result>> { match tenants_state::write_tenants().entry(tenant_id) { hash_map::Entry::Occupied(_) => { debug!("tenant {tenant_id} already exists"); Ok(None) } hash_map::Entry::Vacant(v) => { - let tenant = - Tenant::create_tenant(conf, tenant_conf, tenant_id, remote_storage.cloned())?; - v.insert(tenant); - Ok(Some(tenant_id)) + // Hold the write_tenants() lock, since all of this is local IO. + // If this section ever becomes contentious, introduce a new `TenantState::Creating`. + let tenant_directory = super::tenant::create_tenant_files(conf, tenant_conf, tenant_id) + .context("create tenant files")?; + let created_tenant = load_local_tenant(conf, &tenant_directory, remote_storage) + .context("load created tenant")?; + match created_tenant { + None => { + // We get None in case the directory is empty. + // This shouldn't happen here, because we just created the directory. + // So, skip any cleanup work for now, we don't know how we reached this state. + anyhow::bail!("we just created the tenant directory, it can't be empty"); + } + Some(tenant) => { + anyhow::ensure!( + tenant_id == tenant.tenant_id(), + "loaded created tenant has unexpected tenant id (expect {} != actual {})", + tenant_id, + tenant.tenant_id() + ); + v.insert(Arc::clone(&tenant)); + Ok(Some(tenant)) + } + } } } }