diff --git a/libs/utils/src/http/error.rs b/libs/utils/src/http/error.rs
index 3e9281ac81..d55823b0b7 100644
--- a/libs/utils/src/http/error.rs
+++ b/libs/utils/src/http/error.rs
@@ -131,7 +131,9 @@ pub fn api_error_handler(api_error: ApiError) -> Response
{
ApiError::ResourceUnavailable(_) => info!("Error processing HTTP request: {api_error:#}"),
ApiError::NotFound(_) => info!("Error processing HTTP request: {api_error:#}"),
ApiError::InternalServerError(_) => error!("Error processing HTTP request: {api_error:?}"),
- _ => error!("Error processing HTTP request: {api_error:#}"),
+ ApiError::ShuttingDown => info!("Shut down while processing HTTP request"),
+ ApiError::Timeout(_) => info!("Timeout while processing HTTP request: {api_error:#}"),
+ _ => info!("Error processing HTTP request: {api_error:#}"),
}
api_error.into_response()
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 5e09a5aa1a..aa56806246 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -187,6 +187,7 @@ impl From for ApiError {
match e {
InternalError(e) => ApiError::InternalServerError(anyhow::anyhow!("{e}")),
MapState(e) => e.into(),
+ ShuttingDown(_) => ApiError::ShuttingDown,
}
}
}
@@ -495,6 +496,10 @@ async fn timeline_create_handler(
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::CREATED, timeline_info)
}
+ Err(_) if tenant.cancel.is_cancelled() => {
+ // In case we get some ugly error type during shutdown, cast it into a clean 503.
+ json_response(StatusCode::SERVICE_UNAVAILABLE, HttpErrorBody::from_msg("Tenant shutting down".to_string()))
+ }
Err(tenant::CreateTimelineError::Conflict | tenant::CreateTimelineError::AlreadyCreating) => {
json_response(StatusCode::CONFLICT, ())
}
@@ -1257,19 +1262,9 @@ async fn tenant_create_handler(
};
// We created the tenant. Existing API semantics are that the tenant
// is Active when this function returns.
- if let res @ Err(_) = new_tenant
+ new_tenant
.wait_to_become_active(ACTIVE_TENANT_TIMEOUT)
- .await
- {
- // This shouldn't happen because we just created the tenant directory
- // in upsert_location, and there aren't any remote timelines
- // to load, so, nothing can really fail during load.
- // Don't do cleanup because we don't know how we got here.
- // The tenant will likely be in `Broken` state and subsequent
- // calls will fail.
- res.context("created tenant failed to become active")
- .map_err(ApiError::InternalServerError)?;
- }
+ .await?;
json_response(
StatusCode::CREATED,
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 31af54d146..7bb5881aab 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -627,9 +627,15 @@ impl Tenant {
deletion_queue_client,
));
+ // The attach task will carry a GateGuard, so that shutdown() reliably waits for it to drop out if
+ // we shut down while attaching.
+ let Ok(attach_gate_guard) = tenant.gate.enter() else {
+ // We just created the Tenant: nothing else can have shut it down yet
+ unreachable!();
+ };
+
// Do all the hard work in the background
let tenant_clone = Arc::clone(&tenant);
-
let ctx = ctx.detached_child(TaskKind::Attach, DownloadBehavior::Warn);
task_mgr::spawn(
&tokio::runtime::Handle::current(),
@@ -639,6 +645,8 @@ impl Tenant {
"attach tenant",
false,
async move {
+ let _gate_guard = attach_gate_guard;
+
// Is this tenant being spawned as part of process startup?
let starting_up = init_order.is_some();
scopeguard::defer! {
@@ -813,7 +821,7 @@ impl Tenant {
SpawnMode::Create => None,
SpawnMode::Normal => {Some(TENANT.attach.start_timer())}
};
- match tenant_clone.attach(preload, &ctx).await {
+ match tenant_clone.attach(preload, mode, &ctx).await {
Ok(()) => {
info!("attach finished, activating");
if let Some(t)= attach_timer {t.observe_duration();}
@@ -900,15 +908,20 @@ impl Tenant {
async fn attach(
self: &Arc,
preload: Option,
+ mode: SpawnMode,
ctx: &RequestContext,
) -> anyhow::Result<()> {
span::debug_assert_current_span_has_tenant_id();
failpoint_support::sleep_millis_async!("before-attaching-tenant");
- let preload = match preload {
- Some(p) => p,
- None => {
+ let preload = match (preload, mode) {
+ (Some(p), _) => p,
+ (None, SpawnMode::Create) => TenantPreload {
+ deleting: false,
+ timelines: HashMap::new(),
+ },
+ (None, SpawnMode::Normal) => {
// Deprecated dev mode: load from local disk state instead of remote storage
// https://github.com/neondatabase/neon/issues/5624
return self.load_local(ctx).await;
@@ -1683,9 +1696,13 @@ impl Tenant {
ctx: &RequestContext,
) -> Result, CreateTimelineError> {
if !self.is_active() {
- return Err(CreateTimelineError::Other(anyhow::anyhow!(
- "Cannot create timelines on inactive tenant"
- )));
+ if matches!(self.current_state(), TenantState::Stopping { .. }) {
+ return Err(CreateTimelineError::ShuttingDown);
+ } else {
+ return Err(CreateTimelineError::Other(anyhow::anyhow!(
+ "Cannot create timelines on inactive tenant"
+ )));
+ }
}
let _gate = self
@@ -4035,7 +4052,7 @@ pub(crate) mod harness {
.instrument(info_span!("try_load_preload", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
.await?;
tenant
- .attach(Some(preload), ctx)
+ .attach(Some(preload), SpawnMode::Normal, ctx)
.instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
.await?;
}
diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs
index ecffd4e6c1..97de0cdcf9 100644
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -409,7 +409,10 @@ impl DeleteTenantFlow {
.await
.expect("cant be stopping or broken");
- tenant.attach(preload, ctx).await.context("attach")?;
+ tenant
+ .attach(preload, super::SpawnMode::Normal, ctx)
+ .await
+ .context("attach")?;
Self::background(
guard,
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 84c7a20247..32535e0134 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -7,6 +7,7 @@ use pageserver_api::models::ShardParameters;
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, TenantShardId};
use rand::{distributions::Alphanumeric, Rng};
use std::borrow::Cow;
+use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap};
use std::ops::Deref;
use std::sync::Arc;
@@ -32,7 +33,8 @@ use crate::deletion_queue::DeletionQueueClient;
use crate::metrics::{TENANT, TENANT_MANAGER as METRICS};
use crate::task_mgr::{self, TaskKind};
use crate::tenant::config::{
- AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, TenantConfOpt,
+ AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig,
+ TenantConfOpt,
};
use crate::tenant::delete::DeleteTenantFlow;
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
@@ -466,6 +468,26 @@ pub async fn init_tenant_mgr(
// We have a generation map: treat it as the authority for whether
// this tenant is really attached.
if let Some(gen) = generations.get(&tenant_shard_id) {
+ if let LocationMode::Attached(attached) = &location_conf.mode {
+ if attached.generation > *gen {
+ tracing::error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
+ "Control plane gave decreasing generation ({gen:?}) in re-attach response for tenant that was attached in generation {:?}, demoting to secondary",
+ attached.generation
+ );
+
+ // We cannot safely attach this tenant given a bogus generation number, but let's avoid throwing away
+ // local disk content: demote to secondary rather than detaching.
+ tenants.insert(
+ tenant_shard_id,
+ TenantSlot::Secondary(SecondaryTenant::new(
+ tenant_shard_id,
+ location_conf.shard,
+ location_conf.tenant_conf,
+ &SecondaryLocationConfig { warm: false },
+ )),
+ );
+ }
+ }
*gen
} else {
match &location_conf.mode {
@@ -721,7 +743,7 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock) {
tokio::select! {
Some(joined) = join_set.join_next() => {
match joined {
- Ok(()) => {}
+ Ok(()) => {},
Err(join_error) if join_error.is_cancelled() => {
unreachable!("we are not cancelling any of the tasks");
}
@@ -882,7 +904,7 @@ impl TenantManager {
tenant_shard_id: TenantShardId,
new_location_config: LocationConf,
flush: Option,
- spawn_mode: SpawnMode,
+ mut spawn_mode: SpawnMode,
ctx: &RequestContext,
) -> Result