diff --git a/libs/utils/src/http/error.rs b/libs/utils/src/http/error.rs index 3e9281ac81..d55823b0b7 100644 --- a/libs/utils/src/http/error.rs +++ b/libs/utils/src/http/error.rs @@ -131,7 +131,9 @@ pub fn api_error_handler(api_error: ApiError) -> Response { ApiError::ResourceUnavailable(_) => info!("Error processing HTTP request: {api_error:#}"), ApiError::NotFound(_) => info!("Error processing HTTP request: {api_error:#}"), ApiError::InternalServerError(_) => error!("Error processing HTTP request: {api_error:?}"), - _ => error!("Error processing HTTP request: {api_error:#}"), + ApiError::ShuttingDown => info!("Shut down while processing HTTP request"), + ApiError::Timeout(_) => info!("Timeout while processing HTTP request: {api_error:#}"), + _ => info!("Error processing HTTP request: {api_error:#}"), } api_error.into_response() diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 5e09a5aa1a..aa56806246 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -187,6 +187,7 @@ impl From for ApiError { match e { InternalError(e) => ApiError::InternalServerError(anyhow::anyhow!("{e}")), MapState(e) => e.into(), + ShuttingDown(_) => ApiError::ShuttingDown, } } } @@ -495,6 +496,10 @@ async fn timeline_create_handler( .map_err(ApiError::InternalServerError)?; json_response(StatusCode::CREATED, timeline_info) } + Err(_) if tenant.cancel.is_cancelled() => { + // In case we get some ugly error type during shutdown, cast it into a clean 503. + json_response(StatusCode::SERVICE_UNAVAILABLE, HttpErrorBody::from_msg("Tenant shutting down".to_string())) + } Err(tenant::CreateTimelineError::Conflict | tenant::CreateTimelineError::AlreadyCreating) => { json_response(StatusCode::CONFLICT, ()) } @@ -1257,19 +1262,9 @@ async fn tenant_create_handler( }; // We created the tenant. Existing API semantics are that the tenant // is Active when this function returns. - if let res @ Err(_) = new_tenant + new_tenant .wait_to_become_active(ACTIVE_TENANT_TIMEOUT) - .await - { - // This shouldn't happen because we just created the tenant directory - // in upsert_location, and there aren't any remote timelines - // to load, so, nothing can really fail during load. - // Don't do cleanup because we don't know how we got here. - // The tenant will likely be in `Broken` state and subsequent - // calls will fail. - res.context("created tenant failed to become active") - .map_err(ApiError::InternalServerError)?; - } + .await?; json_response( StatusCode::CREATED, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 31af54d146..7bb5881aab 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -627,9 +627,15 @@ impl Tenant { deletion_queue_client, )); + // The attach task will carry a GateGuard, so that shutdown() reliably waits for it to drop out if + // we shut down while attaching. + let Ok(attach_gate_guard) = tenant.gate.enter() else { + // We just created the Tenant: nothing else can have shut it down yet + unreachable!(); + }; + // Do all the hard work in the background let tenant_clone = Arc::clone(&tenant); - let ctx = ctx.detached_child(TaskKind::Attach, DownloadBehavior::Warn); task_mgr::spawn( &tokio::runtime::Handle::current(), @@ -639,6 +645,8 @@ impl Tenant { "attach tenant", false, async move { + let _gate_guard = attach_gate_guard; + // Is this tenant being spawned as part of process startup? let starting_up = init_order.is_some(); scopeguard::defer! { @@ -813,7 +821,7 @@ impl Tenant { SpawnMode::Create => None, SpawnMode::Normal => {Some(TENANT.attach.start_timer())} }; - match tenant_clone.attach(preload, &ctx).await { + match tenant_clone.attach(preload, mode, &ctx).await { Ok(()) => { info!("attach finished, activating"); if let Some(t)= attach_timer {t.observe_duration();} @@ -900,15 +908,20 @@ impl Tenant { async fn attach( self: &Arc, preload: Option, + mode: SpawnMode, ctx: &RequestContext, ) -> anyhow::Result<()> { span::debug_assert_current_span_has_tenant_id(); failpoint_support::sleep_millis_async!("before-attaching-tenant"); - let preload = match preload { - Some(p) => p, - None => { + let preload = match (preload, mode) { + (Some(p), _) => p, + (None, SpawnMode::Create) => TenantPreload { + deleting: false, + timelines: HashMap::new(), + }, + (None, SpawnMode::Normal) => { // Deprecated dev mode: load from local disk state instead of remote storage // https://github.com/neondatabase/neon/issues/5624 return self.load_local(ctx).await; @@ -1683,9 +1696,13 @@ impl Tenant { ctx: &RequestContext, ) -> Result, CreateTimelineError> { if !self.is_active() { - return Err(CreateTimelineError::Other(anyhow::anyhow!( - "Cannot create timelines on inactive tenant" - ))); + if matches!(self.current_state(), TenantState::Stopping { .. }) { + return Err(CreateTimelineError::ShuttingDown); + } else { + return Err(CreateTimelineError::Other(anyhow::anyhow!( + "Cannot create timelines on inactive tenant" + ))); + } } let _gate = self @@ -4035,7 +4052,7 @@ pub(crate) mod harness { .instrument(info_span!("try_load_preload", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())) .await?; tenant - .attach(Some(preload), ctx) + .attach(Some(preload), SpawnMode::Normal, ctx) .instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())) .await?; } diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index ecffd4e6c1..97de0cdcf9 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -409,7 +409,10 @@ impl DeleteTenantFlow { .await .expect("cant be stopping or broken"); - tenant.attach(preload, ctx).await.context("attach")?; + tenant + .attach(preload, super::SpawnMode::Normal, ctx) + .await + .context("attach")?; Self::background( guard, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 84c7a20247..32535e0134 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -7,6 +7,7 @@ use pageserver_api::models::ShardParameters; use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, TenantShardId}; use rand::{distributions::Alphanumeric, Rng}; use std::borrow::Cow; +use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap}; use std::ops::Deref; use std::sync::Arc; @@ -32,7 +33,8 @@ use crate::deletion_queue::DeletionQueueClient; use crate::metrics::{TENANT, TENANT_MANAGER as METRICS}; use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::{ - AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, TenantConfOpt, + AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig, + TenantConfOpt, }; use crate::tenant::delete::DeleteTenantFlow; use crate::tenant::span::debug_assert_current_span_has_tenant_id; @@ -466,6 +468,26 @@ pub async fn init_tenant_mgr( // We have a generation map: treat it as the authority for whether // this tenant is really attached. if let Some(gen) = generations.get(&tenant_shard_id) { + if let LocationMode::Attached(attached) = &location_conf.mode { + if attached.generation > *gen { + tracing::error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), + "Control plane gave decreasing generation ({gen:?}) in re-attach response for tenant that was attached in generation {:?}, demoting to secondary", + attached.generation + ); + + // We cannot safely attach this tenant given a bogus generation number, but let's avoid throwing away + // local disk content: demote to secondary rather than detaching. + tenants.insert( + tenant_shard_id, + TenantSlot::Secondary(SecondaryTenant::new( + tenant_shard_id, + location_conf.shard, + location_conf.tenant_conf, + &SecondaryLocationConfig { warm: false }, + )), + ); + } + } *gen } else { match &location_conf.mode { @@ -721,7 +743,7 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock) { tokio::select! { Some(joined) = join_set.join_next() => { match joined { - Ok(()) => {} + Ok(()) => {}, Err(join_error) if join_error.is_cancelled() => { unreachable!("we are not cancelling any of the tasks"); } @@ -882,7 +904,7 @@ impl TenantManager { tenant_shard_id: TenantShardId, new_location_config: LocationConf, flush: Option, - spawn_mode: SpawnMode, + mut spawn_mode: SpawnMode, ctx: &RequestContext, ) -> Result>, UpsertLocationError> { debug_assert_current_span_has_tenant_id(); @@ -902,19 +924,29 @@ impl TenantManager { tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Write)?; match (&new_location_config.mode, peek_slot) { (LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => { - if attach_conf.generation == tenant.generation { - // A transition from Attached to Attached in the same generation, we may - // take our fast path and just provide the updated configuration - // to the tenant. - tenant.set_new_location_config( - AttachedTenantConf::try_from(new_location_config.clone()) - .map_err(UpsertLocationError::BadRequest)?, - ); + match attach_conf.generation.cmp(&tenant.generation) { + Ordering::Equal => { + // A transition from Attached to Attached in the same generation, we may + // take our fast path and just provide the updated configuration + // to the tenant. + tenant.set_new_location_config( + AttachedTenantConf::try_from(new_location_config.clone()) + .map_err(UpsertLocationError::BadRequest)?, + ); - Some(FastPathModified::Attached(tenant.clone())) - } else { - // Different generations, fall through to general case - None + Some(FastPathModified::Attached(tenant.clone())) + } + Ordering::Less => { + return Err(UpsertLocationError::BadRequest(anyhow::anyhow!( + "Generation {:?} is less than existing {:?}", + attach_conf.generation, + tenant.generation + ))); + } + Ordering::Greater => { + // Generation advanced, fall through to general case of replacing `Tenant` object + None + } } } ( @@ -1019,6 +1051,12 @@ impl TenantManager { } } slot_guard.drop_old_value().expect("We just shut it down"); + + // Edge case: if we were called with SpawnMode::Create, but a Tenant already existed, then + // the caller thinks they're creating but the tenant already existed. We must switch to + // Normal mode so that when starting this Tenant we properly probe remote storage for timelines, + // rather than assuming it to be empty. + spawn_mode = SpawnMode::Normal; } Some(TenantSlot::Secondary(state)) => { info!("Shutting down secondary tenant"); @@ -1102,14 +1140,46 @@ impl TenantManager { None }; - slot_guard.upsert(new_slot).map_err(|e| match e { - TenantSlotUpsertError::InternalError(e) => { - UpsertLocationError::Other(anyhow::anyhow!(e)) + match slot_guard.upsert(new_slot) { + Err(TenantSlotUpsertError::InternalError(e)) => { + Err(UpsertLocationError::Other(anyhow::anyhow!(e))) } - TenantSlotUpsertError::MapState(e) => UpsertLocationError::Unavailable(e), - })?; + Err(TenantSlotUpsertError::MapState(e)) => Err(UpsertLocationError::Unavailable(e)), + Err(TenantSlotUpsertError::ShuttingDown((new_slot, _completion))) => { + // If we just called tenant_spawn() on a new tenant, and can't insert it into our map, then + // we must not leak it: this would violate the invariant that after shutdown_all_tenants, all tenants + // are shutdown. + // + // We must shut it down inline here. + match new_slot { + TenantSlot::InProgress(_) => { + // Unreachable because we never insert an InProgress + unreachable!() + } + TenantSlot::Attached(tenant) => { + let (_guard, progress) = utils::completion::channel(); + info!("Shutting down just-spawned tenant, because tenant manager is shut down"); + match tenant.shutdown(progress, false).await { + Ok(()) => { + info!("Finished shutting down just-spawned tenant"); + } + Err(barrier) => { + info!("Shutdown already in progress, waiting for it to complete"); + barrier.wait().await; + } + } + } + TenantSlot::Secondary(secondary_tenant) => { + secondary_tenant.shutdown().await; + } + } - Ok(attached_tenant) + Err(UpsertLocationError::Unavailable( + TenantMapError::ShuttingDown, + )) + } + Ok(()) => Ok(attached_tenant), + } } /// Resetting a tenant is equivalent to detaching it, then attaching it again with the same @@ -1728,14 +1798,31 @@ pub(crate) enum TenantSlotError { /// Superset of TenantMapError: issues that can occur when using a SlotGuard /// to insert a new value. -#[derive(Debug, thiserror::Error)] -pub enum TenantSlotUpsertError { +#[derive(thiserror::Error)] +pub(crate) enum TenantSlotUpsertError { /// An error where the slot is in an unexpected state, indicating a code bug #[error("Internal error updating Tenant")] InternalError(Cow<'static, str>), #[error(transparent)] - MapState(#[from] TenantMapError), + MapState(TenantMapError), + + // If we encounter TenantManager shutdown during upsert, we must carry the Completion + // from the SlotGuard, so that the caller can hold it while they clean up: otherwise + // TenantManager shutdown might race ahead before we're done cleaning up any Tenant that + // was protected by the SlotGuard. + #[error("Shutting down")] + ShuttingDown((TenantSlot, utils::completion::Completion)), +} + +impl std::fmt::Debug for TenantSlotUpsertError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::InternalError(reason) => write!(f, "Internal Error {reason}"), + Self::MapState(map_error) => write!(f, "Tenant map state: {map_error:?}"), + Self::ShuttingDown(_completion) => write!(f, "Tenant map shutting down"), + } + } } #[derive(Debug, thiserror::Error)] @@ -1784,7 +1871,7 @@ pub struct SlotGuard { /// [`TenantSlot::InProgress`] carries the corresponding Barrier: it will /// release any waiters as soon as this SlotGuard is dropped. - _completion: utils::completion::Completion, + completion: utils::completion::Completion, } impl SlotGuard { @@ -1797,7 +1884,7 @@ impl SlotGuard { tenant_shard_id, old_value, upserted: false, - _completion: completion, + completion, } } @@ -1830,9 +1917,16 @@ impl SlotGuard { } let m = match &mut *locked { - TenantsMap::Initializing => return Err(TenantMapError::StillInitializing.into()), + TenantsMap::Initializing => { + return Err(TenantSlotUpsertError::MapState( + TenantMapError::StillInitializing, + )) + } TenantsMap::ShuttingDown(_) => { - return Err(TenantMapError::ShuttingDown.into()); + return Err(TenantSlotUpsertError::ShuttingDown(( + new_value, + self.completion.clone(), + ))); } TenantsMap::Open(m) => m, }; @@ -1880,7 +1974,9 @@ impl SlotGuard { Err(TenantSlotUpsertError::InternalError(_)) => { // We already logged the error, nothing else we can do. } - Err(TenantSlotUpsertError::MapState(_)) => { + Err( + TenantSlotUpsertError::MapState(_) | TenantSlotUpsertError::ShuttingDown(_), + ) => { // If the map is shutting down, we need not replace anything } Ok(()) => {} @@ -1978,18 +2074,22 @@ fn tenant_map_peek_slot<'a>( tenant_shard_id: &TenantShardId, mode: TenantSlotPeekMode, ) -> Result, TenantMapError> { - let m = match tenants.deref() { - TenantsMap::Initializing => return Err(TenantMapError::StillInitializing), + match tenants.deref() { + TenantsMap::Initializing => Err(TenantMapError::StillInitializing), TenantsMap::ShuttingDown(m) => match mode { - TenantSlotPeekMode::Read => m, - TenantSlotPeekMode::Write => { - return Err(TenantMapError::ShuttingDown); - } + TenantSlotPeekMode::Read => Ok(Some( + // When reading in ShuttingDown state, we must translate None results + // into a ShuttingDown error, because absence of a tenant shard ID in the map + // isn't a reliable indicator of the tenant being gone: it might have been + // InProgress when shutdown started, and cleaned up from that state such + // that it's now no longer in the map. Callers will have to wait until + // we next start up to get a proper answer. This avoids incorrect 404 API responses. + m.get(tenant_shard_id).ok_or(TenantMapError::ShuttingDown)?, + )), + TenantSlotPeekMode::Write => Err(TenantMapError::ShuttingDown), }, - TenantsMap::Open(m) => m, - }; - - Ok(m.get(tenant_shard_id)) + TenantsMap::Open(m) => Ok(m.get(tenant_shard_id)), + } } enum TenantSlotAcquireMode { diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index ddf83b56a0..340cc9e9e3 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -20,6 +20,7 @@ from fixtures.utils import Fn class PageserverApiException(Exception): def __init__(self, message, status_code: int): super().__init__(message) + self.message = message self.status_code = status_code @@ -261,12 +262,18 @@ class PageserverHttpClient(requests.Session): ) self.verbose_error(res) - def tenant_detach(self, tenant_id: TenantId, detach_ignored=False): + def tenant_detach(self, tenant_id: TenantId, detach_ignored=False, timeout_secs=None): params = {} if detach_ignored: params["detach_ignored"] = "true" - res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach", params=params) + kwargs = {} + if timeout_secs is not None: + kwargs["timeout"] = timeout_secs + + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach", params=params, **kwargs + ) self.verbose_error(res) def tenant_reset(self, tenant_id: Union[TenantId, TenantShardId], drop_cache: bool): diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 521b96779a..293152dd62 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -5,7 +5,10 @@ from typing import Any, Dict, Optional import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, S3Scrubber -from fixtures.pageserver.utils import assert_prefix_empty, tenant_delete_wait_completed +from fixtures.pageserver.utils import ( + assert_prefix_empty, + tenant_delete_wait_completed, +) from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind from fixtures.types import TenantId, TimelineId from fixtures.utils import wait_until @@ -135,6 +138,16 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int): pageserver.stop() pageserver.start() if last_state_ps[0].startswith("Attached") and latest_attached == pageserver.id: + # /re-attach call will bump generation: track that in our state in case we do an + # "attach in same generation" operation later + assert last_state_ps[1] is not None # latest_attached == pageserfer.id implies this + # The re-attach API increments generation by exactly one. + new_generation = last_state_ps[1] + 1 + last_state[pageserver.id] = (last_state_ps[0], new_generation) + tenants = pageserver.http_client().tenant_list() + assert len(tenants) == 1 + assert tenants[0]["generation"] == new_generation + log.info("Entering postgres...") workload.churn_rows(rng.randint(128, 256), pageserver.id) workload.validate(pageserver.id) diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py index bafc5711a1..b4e5a550f3 100644 --- a/test_runner/regress/test_tenant_delete.py +++ b/test_runner/regress/test_tenant_delete.py @@ -411,9 +411,7 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE pageserver_http.configure_failpoints((failpoint, "pause")) def hit_pausable_failpoint_and_later_fail(): - with pytest.raises( - PageserverApiException, match="new timeline \\S+ has invalid disk_consistent_lsn" - ): + with pytest.raises(PageserverApiException, match="NotFound: tenant"): pageserver_http.timeline_create( env.pg_version, env.initial_tenant, env.initial_timeline ) @@ -443,8 +441,8 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE try: wait_until(10, 1, has_hit_failpoint) - # it should start ok, sync up with the stuck creation, then fail because disk_consistent_lsn was not updated - # then deletion should fail and set the tenant broken + # it should start ok, sync up with the stuck creation, then hang waiting for the timeline + # to shut down. deletion = Thread(target=start_deletion) deletion.start() @@ -573,9 +571,11 @@ def test_tenant_delete_races_timeline_creation( ps_http = env.pageserver.http_client() tenant_id = env.initial_tenant - # Sometimes it ends with "InternalServerError(Cancelled", sometimes with "InternalServerError(Operation was cancelled" + # When timeline creation is cancelled by tenant deletion, it is during Tenant::shutdown(), and + # acting on a shutdown tenant generates a 503 response (if caller retried they would later) get + # a 404 after the tenant is fully deleted. CANCELLED_ERROR = ( - ".*POST.*Cancelled request finished with an error: InternalServerError\\(.*ancelled" + ".*POST.*Cancelled request finished successfully status=503 Service Unavailable" ) # This can occur sometimes. diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 2ee2d8125a..5164bda470 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -1,3 +1,4 @@ +import concurrent.futures import os import time from contextlib import closing @@ -7,6 +8,7 @@ from pathlib import Path from typing import List import pytest +import requests from fixtures.log_helper import log from fixtures.metrics import ( PAGESERVER_GLOBAL_METRICS, @@ -17,7 +19,9 @@ from fixtures.neon_fixtures import ( NeonEnv, NeonEnvBuilder, ) -from fixtures.pageserver.utils import timeline_delete_wait_completed +from fixtures.pageserver.http import PageserverApiException +from fixtures.pageserver.utils import timeline_delete_wait_completed, wait_until_tenant_active +from fixtures.pg_version import PgVersion from fixtures.remote_storage import RemoteStorageKind from fixtures.types import Lsn, TenantId from fixtures.utils import wait_until @@ -341,3 +345,78 @@ def test_pageserver_with_empty_tenants(neon_env_builder: NeonEnvBuilder): assert ( tenant_active_count == 1 ), f"Tenant {tenant_with_empty_timelines} should have metric as active" + + +def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder): + """ + Probabilistic stress test for the pageserver's handling of tenant requests + across a restart. This is intended to catch things like: + - Bad response codes during shutdown (e.g. returning 500 instead of 503) + - Issues where a tenant is still starting up while we receive a request for it + - Issues with interrupting/resuming tenant/timeline creation in shutdown + """ + env = neon_env_builder.init_configs() + env.start() + tenant_id: TenantId = env.initial_tenant + timeline_id = env.initial_timeline + + # Multiple creation requests which race will generate this error + env.pageserver.allowed_errors.append(".*Conflict: Tenant is already being modified.*") + + # Tenant creation requests which arrive out of order will generate complaints about + # generation nubmers out of order. + env.pageserver.allowed_errors.append(".*Generation .+ is less than existing .+") + + # Our multiple creation requests will advance generation quickly, and when we skip + # a generation number we can generate these warnings + env.pageserver.allowed_errors.append(".*Dropped remote consistent LSN updates for tenant .+") + + # Timeline::flush_and_shutdown cannot tell if it is hitting a failure because of + # an incomplete attach, or some other problem. In the field this should be rare, + # so we allow it to log at WARN, even if it is occasionally a false positive. + env.pageserver.allowed_errors.append(".*failed to freeze and flush.*") + + # When we shut down a tenant during a timeline creation, initdb is not cancelled, we wait + # for it to complete (since https://github.com/neondatabase/neon/pull/6451). This means + # that shutdown can be delayed by >=1s on debug builds where initdb takes a long time to run. + env.pageserver.allowed_errors.append(".*still waiting, taking longer than expected... gate.*") + + def create_bg(delay_ms): + time.sleep(delay_ms / 1000.0) + try: + env.pageserver.tenant_create(tenant_id=tenant_id) + env.pageserver.http_client().timeline_create( + PgVersion.NOT_SET, tenant_id, new_timeline_id=timeline_id + ) + except PageserverApiException as e: + if e.status_code == 409: + log.info(f"delay_ms={delay_ms} 409") + pass + elif e.status_code == 400: + if "is less than existing" in e.message: + # We send creation requests very close together in time: it is expected that these + # race, and sometimes chigher-generation'd requests arrive first. The pageserver rightly + # rejects any attempt to make a generation number go backwards. + pass + else: + raise + else: + raise + except requests.exceptions.ConnectionError: + # Our requests might arrive during shutdown and be cut off at the transport level + pass + + for _ in range(0, 10): + with concurrent.futures.ThreadPoolExecutor() as executor: + futs = [] + for delay_ms in (0, 1, 10, 50, 100, 200, 500, 800): + f = executor.submit(create_bg, delay_ms) + futs.append(f) + env.pageserver.stop() + env.pageserver.start() + + for f in futs: + f.result(timeout=10) + + # The tenant should end up active + wait_until_tenant_active(env.pageserver.http_client(), tenant_id, iterations=10, period=1) diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py index 2e58a413e4..4c5cb32caa 100644 --- a/test_runner/regress/test_timeline_size.py +++ b/test_runner/regress/test_timeline_size.py @@ -868,7 +868,7 @@ def test_ondemand_activation(neon_env_builder: NeonEnvBuilder): ) assert pageserver_http.get_metric_value("pageserver_tenant_startup_complete_total") == n_tenants - # Check that tenant deletion proactively wakes tenants: this is done separately to the main + # Check that tenant deletion/detach proactively wakes tenants: this is done separately to the main # body of the test because it will disrupt tenant counts env.pageserver.stop() env.pageserver.start( @@ -876,9 +876,22 @@ def test_ondemand_activation(neon_env_builder: NeonEnvBuilder): ) wait_until(10, 1, at_least_one_active) - delete_tenant_id = list( + + detach_tenant_id = list( [(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"] )[0][0] + delete_tenant_id = list( + [(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"] + )[1][0] + + # Detaching a stuck tenant should proceed promptly + # (reproducer for https://github.com/neondatabase/neon/pull/6430) + env.pageserver.http_client().tenant_detach(detach_tenant_id, timeout_secs=10) + tenant_ids.remove(detach_tenant_id) + # FIXME: currently the mechanism for cancelling attach is to set state to broken, which is reported spuriously at error level + env.pageserver.allowed_errors.append( + ".*attach failed, setting tenant state to Broken: Shut down while Attaching" + ) # Deleting a stuck tenant should prompt it to go active with concurrent.futures.ThreadPoolExecutor() as executor: @@ -912,9 +925,10 @@ def test_ondemand_activation(neon_env_builder: NeonEnvBuilder): wait_tenant_status_404(pageserver_http, tenant_id=delete_tenant_id, iterations=40) tenant_ids.remove(delete_tenant_id) - # Check that all the stuck tenants proceed to active (apart from the one that deletes) + # Check that all the stuck tenants proceed to active (apart from the one that deletes, and the one + # we detached) wait_until(10, 1, all_active) - assert len(get_tenant_states()) == n_tenants - 1 + assert len(get_tenant_states()) == n_tenants - 2 def test_timeline_logical_size_task_priority(neon_env_builder: NeonEnvBuilder):