mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 21:50:37 +00:00
set_stopping(): report in result if not transitioning to Stopping
This commit is contained in:
@@ -447,6 +447,11 @@ pub enum DeleteTimelineError {
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
|
||||
pub enum SetStoppingError {
|
||||
AlreadyStopping,
|
||||
Broken,
|
||||
}
|
||||
|
||||
struct RemoteStartupData {
|
||||
index_part: IndexPart,
|
||||
remote_metadata: TimelineMetadata,
|
||||
@@ -1711,7 +1716,7 @@ impl Tenant {
|
||||
/// This function waits for the tenant to become active if it isn't already, before transitioning it into Stopping state.
|
||||
///
|
||||
/// This function is not cancel-safe!
|
||||
pub async fn set_stopping(&self) {
|
||||
pub async fn set_stopping(&self) -> Result<(), SetStoppingError> {
|
||||
let mut rx = self.state.subscribe();
|
||||
|
||||
// cannot stop before we're done activating, so wait out until we're done activating
|
||||
@@ -1729,8 +1734,8 @@ impl Tenant {
|
||||
.expect("cannot drop self.state while on a &self method");
|
||||
|
||||
// we now know we're done activating, let's see whether this task is the winner to transition into Stopping
|
||||
let mut stopping = false;
|
||||
self.state.send_modify(|current_state| match current_state {
|
||||
let mut err = None;
|
||||
let stopping = self.state.send_if_modified(|current_state| match current_state {
|
||||
TenantState::Activating | TenantState::Loading | TenantState::Attaching => {
|
||||
unreachable!("we ensured above that we're done with activation, and, there is no re-activation")
|
||||
}
|
||||
@@ -1739,29 +1744,42 @@ impl Tenant {
|
||||
// are created after the transition to Stopping. That's harmless, as the Timelines
|
||||
// won't be accessible to anyone afterwards, because the Tenant is in Stopping state.
|
||||
*current_state = TenantState::Stopping;
|
||||
stopping = true;
|
||||
// Continue stopping outside the closure. We need to grab timelines.lock()
|
||||
// and we plan to turn it into a tokio::sync::Mutex in a future patch.
|
||||
true
|
||||
}
|
||||
TenantState::Broken { reason, .. } => {
|
||||
info!(
|
||||
"Cannot set tenant to Stopping state, it is in Broken state due to: {reason}"
|
||||
);
|
||||
err = Some(SetStoppingError::Broken);
|
||||
false
|
||||
}
|
||||
TenantState::Stopping => {
|
||||
info!("Tenant is already in Stopping state");
|
||||
err = Some(SetStoppingError::AlreadyStopping);
|
||||
false
|
||||
}
|
||||
});
|
||||
|
||||
if stopping {
|
||||
let timelines_accessor = self.timelines.lock().unwrap();
|
||||
let not_broken_timelines = timelines_accessor
|
||||
.values()
|
||||
.filter(|timeline| timeline.current_state() != TimelineState::Broken);
|
||||
for timeline in not_broken_timelines {
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
}
|
||||
match (stopping, err) {
|
||||
(true, None) => {} // continue
|
||||
(false, Some(err)) => return Err(err),
|
||||
(true, Some(_)) => unreachable!(
|
||||
"send_if_modified closure must error out if not transitioning to Stopping"
|
||||
),
|
||||
(false, None) => unreachable!(
|
||||
"send_if_modified closure must return true if transitioning to Stopping"
|
||||
),
|
||||
}
|
||||
|
||||
let timelines_accessor = self.timelines.lock().unwrap();
|
||||
let not_broken_timelines = timelines_accessor
|
||||
.values()
|
||||
.filter(|timeline| timeline.current_state() != TimelineState::Broken);
|
||||
for timeline in not_broken_timelines {
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Method for tenant::mgr to transition us into Broken state in case of a late failure in
|
||||
|
||||
@@ -19,7 +19,9 @@ use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::task_mgr::{self, TaskKind};
|
||||
use crate::tenant::config::TenantConfOpt;
|
||||
use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState};
|
||||
use crate::tenant::{
|
||||
create_tenant_files, CreateTenantFilesMode, SetStoppingError, Tenant, TenantState,
|
||||
};
|
||||
use crate::IGNORED_TENANT_FILE_NAME;
|
||||
|
||||
use utils::fs_ext::PathExt;
|
||||
@@ -247,7 +249,7 @@ pub async fn shutdown_all_tenants() {
|
||||
let mut tenants_to_freeze_and_flush = Vec::with_capacity(tenants_to_shut_down.len());
|
||||
for (_, tenant) in tenants_to_shut_down {
|
||||
// updates tenant state, forbidding new GC and compaction iterations from starting
|
||||
tenant.set_stopping().await;
|
||||
let _ = tenant.set_stopping().await; // TODO handle error
|
||||
tenants_to_freeze_and_flush.push(tenant);
|
||||
}
|
||||
|
||||
@@ -587,14 +589,20 @@ where
|
||||
{
|
||||
let tenants_accessor = TENANTS.write().await;
|
||||
match tenants_accessor.get(&tenant_id) {
|
||||
Some(tenant) => match tenant.current_state() {
|
||||
TenantState::Attaching
|
||||
| TenantState::Loading
|
||||
| TenantState::Activating
|
||||
| TenantState::Broken { .. }
|
||||
| TenantState::Active => tenant.set_stopping().await,
|
||||
TenantState::Stopping => return Err(TenantStateError::IsStopping(tenant_id)),
|
||||
},
|
||||
Some(tenant) => {
|
||||
match tenant.set_stopping().await {
|
||||
Ok(()) => {
|
||||
// we won, continue stopping procedure
|
||||
}
|
||||
Err(SetStoppingError::Broken) => {
|
||||
// continue the procedure, let's hope the closure can deal with broken tenants
|
||||
}
|
||||
Err(SetStoppingError::AlreadyStopping) => {
|
||||
// the tenant is already stopping or broken, don't do anything
|
||||
return Err(TenantStateError::IsStopping(tenant_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
None => return Err(TenantStateError::NotFound(tenant_id)),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user