mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 20:42:54 +00:00
remove TimelineState::Suspended, introduce TimelineState::Loading
The TimelineState::Suspsended was dubious to begin with. I suppose
that the intention was that timelines could transition back and
forth between Active and Suspended states.
But practically, the code before this patch never did that.
The transitions were:
() ==Timeline::new==> Suspended ==*==> {Active,Broken,Stopping}
One exception: Tenant::set_stopping() could transition timelines like
so:
!Broken ==Tenant::set_stopping()==> Suspended
But Tenant itself cannot transition from stopping state to any other
state.
Thus, this patch removes TimelineState::Suspended and introduces a new
state Loading. The aforementioned transitions change as follows:
- () ==Timeline::new==> Suspended ==*==> {Active,Broken,Stopping}
+ () ==Timeline::new==> Loading ==*==> {Active,Broken,Stopping}
- !Broken ==Tenant::set_stopping()==> Suspended
+ !Broken ==Tenant::set_stopping()==> Stopping
Walreceiver's connection manager loop watches TimelineState to decide
whether it should retry connecting, or exit.
This patch changes the loop to exit when it observes the transition
into Stopping state.
Walreceiver isn't supposed to be started until the timeline transitions
into Active state. So, this patch also adds some warn!() messages
in case this happens anyways.
This commit is contained in:
committed by
Christian Schwarz
parent
7704caa3ac
commit
6b6570b580
@@ -44,18 +44,17 @@ impl TenantState {
|
||||
/// A state of a timeline in pageserver's memory.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum TimelineState {
|
||||
/// Timeline is fully operational. If the containing Tenant is Active, the timeline's
|
||||
/// background jobs are running otherwise they will be launched when the tenant is activated.
|
||||
/// The timeline is recognized by the pageserver but is not yet operational.
|
||||
/// In particular, the walreceiver connection loop is not running for this timeline.
|
||||
/// It will eventually transition to state Active or Broken.
|
||||
Loading,
|
||||
/// The timeline is fully operational.
|
||||
/// It can be queried, and the walreceiver connection loop is running.
|
||||
Active,
|
||||
/// A timeline is recognized by pageserver, but not yet ready to operate.
|
||||
/// The status indicates, that the timeline could eventually go back to Active automatically:
|
||||
/// for example, if the owning tenant goes back to Active again.
|
||||
Suspended,
|
||||
/// A timeline is recognized by pageserver, but not yet ready to operate and not allowed to
|
||||
/// automatically become Active after certain events: only a management call can change this status.
|
||||
/// The timeline was previously Loading or Active but is shutting down.
|
||||
/// It cannot transition back into any other state.
|
||||
Stopping,
|
||||
/// A timeline is recognized by the pageserver, but can no longer be used for
|
||||
/// any operations, because it failed to be activated.
|
||||
/// The timeline is broken and not operational (previous states: Loading or Active).
|
||||
Broken,
|
||||
}
|
||||
|
||||
|
||||
@@ -1487,7 +1487,7 @@ impl Tenant {
|
||||
.values()
|
||||
.filter(|timeline| timeline.current_state() != TimelineState::Broken);
|
||||
for timeline in not_broken_timelines {
|
||||
timeline.set_state(TimelineState::Suspended);
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
}
|
||||
}
|
||||
TenantState::Broken => {
|
||||
|
||||
@@ -812,7 +812,7 @@ impl Timeline {
|
||||
pg_version: u32,
|
||||
) -> Arc<Self> {
|
||||
let disk_consistent_lsn = metadata.disk_consistent_lsn();
|
||||
let (state, _) = watch::channel(TimelineState::Suspended);
|
||||
let (state, _) = watch::channel(TimelineState::Loading);
|
||||
|
||||
let (layer_flush_start_tx, _) = tokio::sync::watch::channel(0);
|
||||
let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(())));
|
||||
@@ -1400,7 +1400,7 @@ impl Timeline {
|
||||
TimelineState::Active => continue,
|
||||
TimelineState::Broken
|
||||
| TimelineState::Stopping
|
||||
| TimelineState::Suspended => {
|
||||
| TimelineState::Loading => {
|
||||
break format!("aborted because timeline became inactive (new state: {new_state:?})")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -183,13 +183,23 @@ async fn connection_manager_loop_step(
|
||||
|
||||
new_event = async {
|
||||
loop {
|
||||
if walreceiver_state.timeline.current_state() == TimelineState::Loading {
|
||||
warn!("wal connection manager should only be launched after timeline has become active");
|
||||
}
|
||||
match timeline_state_updates.changed().await {
|
||||
Ok(()) => {
|
||||
let new_state = walreceiver_state.timeline.current_state();
|
||||
match new_state {
|
||||
// we're already active as walreceiver, no need to reactivate
|
||||
TimelineState::Active => continue,
|
||||
TimelineState::Broken | TimelineState::Stopping | TimelineState::Suspended => return ControlFlow::Continue(new_state),
|
||||
TimelineState::Broken | TimelineState::Stopping => {
|
||||
info!("timeline entered terminal state {new_state:?}, stopping wal connection manager loop");
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
TimelineState::Loading => {
|
||||
warn!("timeline transitioned back to Loading state, that should not happen");
|
||||
return ControlFlow::Continue(new_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_sender_dropped_error) => return ControlFlow::Break(()),
|
||||
@@ -197,7 +207,7 @@ async fn connection_manager_loop_step(
|
||||
}
|
||||
} => match new_event {
|
||||
ControlFlow::Continue(new_state) => {
|
||||
info!("Timeline became inactive (new state: {new_state:?}), dropping current connections until it reactivates");
|
||||
info!("observed timeline state change, new state is {new_state:?}");
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
ControlFlow::Break(()) => {
|
||||
|
||||
Reference in New Issue
Block a user