mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-25 17:10:38 +00:00
Add new activating scheduling policy for safekeepers (#12441)
When deploying new safekeepers, we don't immediately want to send traffic to them. Maybe they are not ready yet by the time the deploy script is registering them with the storage controller. For pageservers, the storcon solves the problem by not scheduling stuff to them unless there has been a positive heartbeat response. We can't do the same for safekeepers though, otherwise a single down safekeeper would mean we can't create new timelines in smaller regions where there are only three safekeepers in total. So far we have created safekeepers as `pause` but this adds a manual step to safekeeper deployment which is prone to oversight. We want things to be automated. So we introduce a new state `activating` that acts just like `pause`, except that we automatically transition the policy to `active` once we get a positive heartbeat from the safekeeper. For `pause`, we always keep the safekeeper paused.
This commit is contained in:
@@ -1388,6 +1388,48 @@ impl Persistence {
|
||||
.await
|
||||
}
|
||||
|
||||
/// Activate the given safekeeper, ensuring that there is no TOCTOU.
|
||||
/// Returns `Some` if the safekeeper has indeed been activating (or already active). Other states return `None`.
|
||||
pub(crate) async fn activate_safekeeper(&self, id_: i64) -> Result<Option<()>, DatabaseError> {
|
||||
use crate::schema::safekeepers::dsl::*;
|
||||
|
||||
self.with_conn(move |conn| {
|
||||
Box::pin(async move {
|
||||
#[derive(Insertable, AsChangeset)]
|
||||
#[diesel(table_name = crate::schema::safekeepers)]
|
||||
struct UpdateSkSchedulingPolicy<'a> {
|
||||
id: i64,
|
||||
scheduling_policy: &'a str,
|
||||
}
|
||||
let scheduling_policy_active = String::from(SkSchedulingPolicy::Active);
|
||||
let scheduling_policy_activating = String::from(SkSchedulingPolicy::Activating);
|
||||
|
||||
let rows_affected = diesel::update(
|
||||
safekeepers.filter(id.eq(id_)).filter(
|
||||
scheduling_policy
|
||||
.eq(scheduling_policy_activating)
|
||||
.or(scheduling_policy.eq(&scheduling_policy_active)),
|
||||
),
|
||||
)
|
||||
.set(scheduling_policy.eq(&scheduling_policy_active))
|
||||
.execute(conn)
|
||||
.await?;
|
||||
|
||||
if rows_affected == 0 {
|
||||
return Ok(Some(()));
|
||||
}
|
||||
if rows_affected != 1 {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"unexpected number of rows ({rows_affected})",
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Some(()))
|
||||
})
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Persist timeline. Returns if the timeline was newly inserted. If it wasn't, we haven't done any writes.
|
||||
pub(crate) async fn insert_timeline(&self, entry: TimelinePersistence) -> DatabaseResult<bool> {
|
||||
use crate::schema::timelines;
|
||||
|
||||
Reference in New Issue
Block a user