mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-26 01:20:38 +00:00
storcon: implement safekeeper_migrate handler (#11849)
This PR implements a safekeeper migration algorithm from RFC-035 https://github.com/neondatabase/neon/blob/main/docs/rfcs/035-safekeeper-dynamic-membership-change.md#change-algorithm - Closes: https://github.com/neondatabase/neon/issues/11823 It is not production-ready yet, but I think it's good enough to commit and start testing. There are some known issues which will be addressed in later PRs: - https://github.com/neondatabase/neon/issues/12186 - https://github.com/neondatabase/neon/issues/12187 - https://github.com/neondatabase/neon/issues/12188 - https://github.com/neondatabase/neon/issues/12189 - https://github.com/neondatabase/neon/issues/12190 - https://github.com/neondatabase/neon/issues/12191 - https://github.com/neondatabase/neon/issues/12192 ## Summary of changes - Implement `tenant_timeline_safekeeper_migrate` handler to drive the migration - Add possibility to specify number of safekeepers per timeline in tests (`timeline_safekeeper_count`) - Add `term` and `flush_lsn` to `TimelineMembershipSwitchResponse` - Implement compare-and-swap (CAS) operation over timeline in DB for updating membership configuration safely. - Write simple test to verify that migration code works
This commit is contained in:
@@ -29,6 +29,7 @@ use pageserver_api::shard::{
|
||||
use rustls::client::WebPkiServerVerifier;
|
||||
use rustls::client::danger::{ServerCertVerified, ServerCertVerifier};
|
||||
use rustls::crypto::ring;
|
||||
use safekeeper_api::membership::SafekeeperGeneration;
|
||||
use scoped_futures::ScopedBoxFuture;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::generation::Generation;
|
||||
@@ -94,6 +95,8 @@ pub(crate) enum DatabaseError {
|
||||
Logical(String),
|
||||
#[error("Migration error: {0}")]
|
||||
Migration(String),
|
||||
#[error("CAS error: {0}")]
|
||||
Cas(String),
|
||||
}
|
||||
|
||||
#[derive(measured::FixedCardinalityLabel, Copy, Clone)]
|
||||
@@ -126,6 +129,7 @@ pub(crate) enum DatabaseOperation {
|
||||
UpdateLeader,
|
||||
SetPreferredAzs,
|
||||
InsertTimeline,
|
||||
UpdateTimelineMembership,
|
||||
GetTimeline,
|
||||
InsertTimelineReconcile,
|
||||
RemoveTimelineReconcile,
|
||||
@@ -1410,6 +1414,56 @@ impl Persistence {
|
||||
.await
|
||||
}
|
||||
|
||||
/// Update timeline membership configuration in the database.
|
||||
/// Perform a compare-and-swap (CAS) operation on the timeline's generation.
|
||||
/// The `new_generation` must be the next (+1) generation after the one in the database.
|
||||
pub(crate) async fn update_timeline_membership(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
new_generation: SafekeeperGeneration,
|
||||
sk_set: &[NodeId],
|
||||
new_sk_set: Option<&[NodeId]>,
|
||||
) -> DatabaseResult<()> {
|
||||
use crate::schema::timelines::dsl;
|
||||
|
||||
let prev_generation = new_generation.previous().unwrap();
|
||||
|
||||
let tenant_id = &tenant_id;
|
||||
let timeline_id = &timeline_id;
|
||||
self.with_measured_conn(DatabaseOperation::UpdateTimelineMembership, move |conn| {
|
||||
Box::pin(async move {
|
||||
let updated = diesel::update(dsl::timelines)
|
||||
.filter(dsl::tenant_id.eq(&tenant_id.to_string()))
|
||||
.filter(dsl::timeline_id.eq(&timeline_id.to_string()))
|
||||
.filter(dsl::generation.eq(prev_generation.into_inner() as i32))
|
||||
.set((
|
||||
dsl::generation.eq(new_generation.into_inner() as i32),
|
||||
dsl::sk_set.eq(sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>()),
|
||||
dsl::new_sk_set.eq(new_sk_set
|
||||
.map(|set| set.iter().map(|id| id.0 as i64).collect::<Vec<_>>())),
|
||||
))
|
||||
.execute(conn)
|
||||
.await?;
|
||||
|
||||
match updated {
|
||||
0 => {
|
||||
// TODO(diko): It makes sense to select the current generation
|
||||
// and include it in the error message for better debuggability.
|
||||
Err(DatabaseError::Cas(
|
||||
"Failed to update membership configuration".to_string(),
|
||||
))
|
||||
}
|
||||
1 => Ok(()),
|
||||
_ => Err(DatabaseError::Logical(format!(
|
||||
"unexpected number of rows ({updated})"
|
||||
))),
|
||||
}
|
||||
})
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Load timeline from db. Returns `None` if not present.
|
||||
pub(crate) async fn get_timeline(
|
||||
&self,
|
||||
|
||||
Reference in New Issue
Block a user