mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-09 06:22:57 +00:00
storcon: soft disable SK heartbeats (#11041)
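## Problem

JWT tokens aren't in place yet, so all safekeeper (SK) heartbeats fail. The failing heartbeats are equivalent to a wait before the pageserver (PS) heartbeats are applied, which makes things more flaky.

## Summary of Changes

Add a `--load-safekeepers` flag (default: `false`). Unless it is set, SKs are not loaded from the db on start-up and are not added to the in-memory state at runtime, so they are never heartbeated.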
This commit is contained in:
@@ -138,6 +138,10 @@ struct Cli {
|
||||
// Flag to use https for requests to pageserver API.
|
||||
#[arg(long, default_value = "false")]
|
||||
use_https_pageserver_api: bool,
|
||||
|
||||
/// Whether to load safekeepers from the database and heartbeat them
|
||||
#[arg(long, default_value = "false")]
|
||||
load_safekeepers: bool,
|
||||
}
|
||||
|
||||
enum StrictMode {
|
||||
@@ -350,6 +354,7 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
start_as_candidate: args.start_as_candidate,
|
||||
http_service_port: args.listen.port() as i32,
|
||||
use_https_pageserver_api: args.use_https_pageserver_api,
|
||||
load_safekeepers: args.load_safekeepers,
|
||||
};
|
||||
|
||||
// Validate that we can connect to the database
|
||||
|
||||
@@ -389,6 +389,8 @@ pub struct Config {
|
||||
pub long_reconcile_threshold: Duration,
|
||||
|
||||
pub use_https_pageserver_api: bool,
|
||||
|
||||
pub load_safekeepers: bool,
|
||||
}
|
||||
|
||||
impl From<DatabaseError> for ApiError {
|
||||
@@ -1405,15 +1407,20 @@ impl Service {
|
||||
.set(nodes.len() as i64);
|
||||
|
||||
tracing::info!("Loading safekeepers from database...");
|
||||
let safekeepers = persistence
|
||||
.list_safekeepers()
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new()))
|
||||
.collect::<Vec<_>>();
|
||||
let safekeepers = if config.load_safekeepers {
|
||||
persistence
|
||||
.list_safekeepers()
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new()))
|
||||
.collect::<Vec<_>>()
|
||||
} else {
|
||||
tracing::info!("Skipping safekeeper loading");
|
||||
Default::default()
|
||||
};
|
||||
|
||||
let safekeepers: HashMap<NodeId, Safekeeper> =
|
||||
safekeepers.into_iter().map(|n| (n.get_id(), n)).collect();
|
||||
tracing::info!("Loaded {} safekeepers from database.", safekeepers.len());
|
||||
|
||||
tracing::info!("Loading shards from database...");
|
||||
let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?;
|
||||
@@ -8054,7 +8061,8 @@ impl Service {
|
||||
) -> Result<(), DatabaseError> {
|
||||
let node_id = NodeId(record.id as u64);
|
||||
self.persistence.safekeeper_upsert(record.clone()).await?;
|
||||
{
|
||||
|
||||
if self.config.load_safekeepers {
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let mut safekeepers = (*locked.safekeepers).clone();
|
||||
match safekeepers.entry(node_id) {
|
||||
@@ -8086,7 +8094,7 @@ impl Service {
|
||||
.await?;
|
||||
let node_id = NodeId(id as u64);
|
||||
// After the change has been persisted successfully, update the in-memory state
|
||||
{
|
||||
if self.config.load_safekeepers {
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let mut safekeepers = (*locked.safekeepers).clone();
|
||||
let sk = safekeepers
|
||||
|
||||
Reference in New Issue
Block a user