mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
storage_controller: wait for db on startup (#7479)
## Problem In some dev/test environments, there aren't health checks to guarantee the database is available before starting the controller. This creates friction for the developer. ## Summary of changes - Wait up to 5 seconds for the database to become available on startup
This commit is contained in:
@@ -5,6 +5,7 @@ use diesel::Connection;
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
use metrics::BuildInfo;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use storage_controller::http::make_router;
|
||||
use storage_controller::metrics::preinitialize_metrics;
|
||||
use storage_controller::persistence::Persistence;
|
||||
@@ -245,6 +246,8 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
};
|
||||
|
||||
// After loading secrets & config, but before starting anything else, apply database migrations
|
||||
Persistence::await_connection(&secrets.database_url, Duration::from_secs(5)).await?;
|
||||
|
||||
migration_run(&secrets.database_url)
|
||||
.await
|
||||
.context("Running database migrations")?;
|
||||
|
||||
@@ -2,6 +2,7 @@ pub(crate) mod split_state;
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use self::split_state::SplitState;
|
||||
use camino::Utf8Path;
|
||||
@@ -144,6 +145,31 @@ impl Persistence {
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper for use during startup, where we would like to tolerate concurrent restarts of the
|
||||
/// database and the storage controller, therefore the database might not be available right away
|
||||
pub async fn await_connection(
|
||||
database_url: &str,
|
||||
timeout: Duration,
|
||||
) -> Result<(), diesel::ConnectionError> {
|
||||
let started_at = Instant::now();
|
||||
loop {
|
||||
match PgConnection::establish(database_url) {
|
||||
Ok(_) => {
|
||||
tracing::info!("Connected to database.");
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => {
|
||||
if started_at.elapsed() > timeout {
|
||||
return Err(e);
|
||||
} else {
|
||||
tracing::info!("Database not yet available, waiting... ({e})");
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps `with_conn` in order to collect latency and error metrics
|
||||
async fn with_measured_conn<F, R>(&self, op: DatabaseOperation, func: F) -> DatabaseResult<R>
|
||||
where
|
||||
|
||||
Reference in New Issue
Block a user