mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-06 13:02:55 +00:00
storage controller: improved handling of node availability on restart (#6658)
- Automatically set a node's availability to Active if it is responsive in startup_reconcile - Impose a 5s timeout of HTTP request to list location conf, so that an unresponsive node can't hang it for minutes - Do several retries if the request fails with a retryable error, to be tolerant of concurrent pageserver & storage controller restarts - Add a readiness hook for use with k8s so that we can tell when the startup reconciliaton is done and the service is fully ready to do work. - Add /metrics to the list of un-authenticated endpoints (this is unrelated but we're touching the line in this PR already, and it fixes auth error spam in deployed container.) - A test for the above. Closes: #6670
This commit is contained in:
@@ -56,10 +56,18 @@ pub enum ForceAwaitLogicalSize {
|
||||
|
||||
impl Client {
|
||||
pub fn new(mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
|
||||
Self::from_client(reqwest::Client::new(), mgmt_api_endpoint, jwt)
|
||||
}
|
||||
|
||||
pub fn from_client(
|
||||
client: reqwest::Client,
|
||||
mgmt_api_endpoint: String,
|
||||
jwt: Option<&str>,
|
||||
) -> Self {
|
||||
Self {
|
||||
mgmt_api_endpoint,
|
||||
authorization_header: jwt.map(|jwt| format!("Bearer {jwt}")),
|
||||
client: reqwest::Client::new(),
|
||||
client,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user