mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-14 08:52:56 +00:00
impr(controller_upcall_client): clean up copy-pasta code & add context to retries (#10991)
Before this PR, re-attach and validate would log the same warning ``` calling control plane generation validation API failed ``` on retry errors. This can be confusing. This PR makes the message generically valid for any upcall and adds additional tracing spans to capture context. Along the way, clean up some copy-pasta variable naming. refs - https://github.com/neondatabase/neon/issues/10381#issuecomment-2684755827 --------- Co-authored-by: Alexander Lakhin <alexander.lakhin@neon.tech>
This commit is contained in:
committed by
GitHub
parent
3a3d62dc4f
commit
e35f7758d8
@@ -84,6 +84,7 @@ impl ControllerUpcallClient {
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn retry_http_forever<R, T>(
|
||||
&self,
|
||||
url: &url::Url,
|
||||
@@ -108,7 +109,7 @@ impl ControllerUpcallClient {
|
||||
|_| false,
|
||||
3,
|
||||
u32::MAX,
|
||||
"calling control plane generation validation API",
|
||||
"storage controller upcall",
|
||||
&self.cancel,
|
||||
)
|
||||
.await
|
||||
@@ -125,11 +126,12 @@ impl ControllerUpcallClient {
|
||||
|
||||
impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
/// Block until we get a successful response, or error out if we are shut down
|
||||
#[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context
|
||||
async fn re_attach(
|
||||
&self,
|
||||
conf: &PageServerConf,
|
||||
) -> Result<HashMap<TenantShardId, ReAttachResponseTenant>, RetryForeverError> {
|
||||
let re_attach_path = self
|
||||
let url = self
|
||||
.base_url
|
||||
.join("re-attach")
|
||||
.expect("Failed to build re-attach path");
|
||||
@@ -205,7 +207,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
register: register.clone(),
|
||||
};
|
||||
|
||||
let response: ReAttachResponse = self.retry_http_forever(&re_attach_path, request).await?;
|
||||
let response: ReAttachResponse = self.retry_http_forever(&url, request).await?;
|
||||
tracing::info!(
|
||||
"Received re-attach response with {} tenants (node {}, register: {:?})",
|
||||
response.tenants.len(),
|
||||
@@ -223,11 +225,12 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
}
|
||||
|
||||
/// Block until we get a successful response, or error out if we are shut down
|
||||
#[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context
|
||||
async fn validate(
|
||||
&self,
|
||||
tenants: Vec<(TenantShardId, Generation)>,
|
||||
) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {
|
||||
let re_attach_path = self
|
||||
let url = self
|
||||
.base_url
|
||||
.join("validate")
|
||||
.expect("Failed to build validate path");
|
||||
@@ -257,8 +260,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
return Err(RetryForeverError::ShuttingDown);
|
||||
}
|
||||
|
||||
let response: ValidateResponse =
|
||||
self.retry_http_forever(&re_attach_path, request).await?;
|
||||
let response: ValidateResponse = self.retry_http_forever(&url, request).await?;
|
||||
for rt in response.tenants {
|
||||
result.insert(rt.id, rt.valid);
|
||||
}
|
||||
|
||||
@@ -94,7 +94,9 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
|
||||
".*Flushed oversized open layer with size.*",
|
||||
# During teardown, we stop the storage controller before the pageservers, so pageservers
|
||||
# can experience connection errors doing background deletion queue work.
|
||||
".*WARN deletion backend: calling control plane generation validation API failed.*error sending request.*",
|
||||
".*WARN deletion backend:.* storage controller upcall failed, will retry.*error sending request.*",
|
||||
# Can happen when the pageserver starts faster than the storage controller
|
||||
".*WARN init_tenant_mgr:.* storage controller upcall failed, will retry.*error sending request.*",
|
||||
# Can happen when the test shuts down the storage controller while it is calling the utilization API
|
||||
".*WARN.*path=/v1/utilization .*request was dropped before completing",
|
||||
# Can happen during shutdown
|
||||
|
||||
Reference in New Issue
Block a user