From f7d5322e8b428ea4277e2bdf572612e4733d193a Mon Sep 17 00:00:00 2001
From: John Spray
Date: Tue, 3 Dec 2024 18:36:37 +0000
Subject: [PATCH] pageserver: more detailed logs when calling re-attach (#9996)

## Problem

We saw a peculiar case where a pageserver apparently got a 0-tenant
response to `/re-attach` but we couldn't see the request landing on a
storage controller. It was hard to confirm retrospectively that the
pageserver was configured properly at the moment it sent the request.

## Summary of changes

- Log the URL to which we are sending the request
- Log the NodeId and metadata that we sent
---
 libs/pageserver_api/src/controller_api.rs  |  4 ++--
 pageserver/src/controller_upcall_client.rs | 12 +++++++++---
 pageserver/src/tenant/mgr.rs               |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs
index 0ea30ce54f..9a5ebc95bd 100644
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -48,7 +48,7 @@ pub struct TenantCreateResponse {
     pub shards: Vec,
 }
 
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct NodeRegisterRequest {
     pub node_id: NodeId,
 
@@ -75,7 +75,7 @@ pub struct TenantPolicyRequest {
     pub scheduling: Option,
 }
 
-#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
+#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)]
 pub struct AvailabilityZone(pub String);
 
 impl Display for AvailabilityZone {
diff --git a/pageserver/src/controller_upcall_client.rs b/pageserver/src/controller_upcall_client.rs
index 73fc6dc3ab..d41bfd9021 100644
--- a/pageserver/src/controller_upcall_client.rs
+++ b/pageserver/src/controller_upcall_client.rs
@@ -115,6 +115,10 @@ impl ControllerUpcallClient {
 
         Ok(res)
     }
+
+    pub(crate) fn base_url(&self) -> &Url {
+        &self.base_url
+    }
 }
 
 impl ControlPlaneGenerationsApi for ControllerUpcallClient {
@@ -191,13 +195,15 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
 
         let request = ReAttachRequest {
             node_id: self.node_id,
-            register,
+            register: register.clone(),
         };
 
         let response: ReAttachResponse = self.retry_http_forever(&re_attach_path, request).await?;
         tracing::info!(
-            "Received re-attach response with {} tenants",
-            response.tenants.len()
+            "Received re-attach response with {} tenants (node {}, register: {:?})",
+            response.tenants.len(),
+            self.node_id,
+            register,
         );
 
         failpoint_support::sleep_millis_async!("control-plane-client-re-attach");
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index eb8191e43e..45481c4ed4 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -347,7 +347,7 @@ async fn init_load_generations(
         );
         emergency_generations(tenant_confs)
     } else if let Some(client) = ControllerUpcallClient::new(conf, cancel) {
-        info!("Calling control plane API to re-attach tenants");
+        info!("Calling {} API to re-attach tenants", client.base_url());
         // If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
         match client.re_attach(conf).await {
             Ok(tenants) => tenants