neon/pageserver/src/controller_upcall_client.rs
Christian Schwarz e35f7758d8 impr(controller_upcall_client): clean up copy-pasta code & add context to retries (#10991)
Before this PR, re-attach and validate would log the same warning
```
calling control plane generation validation API failed
```
whenever a retry hit an error.

That made it hard to tell which upcall was actually failing.

This PR makes the message generically valid for any upcall and adds
additional tracing spans to capture context.
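Roughly, the pattern is to keep one generic warning in the shared retry helper and let a `#[tracing::instrument]` span on each caller supply the context. A minimal standalone sketch of that idea (function names and the exact log line are illustrative, not the real code; assumes `tokio` with the macros/rt features, `tracing`, and `tracing-subscriber` as dependencies):

```rust
use tracing::{instrument, warn};

// Hypothetical stand-ins for the real upcall methods: the span created by
// `#[instrument]` on each caller is what distinguishes the otherwise
// identical warning emitted by the shared helper.
#[instrument(skip_all)]
async fn re_attach() {
    retry_forever().await;
}

#[instrument(skip_all)]
async fn validate() {
    retry_forever().await;
}

async fn retry_forever() {
    // One generic message for every upcall; the enclosing span
    // (`re_attach` or `validate`) tells the reader who is retrying.
    warn!("storage controller upcall failed, will retry");
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt().init();
    re_attach().await;
    validate().await;
}
```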

Along the way, clean up some copy-pasta variable naming.

refs
- https://github.com/neondatabase/neon/issues/10381#issuecomment-2684755827

---------

Co-authored-by: Alexander Lakhin <alexander.lakhin@neon.tech>
2025-02-27 10:59:43 +00:00

use std::collections::HashMap;

use futures::Future;
use pageserver_api::config::NodeMetadata;
use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest};
use pageserver_api::shard::TenantShardId;
use pageserver_api::upcall_api::{
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
ValidateRequestTenant, ValidateResponse,
};
use serde::Serialize;
use serde::de::DeserializeOwned;
use tokio_util::sync::CancellationToken;
use url::Url;
use utils::generation::Generation;
use utils::id::NodeId;
use utils::{backoff, failpoint_support};

use crate::config::PageServerConf;
use crate::virtual_file::on_fatal_io_error;

/// The Pageserver's client for using the storage controller upcall API: this is a small API
/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
///
/// The server presenting this API may either be the storage controller or some other
/// service (such as the Neon control plane) providing a store of generation numbers.
pub struct ControllerUpcallClient {
http_client: reqwest::Client,
base_url: Url,
node_id: NodeId,
cancel: CancellationToken,
}

/// Represent operations which internally retry on all errors other than
/// cancellation token firing: the only way they can fail is ShuttingDown.
pub enum RetryForeverError {
ShuttingDown,
}

pub trait ControlPlaneGenerationsApi {
fn re_attach(
&self,
conf: &PageServerConf,
) -> impl Future<
Output = Result<HashMap<TenantShardId, ReAttachResponseTenant>, RetryForeverError>,
> + Send;
fn validate(
&self,
tenants: Vec<(TenantShardId, Generation)>,
) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
}

impl ControllerUpcallClient {
/// A None return value indicates that the input `conf` object does not have control
/// plane API enabled.
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
let mut url = match conf.control_plane_api.as_ref() {
Some(u) => u.clone(),
None => return None,
};
if let Ok(mut segs) = url.path_segments_mut() {
// This ensures that `url` ends with a slash if it doesn't already.
// That way, we can subsequently use join() to safely attach extra path elements.
segs.pop_if_empty().push("");
}
let mut client = reqwest::ClientBuilder::new();
if let Some(jwt) = &conf.control_plane_api_token {
let mut headers = reqwest::header::HeaderMap::new();
headers.insert(
"Authorization",
format!("Bearer {}", jwt.get_contents()).parse().unwrap(),
);
client = client.default_headers(headers);
}
Some(Self {
http_client: client.build().expect("Failed to construct HTTP client"),
base_url: url,
node_id: conf.id,
cancel: cancel.clone(),
})
}
#[tracing::instrument(skip_all)]
async fn retry_http_forever<R, T>(
&self,
url: &url::Url,
request: R,
) -> Result<T, RetryForeverError>
where
R: Serialize,
T: DeserializeOwned,
{
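// Retry indefinitely: the `|_| false` predicate treats no error as permanent,
// `u32::MAX` effectively disables the retry cap, and "storage controller
// upcall" is the description used in the retry warnings (the caller's
// tracing span supplies the rest of the context). `retry` only returns
// `None` when the cancellation token fires.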
let res = backoff::retry(
|| async {
let response = self
.http_client
.post(url.clone())
.json(&request)
.send()
.await?;
response.error_for_status_ref()?;
response.json::<T>().await
},
|_| false,
3,
u32::MAX,
"storage controller upcall",
&self.cancel,
)
.await
.ok_or(RetryForeverError::ShuttingDown)?
.expect("We retry forever, this should never be reached");
Ok(res)
}
pub(crate) fn base_url(&self) -> &Url {
&self.base_url
}
}

impl ControlPlaneGenerationsApi for ControllerUpcallClient {
/// Block until we get a successful response, or error out if we are shut down
#[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context
async fn re_attach(
&self,
conf: &PageServerConf,
) -> Result<HashMap<TenantShardId, ReAttachResponseTenant>, RetryForeverError> {
let url = self
.base_url
.join("re-attach")
.expect("Failed to build re-attach path");
// Include registration content in the re-attach request if a metadata file is readable
let metadata_path = conf.metadata_path();
let register = match tokio::fs::read_to_string(&metadata_path).await {
Ok(metadata_str) => match serde_json::from_str::<NodeMetadata>(&metadata_str) {
Ok(m) => {
// Since we run one time at startup, be generous in our logging and
// dump all metadata.
tracing::info!(
"Loaded node metadata: postgres {}:{}, http {}:{}, other fields: {:?}",
m.postgres_host,
m.postgres_port,
m.http_host,
m.http_port,
m.other
);
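// Resolve the availability zone: prefer the `availability_zone_id` field
// from metadata.json and fall back to the pageserver config; if neither is
// set we cannot register, so panic below.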
let az_id = {
let az_id_from_metadata = m
.other
.get("availability_zone_id")
.and_then(|jv| jv.as_str().map(|str| str.to_owned()));
match az_id_from_metadata {
Some(az_id) => Some(AvailabilityZone(az_id)),
None => {
tracing::warn!(
"metadata.json does not contain an 'availability_zone_id' field"
);
conf.availability_zone.clone().map(AvailabilityZone)
}
}
};
if az_id.is_none() {
panic!(
"Availablity zone id could not be inferred from metadata.json or pageserver config"
);
}
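// Build the registration payload from metadata.json; it is sent along
// with the re-attach request.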
Some(NodeRegisterRequest {
node_id: conf.id,
listen_pg_addr: m.postgres_host,
listen_pg_port: m.postgres_port,
listen_http_addr: m.http_host,
listen_http_port: m.http_port,
listen_https_port: None, // TODO: Support https.
availability_zone_id: az_id.expect("Checked above"),
})
}
Err(e) => {
tracing::error!("Unreadable metadata in {metadata_path}: {e}");
None
}
},
Err(e) => {
if e.kind() == std::io::ErrorKind::NotFound {
// This is legal: we may have been deployed with some external script
// doing registration for us.
tracing::info!("Metadata file not found at {metadata_path}");
} else {
on_fatal_io_error(&e, &format!("Loading metadata at {metadata_path}"))
}
None
}
};
let request = ReAttachRequest {
node_id: self.node_id,
register: register.clone(),
};
let response: ReAttachResponse = self.retry_http_forever(&url, request).await?;
tracing::info!(
"Received re-attach response with {} tenants (node {}, register: {:?})",
response.tenants.len(),
self.node_id,
register,
);
failpoint_support::sleep_millis_async!("control-plane-client-re-attach");
Ok(response
.tenants
.into_iter()
.map(|rart| (rart.id, rart))
.collect::<HashMap<_, _>>())
}
/// Block until we get a successful response, or error out if we are shut down
#[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context
async fn validate(
&self,
tenants: Vec<(TenantShardId, Generation)>,
) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {
let url = self
.base_url
.join("validate")
.expect("Failed to build validate path");
// When sending validate requests, break them up into chunks so that we
// avoid possible edge cases of generating any HTTP requests that
// require database I/O across many thousands of tenants.
let mut result: HashMap<TenantShardId, bool> = HashMap::with_capacity(tenants.len());
for tenant_chunk in tenants.chunks(128) {
let request = ValidateRequest {
tenants: tenant_chunk
.iter()
.map(|(id, generation)| ValidateRequestTenant {
id: *id,
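// Only attached tenants, which always carry a real generation, issue
// validate calls, so the conversion below is expected to succeed.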
r#gen: (*generation).into().expect(
"Generation should always be valid for a Tenant doing deletions",
),
})
.collect(),
};
failpoint_support::sleep_millis_async!(
"control-plane-client-validate-sleep",
&self.cancel
);
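// The failpoint above may sleep for a while; re-check for shutdown
// before issuing the next request.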
if self.cancel.is_cancelled() {
return Err(RetryForeverError::ShuttingDown);
}
let response: ValidateResponse = self.retry_http_forever(&url, request).await?;
for rt in response.tenants {
result.insert(rt.id, rt.valid);
}
}
Ok(result)
}
}