mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 05:22:56 +00:00
storcon: reuse reqwest http client (#11327)
## Problem - Part of https://github.com/neondatabase/neon/issues/11113 - Building a new `reqwest::Client` for every request is expensive because it parses CA certs under the hood. It's noticeable in storcon's flamegraph. ## Summary of changes - Reuse one `reqwest::Client` for all API calls to avoid parsing CA certificates every time.
This commit is contained in:
@@ -56,6 +56,14 @@ impl PageServerNode {
|
|||||||
Certificate::from_pem(&buf).expect("CA certificate should be valid")
|
Certificate::from_pem(&buf).expect("CA certificate should be valid")
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let mut http_client = reqwest::Client::builder();
|
||||||
|
if let Some(ssl_ca_cert) = ssl_ca_cert {
|
||||||
|
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||||
|
}
|
||||||
|
let http_client = http_client
|
||||||
|
.build()
|
||||||
|
.expect("Client constructs with no errors");
|
||||||
|
|
||||||
let endpoint = if env.storage_controller.use_https_pageserver_api {
|
let endpoint = if env.storage_controller.use_https_pageserver_api {
|
||||||
format!(
|
format!(
|
||||||
"https://{}",
|
"https://{}",
|
||||||
@@ -72,6 +80,7 @@ impl PageServerNode {
|
|||||||
conf: conf.clone(),
|
conf: conf.clone(),
|
||||||
env: env.clone(),
|
env: env.clone(),
|
||||||
http_client: mgmt_api::Client::new(
|
http_client: mgmt_api::Client::new(
|
||||||
|
http_client,
|
||||||
endpoint,
|
endpoint,
|
||||||
{
|
{
|
||||||
match conf.http_auth_type {
|
match conf.http_auth_type {
|
||||||
@@ -83,9 +92,7 @@ impl PageServerNode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
.as_deref(),
|
.as_deref(),
|
||||||
ssl_ca_cert,
|
),
|
||||||
)
|
|
||||||
.expect("Client constructs with no errors"),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -395,9 +395,15 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut http_client = reqwest::Client::builder();
|
||||||
|
if let Some(ssl_ca_cert) = ssl_ca_cert {
|
||||||
|
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||||
|
}
|
||||||
|
let http_client = http_client.build()?;
|
||||||
|
|
||||||
let mut trimmed = cli.api.to_string();
|
let mut trimmed = cli.api.to_string();
|
||||||
trimmed.pop();
|
trimmed.pop();
|
||||||
let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref(), ssl_ca_cert)?;
|
let vps_client = mgmt_api::Client::new(http_client, trimmed, cli.jwt.as_deref());
|
||||||
|
|
||||||
match cli.command {
|
match cli.command {
|
||||||
Command::NodeRegister {
|
Command::NodeRegister {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody;
|
|||||||
use pageserver_api::models::*;
|
use pageserver_api::models::*;
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
pub use reqwest::Body as ReqwestBody;
|
pub use reqwest::Body as ReqwestBody;
|
||||||
use reqwest::{Certificate, IntoUrl, Method, StatusCode, Url};
|
use reqwest::{IntoUrl, Method, StatusCode, Url};
|
||||||
use utils::id::{TenantId, TimelineId};
|
use utils::id::{TenantId, TimelineId};
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
@@ -39,8 +39,8 @@ pub enum Error {
|
|||||||
#[error("Cancelled")]
|
#[error("Cancelled")]
|
||||||
Cancelled,
|
Cancelled,
|
||||||
|
|
||||||
#[error("create client: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
|
#[error("request timed out: {0}")]
|
||||||
CreateClient(reqwest::Error),
|
Timeout(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
@@ -72,24 +72,7 @@ pub enum ForceAwaitLogicalSize {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Client {
|
impl Client {
|
||||||
pub fn new(
|
pub fn new(client: reqwest::Client, mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
|
||||||
mgmt_api_endpoint: String,
|
|
||||||
jwt: Option<&str>,
|
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
) -> Result<Self> {
|
|
||||||
let mut http_client = reqwest::Client::builder();
|
|
||||||
if let Some(ssl_ca_cert) = ssl_ca_cert {
|
|
||||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
|
||||||
}
|
|
||||||
let http_client = http_client.build().map_err(Error::CreateClient)?;
|
|
||||||
Ok(Self::from_client(http_client, mgmt_api_endpoint, jwt))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_client(
|
|
||||||
client: reqwest::Client,
|
|
||||||
mgmt_api_endpoint: String,
|
|
||||||
jwt: Option<&str>,
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
Self {
|
||||||
mgmt_api_endpoint,
|
mgmt_api_endpoint,
|
||||||
authorization_header: jwt.map(|jwt| format!("Bearer {jwt}")),
|
authorization_header: jwt.map(|jwt| format!("Bearer {jwt}")),
|
||||||
|
|||||||
@@ -34,10 +34,10 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
|||||||
let args: &'static Args = Box::leak(Box::new(args));
|
let args: &'static Args = Box::leak(Box::new(args));
|
||||||
|
|
||||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||||
|
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||||
args.mgmt_api_endpoint.clone(),
|
args.mgmt_api_endpoint.clone(),
|
||||||
args.pageserver_jwt.as_deref(),
|
args.pageserver_jwt.as_deref(),
|
||||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
));
|
||||||
)?);
|
|
||||||
|
|
||||||
// discover targets
|
// discover targets
|
||||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||||
|
|||||||
@@ -75,10 +75,10 @@ async fn main_impl(
|
|||||||
let args: &'static Args = Box::leak(Box::new(args));
|
let args: &'static Args = Box::leak(Box::new(args));
|
||||||
|
|
||||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||||
|
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||||
args.mgmt_api_endpoint.clone(),
|
args.mgmt_api_endpoint.clone(),
|
||||||
args.pageserver_jwt.as_deref(),
|
args.pageserver_jwt.as_deref(),
|
||||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
));
|
||||||
)?);
|
|
||||||
|
|
||||||
// discover targets
|
// discover targets
|
||||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||||
|
|||||||
@@ -123,10 +123,10 @@ async fn main_impl(
|
|||||||
let args: &'static Args = Box::leak(Box::new(args));
|
let args: &'static Args = Box::leak(Box::new(args));
|
||||||
|
|
||||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||||
|
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||||
args.mgmt_api_endpoint.clone(),
|
args.mgmt_api_endpoint.clone(),
|
||||||
args.pageserver_jwt.as_deref(),
|
args.pageserver_jwt.as_deref(),
|
||||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
));
|
||||||
)?);
|
|
||||||
|
|
||||||
if let Some(engine_str) = &args.set_io_engine {
|
if let Some(engine_str) = &args.set_io_engine {
|
||||||
mgmt_api_client.put_io_engine(engine_str).await?;
|
mgmt_api_client.put_io_engine(engine_str).await?;
|
||||||
|
|||||||
@@ -81,10 +81,10 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
|||||||
let args: &'static Args = Box::leak(Box::new(args));
|
let args: &'static Args = Box::leak(Box::new(args));
|
||||||
|
|
||||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||||
|
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||||
args.mgmt_api_endpoint.clone(),
|
args.mgmt_api_endpoint.clone(),
|
||||||
args.pageserver_jwt.as_deref(),
|
args.pageserver_jwt.as_deref(),
|
||||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
));
|
||||||
)?);
|
|
||||||
|
|
||||||
if let Some(engine_str) = &args.set_io_engine {
|
if let Some(engine_str) = &args.set_io_engine {
|
||||||
mgmt_api_client.put_io_engine(engine_str).await?;
|
mgmt_api_client.put_io_engine(engine_str).await?;
|
||||||
|
|||||||
@@ -38,10 +38,10 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
|||||||
let args: &'static Args = Box::leak(Box::new(args));
|
let args: &'static Args = Box::leak(Box::new(args));
|
||||||
|
|
||||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||||
|
reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
|
||||||
args.mgmt_api_endpoint.clone(),
|
args.mgmt_api_endpoint.clone(),
|
||||||
args.pageserver_jwt.as_deref(),
|
args.pageserver_jwt.as_deref(),
|
||||||
None, // TODO: support ssl_ca_file for https APIs in pagebench.
|
));
|
||||||
)?);
|
|
||||||
|
|
||||||
// discover targets
|
// discover targets
|
||||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||||
|
|||||||
@@ -38,9 +38,8 @@ pub enum Error {
|
|||||||
#[error("Cancelled")]
|
#[error("Cancelled")]
|
||||||
Cancelled,
|
Cancelled,
|
||||||
|
|
||||||
/// Failed to create client.
|
#[error("request timed out: {0}")]
|
||||||
#[error("create client: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())]
|
Timeout(String),
|
||||||
CreateClient(reqwest::Error),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ use futures::StreamExt;
|
|||||||
use futures::stream::FuturesUnordered;
|
use futures::stream::FuturesUnordered;
|
||||||
use pageserver_api::controller_api::{NodeAvailability, SkSchedulingPolicy};
|
use pageserver_api::controller_api::{NodeAvailability, SkSchedulingPolicy};
|
||||||
use pageserver_api::models::PageserverUtilization;
|
use pageserver_api::models::PageserverUtilization;
|
||||||
use reqwest::Certificate;
|
|
||||||
use safekeeper_api::models::SafekeeperUtilization;
|
use safekeeper_api::models::SafekeeperUtilization;
|
||||||
use safekeeper_client::mgmt_api;
|
use safekeeper_client::mgmt_api;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
@@ -27,8 +26,8 @@ struct HeartbeaterTask<Server, State> {
|
|||||||
|
|
||||||
max_offline_interval: Duration,
|
max_offline_interval: Duration,
|
||||||
max_warming_up_interval: Duration,
|
max_warming_up_interval: Duration,
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt_token: Option<String>,
|
jwt_token: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -76,8 +75,8 @@ where
|
|||||||
HeartbeaterTask<Server, State>: HeartBeat<Server, State>,
|
HeartbeaterTask<Server, State>: HeartBeat<Server, State>,
|
||||||
{
|
{
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt_token: Option<String>,
|
jwt_token: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
max_offline_interval: Duration,
|
max_offline_interval: Duration,
|
||||||
max_warming_up_interval: Duration,
|
max_warming_up_interval: Duration,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
@@ -86,8 +85,8 @@ where
|
|||||||
tokio::sync::mpsc::unbounded_channel::<HeartbeatRequest<Server, State>>();
|
tokio::sync::mpsc::unbounded_channel::<HeartbeatRequest<Server, State>>();
|
||||||
let mut heartbeater = HeartbeaterTask::new(
|
let mut heartbeater = HeartbeaterTask::new(
|
||||||
receiver,
|
receiver,
|
||||||
|
http_client,
|
||||||
jwt_token,
|
jwt_token,
|
||||||
ssl_ca_cert,
|
|
||||||
max_offline_interval,
|
max_offline_interval,
|
||||||
max_warming_up_interval,
|
max_warming_up_interval,
|
||||||
cancel,
|
cancel,
|
||||||
@@ -122,8 +121,8 @@ where
|
|||||||
{
|
{
|
||||||
fn new(
|
fn new(
|
||||||
receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest<Server, State>>,
|
receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest<Server, State>>,
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt_token: Option<String>,
|
jwt_token: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
max_offline_interval: Duration,
|
max_offline_interval: Duration,
|
||||||
max_warming_up_interval: Duration,
|
max_warming_up_interval: Duration,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
@@ -134,8 +133,8 @@ where
|
|||||||
state: HashMap::new(),
|
state: HashMap::new(),
|
||||||
max_offline_interval,
|
max_offline_interval,
|
||||||
max_warming_up_interval,
|
max_warming_up_interval,
|
||||||
|
http_client,
|
||||||
jwt_token,
|
jwt_token,
|
||||||
ssl_ca_cert,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
async fn run(&mut self) {
|
async fn run(&mut self) {
|
||||||
@@ -178,7 +177,7 @@ impl HeartBeat<Node, PageserverState> for HeartbeaterTask<Node, PageserverState>
|
|||||||
let mut heartbeat_futs = FuturesUnordered::new();
|
let mut heartbeat_futs = FuturesUnordered::new();
|
||||||
for (node_id, node) in &*pageservers {
|
for (node_id, node) in &*pageservers {
|
||||||
heartbeat_futs.push({
|
heartbeat_futs.push({
|
||||||
let ssl_ca_cert = self.ssl_ca_cert.clone();
|
let http_client = self.http_client.clone();
|
||||||
let jwt_token = self.jwt_token.clone();
|
let jwt_token = self.jwt_token.clone();
|
||||||
let cancel = self.cancel.clone();
|
let cancel = self.cancel.clone();
|
||||||
|
|
||||||
@@ -193,8 +192,8 @@ impl HeartBeat<Node, PageserverState> for HeartbeaterTask<Node, PageserverState>
|
|||||||
let response = node_clone
|
let response = node_clone
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.get_utilization().await },
|
|client| async move { client.get_utilization().await },
|
||||||
|
&http_client,
|
||||||
&jwt_token,
|
&jwt_token,
|
||||||
&ssl_ca_cert,
|
|
||||||
3,
|
3,
|
||||||
3,
|
3,
|
||||||
Duration::from_secs(1),
|
Duration::from_secs(1),
|
||||||
@@ -329,19 +328,19 @@ impl HeartBeat<Safekeeper, SafekeeperState> for HeartbeaterTask<Safekeeper, Safe
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
heartbeat_futs.push({
|
heartbeat_futs.push({
|
||||||
|
let http_client = self.http_client.clone();
|
||||||
let jwt_token = self
|
let jwt_token = self
|
||||||
.jwt_token
|
.jwt_token
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|t| SecretString::from(t.to_owned()));
|
.map(|t| SecretString::from(t.to_owned()));
|
||||||
let ssl_ca_cert = self.ssl_ca_cert.clone();
|
|
||||||
let cancel = self.cancel.clone();
|
let cancel = self.cancel.clone();
|
||||||
|
|
||||||
async move {
|
async move {
|
||||||
let response = sk
|
let response = sk
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.get_utilization().await },
|
|client| async move { client.get_utilization().await },
|
||||||
|
&http_client,
|
||||||
&jwt_token,
|
&jwt_token,
|
||||||
&ssl_ca_cert,
|
|
||||||
3,
|
3,
|
||||||
3,
|
3,
|
||||||
Duration::from_secs(1),
|
Duration::from_secs(1),
|
||||||
|
|||||||
@@ -656,11 +656,10 @@ async fn handle_tenant_timeline_passthrough(
|
|||||||
let _timer = latency.start_timer(labels.clone());
|
let _timer = latency.start_timer(labels.clone());
|
||||||
|
|
||||||
let client = mgmt_api::Client::new(
|
let client = mgmt_api::Client::new(
|
||||||
|
service.get_http_client().clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
service.get_config().pageserver_jwt_token.as_deref(),
|
service.get_config().pageserver_jwt_token.as_deref(),
|
||||||
service.get_config().ssl_ca_cert.clone(),
|
);
|
||||||
)
|
|
||||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
|
||||||
let resp = client.get_raw(path).await.map_err(|e|
|
let resp = client.get_raw(path).await.map_err(|e|
|
||||||
// We return 503 here because if we can't successfully send a request to the pageserver,
|
// We return 503 here because if we can't successfully send a request to the pageserver,
|
||||||
// either we aren't available or the pageserver is unavailable.
|
// either we aren't available or the pageserver is unavailable.
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ use pageserver_api::controller_api::{
|
|||||||
};
|
};
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use pageserver_client::mgmt_api;
|
use pageserver_client::mgmt_api;
|
||||||
use reqwest::{Certificate, StatusCode};
|
use reqwest::StatusCode;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use utils::backoff;
|
use utils::backoff;
|
||||||
@@ -280,8 +280,8 @@ impl Node {
|
|||||||
pub(crate) async fn with_client_retries<T, O, F>(
|
pub(crate) async fn with_client_retries<T, O, F>(
|
||||||
&self,
|
&self,
|
||||||
mut op: O,
|
mut op: O,
|
||||||
|
http_client: &reqwest::Client,
|
||||||
jwt: &Option<String>,
|
jwt: &Option<String>,
|
||||||
ssl_ca_cert: &Option<Certificate>,
|
|
||||||
warn_threshold: u32,
|
warn_threshold: u32,
|
||||||
max_retries: u32,
|
max_retries: u32,
|
||||||
timeout: Duration,
|
timeout: Duration,
|
||||||
@@ -300,24 +300,13 @@ impl Node {
|
|||||||
| ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
|
| ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
|
||||||
ApiError(_, _) => true,
|
ApiError(_, _) => true,
|
||||||
Cancelled => true,
|
Cancelled => true,
|
||||||
CreateClient(_) => true,
|
Timeout(_) => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: refactor PageserverClient and with_client_retires (#11113).
|
|
||||||
let mut http_client = reqwest::ClientBuilder::new().timeout(timeout);
|
|
||||||
if let Some(ssl_ca_cert) = ssl_ca_cert.as_ref() {
|
|
||||||
http_client = http_client.add_root_certificate(ssl_ca_cert.clone())
|
|
||||||
}
|
|
||||||
|
|
||||||
let http_client = match http_client.build() {
|
|
||||||
Ok(http_client) => http_client,
|
|
||||||
Err(err) => return Some(Err(mgmt_api::Error::CreateClient(err))),
|
|
||||||
};
|
|
||||||
|
|
||||||
backoff::retry(
|
backoff::retry(
|
||||||
|| {
|
|| {
|
||||||
let client = PageserverClient::from_client(
|
let client = PageserverClient::new(
|
||||||
self.get_id(),
|
self.get_id(),
|
||||||
http_client.clone(),
|
http_client.clone(),
|
||||||
self.base_url(),
|
self.base_url(),
|
||||||
@@ -326,11 +315,14 @@ impl Node {
|
|||||||
|
|
||||||
let node_cancel_fut = self.cancel.cancelled();
|
let node_cancel_fut = self.cancel.cancelled();
|
||||||
|
|
||||||
let op_fut = op(client);
|
let op_fut = tokio::time::timeout(timeout, op(client));
|
||||||
|
|
||||||
async {
|
async {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
r = op_fut=> {r},
|
r = op_fut => match r {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => Err(mgmt_api::Error::Timeout(format!("{e}"))),
|
||||||
|
},
|
||||||
_ = node_cancel_fut => {
|
_ = node_cancel_fut => {
|
||||||
Err(mgmt_api::Error::Cancelled)
|
Err(mgmt_api::Error::Cancelled)
|
||||||
}}
|
}}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use pageserver_api::models::{
|
|||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use pageserver_client::BlockUnblock;
|
use pageserver_client::BlockUnblock;
|
||||||
use pageserver_client::mgmt_api::{Client, Result};
|
use pageserver_client::mgmt_api::{Client, Result};
|
||||||
use reqwest::{Certificate, StatusCode};
|
use reqwest::StatusCode;
|
||||||
use utils::id::{NodeId, TenantId, TimelineId};
|
use utils::id::{NodeId, TenantId, TimelineId};
|
||||||
|
|
||||||
/// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
|
/// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
|
||||||
@@ -47,25 +47,13 @@ macro_rules! measured_request {
|
|||||||
|
|
||||||
impl PageserverClient {
|
impl PageserverClient {
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
node_id: NodeId,
|
|
||||||
mgmt_api_endpoint: String,
|
|
||||||
jwt: Option<&str>,
|
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
) -> Result<Self> {
|
|
||||||
Ok(Self {
|
|
||||||
inner: Client::new(mgmt_api_endpoint, jwt, ssl_ca_cert)?,
|
|
||||||
node_id_label: node_id.0.to_string(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn from_client(
|
|
||||||
node_id: NodeId,
|
node_id: NodeId,
|
||||||
raw_client: reqwest::Client,
|
raw_client: reqwest::Client,
|
||||||
mgmt_api_endpoint: String,
|
mgmt_api_endpoint: String,
|
||||||
jwt: Option<&str>,
|
jwt: Option<&str>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
inner: Client::from_client(raw_client, mgmt_api_endpoint, jwt),
|
inner: Client::new(raw_client, mgmt_api_endpoint, jwt),
|
||||||
node_id_label: node_id.0.to_string(),
|
node_id_label: node_id.0.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -86,6 +86,9 @@ pub(super) struct Reconciler {
|
|||||||
|
|
||||||
/// Access to persistent storage for updating generation numbers
|
/// Access to persistent storage for updating generation numbers
|
||||||
pub(crate) persistence: Arc<Persistence>,
|
pub(crate) persistence: Arc<Persistence>,
|
||||||
|
|
||||||
|
/// HTTP client with proper CA certs.
|
||||||
|
pub(crate) http_client: reqwest::Client,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct ReconcilerConfigBuilder {
|
pub(crate) struct ReconcilerConfigBuilder {
|
||||||
@@ -298,8 +301,8 @@ impl Reconciler {
|
|||||||
.location_config(tenant_shard_id, config.clone(), flush_ms, lazy)
|
.location_config(tenant_shard_id, config.clone(), flush_ms, lazy)
|
||||||
.await
|
.await
|
||||||
},
|
},
|
||||||
|
&self.http_client,
|
||||||
&self.service_config.pageserver_jwt_token,
|
&self.service_config.pageserver_jwt_token,
|
||||||
&self.service_config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
3,
|
3,
|
||||||
timeout,
|
timeout,
|
||||||
@@ -419,10 +422,10 @@ impl Reconciler {
|
|||||||
|
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
self.http_client.clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
self.service_config.pageserver_jwt_token.as_deref(),
|
self.service_config.pageserver_jwt_token.as_deref(),
|
||||||
self.service_config.ssl_ca_cert.clone(),
|
);
|
||||||
)?;
|
|
||||||
|
|
||||||
client
|
client
|
||||||
.wait_lsn(
|
.wait_lsn(
|
||||||
@@ -443,10 +446,10 @@ impl Reconciler {
|
|||||||
) -> anyhow::Result<HashMap<TimelineId, Lsn>> {
|
) -> anyhow::Result<HashMap<TimelineId, Lsn>> {
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
self.http_client.clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
self.service_config.pageserver_jwt_token.as_deref(),
|
self.service_config.pageserver_jwt_token.as_deref(),
|
||||||
self.service_config.ssl_ca_cert.clone(),
|
);
|
||||||
)?;
|
|
||||||
|
|
||||||
let timelines = client.timeline_list(&tenant_shard_id).await?;
|
let timelines = client.timeline_list(&tenant_shard_id).await?;
|
||||||
Ok(timelines
|
Ok(timelines
|
||||||
@@ -483,8 +486,8 @@ impl Reconciler {
|
|||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
},
|
},
|
||||||
|
&self.http_client,
|
||||||
&self.service_config.pageserver_jwt_token,
|
&self.service_config.pageserver_jwt_token,
|
||||||
&self.service_config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
3,
|
3,
|
||||||
request_download_timeout * 2,
|
request_download_timeout * 2,
|
||||||
@@ -778,8 +781,8 @@ impl Reconciler {
|
|||||||
let observed_conf = match attached_node
|
let observed_conf = match attached_node
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.get_location_config(tenant_shard_id).await },
|
|client| async move { client.get_location_config(tenant_shard_id).await },
|
||||||
|
&self.http_client,
|
||||||
&self.service_config.pageserver_jwt_token,
|
&self.service_config.pageserver_jwt_token,
|
||||||
&self.service_config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
Duration::from_secs(5),
|
Duration::from_secs(5),
|
||||||
@@ -1127,8 +1130,8 @@ impl Reconciler {
|
|||||||
match origin
|
match origin
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.get_location_config(tenant_shard_id).await },
|
|client| async move { client.get_location_config(tenant_shard_id).await },
|
||||||
|
&self.http_client,
|
||||||
&self.service_config.pageserver_jwt_token,
|
&self.service_config.pageserver_jwt_token,
|
||||||
&self.service_config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
3,
|
3,
|
||||||
Duration::from_secs(5),
|
Duration::from_secs(5),
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy};
|
use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy};
|
||||||
use reqwest::{Certificate, StatusCode};
|
use reqwest::StatusCode;
|
||||||
use safekeeper_client::mgmt_api;
|
use safekeeper_client::mgmt_api;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use utils::backoff;
|
use utils::backoff;
|
||||||
@@ -94,8 +94,8 @@ impl Safekeeper {
|
|||||||
pub(crate) async fn with_client_retries<T, O, F>(
|
pub(crate) async fn with_client_retries<T, O, F>(
|
||||||
&self,
|
&self,
|
||||||
mut op: O,
|
mut op: O,
|
||||||
|
http_client: &reqwest::Client,
|
||||||
jwt: &Option<SecretString>,
|
jwt: &Option<SecretString>,
|
||||||
ssl_ca_cert: &Option<Certificate>,
|
|
||||||
warn_threshold: u32,
|
warn_threshold: u32,
|
||||||
max_retries: u32,
|
max_retries: u32,
|
||||||
timeout: Duration,
|
timeout: Duration,
|
||||||
@@ -114,17 +114,10 @@ impl Safekeeper {
|
|||||||
| ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
|
| ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
|
||||||
ApiError(_, _) => true,
|
ApiError(_, _) => true,
|
||||||
Cancelled => true,
|
Cancelled => true,
|
||||||
CreateClient(_) => true,
|
Timeout(_) => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: refactor SafekeeperClient and with_client_retires (#11113).
|
|
||||||
let mut http_client = reqwest::Client::builder().timeout(timeout);
|
|
||||||
if let Some(ssl_ca_cert) = ssl_ca_cert.as_ref() {
|
|
||||||
http_client = http_client.add_root_certificate(ssl_ca_cert.clone());
|
|
||||||
}
|
|
||||||
let http_client = http_client.build().map_err(mgmt_api::Error::CreateClient)?;
|
|
||||||
|
|
||||||
backoff::retry(
|
backoff::retry(
|
||||||
|| {
|
|| {
|
||||||
let client = SafekeeperClient::new(
|
let client = SafekeeperClient::new(
|
||||||
@@ -136,11 +129,14 @@ impl Safekeeper {
|
|||||||
|
|
||||||
let node_cancel_fut = self.cancel.cancelled();
|
let node_cancel_fut = self.cancel.cancelled();
|
||||||
|
|
||||||
let op_fut = op(client);
|
let op_fut = tokio::time::timeout(timeout, op(client));
|
||||||
|
|
||||||
async {
|
async {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
r = op_fut=> {r},
|
r = op_fut => match r {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => Err(mgmt_api::Error::Timeout(format!("{e}"))),
|
||||||
|
},
|
||||||
_ = node_cancel_fut => {
|
_ = node_cancel_fut => {
|
||||||
Err(mgmt_api::Error::Cancelled)
|
Err(mgmt_api::Error::Cancelled)
|
||||||
}}
|
}}
|
||||||
|
|||||||
@@ -267,7 +267,7 @@ fn passthrough_api_error(node: &Node, e: mgmt_api::Error) -> ApiError {
|
|||||||
ApiError::Conflict(format!("{node} {status}: {status} {msg}"))
|
ApiError::Conflict(format!("{node} {status}: {status} {msg}"))
|
||||||
}
|
}
|
||||||
mgmt_api::Error::Cancelled => ApiError::ShuttingDown,
|
mgmt_api::Error::Cancelled => ApiError::ShuttingDown,
|
||||||
mgmt_api::Error::CreateClient(e) => ApiError::InternalServerError(anyhow::anyhow!(e)),
|
mgmt_api::Error::Timeout(e) => ApiError::Timeout(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -524,6 +524,9 @@ pub struct Service {
|
|||||||
/// This waits for initial reconciliation with pageservers to complete. Until this barrier
|
/// This waits for initial reconciliation with pageservers to complete. Until this barrier
|
||||||
/// passes, it isn't safe to do any actions that mutate tenants.
|
/// passes, it isn't safe to do any actions that mutate tenants.
|
||||||
pub(crate) startup_complete: Barrier,
|
pub(crate) startup_complete: Barrier,
|
||||||
|
|
||||||
|
/// HTTP client with proper CA certs.
|
||||||
|
http_client: reqwest::Client,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<ReconcileWaitError> for ApiError {
|
impl From<ReconcileWaitError> for ApiError {
|
||||||
@@ -667,6 +670,10 @@ impl Service {
|
|||||||
&self.config
|
&self.config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_http_client(&self) -> &reqwest::Client {
|
||||||
|
&self.http_client
|
||||||
|
}
|
||||||
|
|
||||||
/// Called once on startup, this function attempts to contact all pageservers to build an up-to-date
|
/// Called once on startup, this function attempts to contact all pageservers to build an up-to-date
|
||||||
/// view of the world, and determine which pageservers are responsive.
|
/// view of the world, and determine which pageservers are responsive.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
@@ -965,8 +972,8 @@ impl Service {
|
|||||||
let response = node
|
let response = node
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.list_location_config().await },
|
|client| async move { client.list_location_config().await },
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
5,
|
5,
|
||||||
timeout,
|
timeout,
|
||||||
@@ -1064,20 +1071,12 @@ impl Service {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
let client = match PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
self.http_client.clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
self.config.pageserver_jwt_token.as_deref(),
|
self.config.pageserver_jwt_token.as_deref(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
);
|
||||||
) {
|
|
||||||
Ok(client) => client,
|
|
||||||
Err(e) => {
|
|
||||||
tracing::error!(
|
|
||||||
"Failed to create client to detach unknown shard {tenant_shard_id} on pageserver {node_id}: {e}"
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
match client
|
match client
|
||||||
.location_config(
|
.location_config(
|
||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
@@ -1655,17 +1654,36 @@ impl Service {
|
|||||||
let cancel = CancellationToken::new();
|
let cancel = CancellationToken::new();
|
||||||
let reconcilers_cancel = cancel.child_token();
|
let reconcilers_cancel = cancel.child_token();
|
||||||
|
|
||||||
|
let mut http_client = reqwest::Client::builder();
|
||||||
|
// We intentionally disable the connection pool, so every request will create its own TCP connection.
|
||||||
|
// It's especially important for heartbeaters to notice more network problems.
|
||||||
|
//
|
||||||
|
// TODO: It makes sense to use this client only in heartbeaters and create a second one with
|
||||||
|
// connection pooling for everything else. But reqwest::Client may create a connection without
|
||||||
|
// ever using it (it uses hyper's Client under the hood):
|
||||||
|
// https://github.com/hyperium/hyper-util/blob/d51318df3461d40e5f5e5ca163cb3905ac960209/src/client/legacy/client.rs#L415
|
||||||
|
//
|
||||||
|
// Because of a bug in hyper0::Connection::graceful_shutdown such connections hang during
|
||||||
|
// graceful server shutdown: https://github.com/hyperium/hyper/issues/2730
|
||||||
|
//
|
||||||
|
// The bug has been fixed in hyper v1, so keep alive may be enabled only after we migrate to hyper1.
|
||||||
|
http_client = http_client.pool_max_idle_per_host(0);
|
||||||
|
if let Some(ssl_ca_cert) = &config.ssl_ca_cert {
|
||||||
|
http_client = http_client.add_root_certificate(ssl_ca_cert.clone());
|
||||||
|
}
|
||||||
|
let http_client = http_client.build()?;
|
||||||
|
|
||||||
let heartbeater_ps = Heartbeater::new(
|
let heartbeater_ps = Heartbeater::new(
|
||||||
|
http_client.clone(),
|
||||||
config.pageserver_jwt_token.clone(),
|
config.pageserver_jwt_token.clone(),
|
||||||
config.ssl_ca_cert.clone(),
|
|
||||||
config.max_offline_interval,
|
config.max_offline_interval,
|
||||||
config.max_warming_up_interval,
|
config.max_warming_up_interval,
|
||||||
cancel.clone(),
|
cancel.clone(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let heartbeater_sk = Heartbeater::new(
|
let heartbeater_sk = Heartbeater::new(
|
||||||
|
http_client.clone(),
|
||||||
config.safekeeper_jwt_token.clone(),
|
config.safekeeper_jwt_token.clone(),
|
||||||
config.ssl_ca_cert.clone(),
|
|
||||||
config.max_offline_interval,
|
config.max_offline_interval,
|
||||||
config.max_warming_up_interval,
|
config.max_warming_up_interval,
|
||||||
cancel.clone(),
|
cancel.clone(),
|
||||||
@@ -1708,6 +1726,7 @@ impl Service {
|
|||||||
reconcilers_gate: Gate::default(),
|
reconcilers_gate: Gate::default(),
|
||||||
tenant_op_locks: Default::default(),
|
tenant_op_locks: Default::default(),
|
||||||
node_op_locks: Default::default(),
|
node_op_locks: Default::default(),
|
||||||
|
http_client,
|
||||||
});
|
});
|
||||||
|
|
||||||
let result_task_this = this.clone();
|
let result_task_this = this.clone();
|
||||||
@@ -2013,8 +2032,8 @@ impl Service {
|
|||||||
let configs = match node
|
let configs = match node
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.list_location_config().await },
|
|client| async move { client.list_location_config().await },
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
5,
|
5,
|
||||||
SHORT_RECONCILE_TIMEOUT,
|
SHORT_RECONCILE_TIMEOUT,
|
||||||
@@ -2092,8 +2111,8 @@ impl Service {
|
|||||||
.location_config(tenant_shard_id, config, None, false)
|
.location_config(tenant_shard_id, config, None, false)
|
||||||
.await
|
.await
|
||||||
},
|
},
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
5,
|
5,
|
||||||
SHORT_RECONCILE_TIMEOUT,
|
SHORT_RECONCILE_TIMEOUT,
|
||||||
@@ -3235,11 +3254,10 @@ impl Service {
|
|||||||
for tenant_shard_id in shard_ids {
|
for tenant_shard_id in shard_ids {
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
self.http_client.clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
self.config.pageserver_jwt_token.as_deref(),
|
self.config.pageserver_jwt_token.as_deref(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
);
|
||||||
)
|
|
||||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
|
||||||
|
|
||||||
tracing::info!("Doing time travel recovery for shard {tenant_shard_id}",);
|
tracing::info!("Doing time travel recovery for shard {tenant_shard_id}",);
|
||||||
|
|
||||||
@@ -3298,11 +3316,10 @@ impl Service {
|
|||||||
for (tenant_shard_id, node) in targets {
|
for (tenant_shard_id, node) in targets {
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
self.http_client.clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
self.config.pageserver_jwt_token.as_deref(),
|
self.config.pageserver_jwt_token.as_deref(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
);
|
||||||
)
|
|
||||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
|
||||||
futs.push(async move {
|
futs.push(async move {
|
||||||
let result = client
|
let result = client
|
||||||
.tenant_secondary_download(tenant_shard_id, wait)
|
.tenant_secondary_download(tenant_shard_id, wait)
|
||||||
@@ -3427,8 +3444,8 @@ impl Service {
|
|||||||
.tenant_delete(TenantShardId::unsharded(tenant_id))
|
.tenant_delete(TenantShardId::unsharded(tenant_id))
|
||||||
.await
|
.await
|
||||||
},
|
},
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
3,
|
3,
|
||||||
RECONCILE_TIMEOUT,
|
RECONCILE_TIMEOUT,
|
||||||
@@ -3580,8 +3597,8 @@ impl Service {
|
|||||||
async fn create_one(
|
async fn create_one(
|
||||||
tenant_shard_id: TenantShardId,
|
tenant_shard_id: TenantShardId,
|
||||||
locations: ShardMutationLocations,
|
locations: ShardMutationLocations,
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt: Option<String>,
|
jwt: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
create_req: TimelineCreateRequest,
|
create_req: TimelineCreateRequest,
|
||||||
) -> Result<TimelineInfo, ApiError> {
|
) -> Result<TimelineInfo, ApiError> {
|
||||||
let latest = locations.latest.node;
|
let latest = locations.latest.node;
|
||||||
@@ -3594,8 +3611,7 @@ impl Service {
|
|||||||
);
|
);
|
||||||
|
|
||||||
let client =
|
let client =
|
||||||
PageserverClient::new(latest.get_id(), latest.base_url(), jwt.as_deref(), ssl_ca_cert.clone())
|
PageserverClient::new(latest.get_id(), http_client.clone(), latest.base_url(), jwt.as_deref());
|
||||||
.map_err(|e| passthrough_api_error(&latest, e))?;
|
|
||||||
|
|
||||||
let timeline_info = client
|
let timeline_info = client
|
||||||
.timeline_create(tenant_shard_id, &create_req)
|
.timeline_create(tenant_shard_id, &create_req)
|
||||||
@@ -3616,11 +3632,10 @@ impl Service {
|
|||||||
|
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
location.node.get_id(),
|
location.node.get_id(),
|
||||||
|
http_client.clone(),
|
||||||
location.node.base_url(),
|
location.node.base_url(),
|
||||||
jwt.as_deref(),
|
jwt.as_deref(),
|
||||||
ssl_ca_cert.clone(),
|
);
|
||||||
)
|
|
||||||
.map_err(|e| passthrough_api_error(&location.node, e))?;
|
|
||||||
|
|
||||||
let res = client
|
let res = client
|
||||||
.timeline_create(tenant_shard_id, &create_req)
|
.timeline_create(tenant_shard_id, &create_req)
|
||||||
@@ -3648,8 +3663,8 @@ impl Service {
|
|||||||
let timeline_info = create_one(
|
let timeline_info = create_one(
|
||||||
shard_zero_tid,
|
shard_zero_tid,
|
||||||
shard_zero_locations,
|
shard_zero_locations,
|
||||||
|
self.http_client.clone(),
|
||||||
self.config.pageserver_jwt_token.clone(),
|
self.config.pageserver_jwt_token.clone(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
|
||||||
create_req.clone(),
|
create_req.clone(),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
@@ -3678,8 +3693,8 @@ impl Service {
|
|||||||
Box::pin(create_one(
|
Box::pin(create_one(
|
||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
mutation_locations,
|
mutation_locations,
|
||||||
|
self.http_client.clone(),
|
||||||
jwt.clone(),
|
jwt.clone(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
|
||||||
create_req,
|
create_req,
|
||||||
))
|
))
|
||||||
},
|
},
|
||||||
@@ -3762,16 +3777,15 @@ impl Service {
|
|||||||
tenant_shard_id: TenantShardId,
|
tenant_shard_id: TenantShardId,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
node: Node,
|
node: Node,
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt: Option<String>,
|
jwt: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
req: TimelineArchivalConfigRequest,
|
req: TimelineArchivalConfigRequest,
|
||||||
) -> Result<(), ApiError> {
|
) -> Result<(), ApiError> {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"Setting archival config of timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
|
"Setting archival config of timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
|
||||||
);
|
);
|
||||||
|
|
||||||
let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref(), ssl_ca_cert)
|
let client = PageserverClient::new(node.get_id(), http_client, node.base_url(), jwt.as_deref());
|
||||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
|
||||||
|
|
||||||
client
|
client
|
||||||
.timeline_archival_config(tenant_shard_id, timeline_id, &req)
|
.timeline_archival_config(tenant_shard_id, timeline_id, &req)
|
||||||
@@ -3793,8 +3807,8 @@ impl Service {
|
|||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
node,
|
node,
|
||||||
|
self.http_client.clone(),
|
||||||
self.config.pageserver_jwt_token.clone(),
|
self.config.pageserver_jwt_token.clone(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
|
||||||
req.clone(),
|
req.clone(),
|
||||||
))
|
))
|
||||||
})
|
})
|
||||||
@@ -3831,16 +3845,15 @@ impl Service {
|
|||||||
tenant_shard_id: TenantShardId,
|
tenant_shard_id: TenantShardId,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
node: Node,
|
node: Node,
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt: Option<String>,
|
jwt: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
behavior: Option<DetachBehavior>,
|
behavior: Option<DetachBehavior>,
|
||||||
) -> Result<(ShardNumber, models::detach_ancestor::AncestorDetached), ApiError> {
|
) -> Result<(ShardNumber, models::detach_ancestor::AncestorDetached), ApiError> {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"Detaching timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
|
"Detaching timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
|
||||||
);
|
);
|
||||||
|
|
||||||
let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref(), ssl_ca_cert)
|
let client = PageserverClient::new(node.get_id(), http_client, node.base_url(), jwt.as_deref());
|
||||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
|
||||||
|
|
||||||
client
|
client
|
||||||
.timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior)
|
.timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior)
|
||||||
@@ -3879,8 +3892,8 @@ impl Service {
|
|||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
node,
|
node,
|
||||||
|
self.http_client.clone(),
|
||||||
self.config.pageserver_jwt_token.clone(),
|
self.config.pageserver_jwt_token.clone(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
|
||||||
behavior,
|
behavior,
|
||||||
))
|
))
|
||||||
})
|
})
|
||||||
@@ -3933,17 +3946,16 @@ impl Service {
|
|||||||
tenant_shard_id: TenantShardId,
|
tenant_shard_id: TenantShardId,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
node: Node,
|
node: Node,
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt: Option<String>,
|
jwt: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
dir: BlockUnblock,
|
dir: BlockUnblock,
|
||||||
) -> Result<(), ApiError> {
|
) -> Result<(), ApiError> {
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
http_client,
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
jwt.as_deref(),
|
jwt.as_deref(),
|
||||||
ssl_ca_cert,
|
);
|
||||||
)
|
|
||||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
|
||||||
|
|
||||||
client
|
client
|
||||||
.timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir)
|
.timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir)
|
||||||
@@ -3962,8 +3974,8 @@ impl Service {
|
|||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
node,
|
node,
|
||||||
|
self.http_client.clone(),
|
||||||
self.config.pageserver_jwt_token.clone(),
|
self.config.pageserver_jwt_token.clone(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
|
||||||
dir,
|
dir,
|
||||||
))
|
))
|
||||||
})
|
})
|
||||||
@@ -4091,8 +4103,8 @@ impl Service {
|
|||||||
let r = node
|
let r = node
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| op(tenant_shard_id, client),
|
|client| op(tenant_shard_id, client),
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
warn_threshold,
|
warn_threshold,
|
||||||
max_retries,
|
max_retries,
|
||||||
timeout,
|
timeout,
|
||||||
@@ -4316,15 +4328,14 @@ impl Service {
|
|||||||
tenant_shard_id: TenantShardId,
|
tenant_shard_id: TenantShardId,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
node: Node,
|
node: Node,
|
||||||
|
http_client: reqwest::Client,
|
||||||
jwt: Option<String>,
|
jwt: Option<String>,
|
||||||
ssl_ca_cert: Option<Certificate>,
|
|
||||||
) -> Result<StatusCode, ApiError> {
|
) -> Result<StatusCode, ApiError> {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"Deleting timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
|
"Deleting timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
|
||||||
);
|
);
|
||||||
|
|
||||||
let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref(), ssl_ca_cert)
|
let client = PageserverClient::new(node.get_id(), http_client, node.base_url(), jwt.as_deref());
|
||||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
|
||||||
let res = client
|
let res = client
|
||||||
.timeline_delete(tenant_shard_id, timeline_id)
|
.timeline_delete(tenant_shard_id, timeline_id)
|
||||||
.await;
|
.await;
|
||||||
@@ -4350,8 +4361,8 @@ impl Service {
|
|||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
node,
|
node,
|
||||||
|
self.http_client.clone(),
|
||||||
self.config.pageserver_jwt_token.clone(),
|
self.config.pageserver_jwt_token.clone(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
|
||||||
))
|
))
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
@@ -4373,8 +4384,8 @@ impl Service {
|
|||||||
shard_zero_tid,
|
shard_zero_tid,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
shard_zero_locations.latest.node,
|
shard_zero_locations.latest.node,
|
||||||
|
self.http_client.clone(),
|
||||||
self.config.pageserver_jwt_token.clone(),
|
self.config.pageserver_jwt_token.clone(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(shard_zero_status)
|
Ok(shard_zero_status)
|
||||||
@@ -4809,8 +4820,8 @@ impl Service {
|
|||||||
|
|
||||||
client.location_config(child_id, config, None, false).await
|
client.location_config(child_id, config, None, false).await
|
||||||
},
|
},
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
10,
|
10,
|
||||||
Duration::from_secs(5),
|
Duration::from_secs(5),
|
||||||
@@ -5412,11 +5423,10 @@ impl Service {
|
|||||||
} = target;
|
} = target;
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
self.http_client.clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
self.config.pageserver_jwt_token.as_deref(),
|
self.config.pageserver_jwt_token.as_deref(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
);
|
||||||
)
|
|
||||||
.map_err(|e| passthrough_api_error(node, e))?;
|
|
||||||
let response = client
|
let response = client
|
||||||
.tenant_shard_split(
|
.tenant_shard_split(
|
||||||
*parent_id,
|
*parent_id,
|
||||||
@@ -5900,11 +5910,10 @@ impl Service {
|
|||||||
|
|
||||||
let client = PageserverClient::new(
|
let client = PageserverClient::new(
|
||||||
node.get_id(),
|
node.get_id(),
|
||||||
|
self.http_client.clone(),
|
||||||
node.base_url(),
|
node.base_url(),
|
||||||
self.config.pageserver_jwt_token.as_deref(),
|
self.config.pageserver_jwt_token.as_deref(),
|
||||||
self.config.ssl_ca_cert.clone(),
|
);
|
||||||
)
|
|
||||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
|
||||||
|
|
||||||
let scan_result = client
|
let scan_result = client
|
||||||
.tenant_scan_remote_storage(tenant_id)
|
.tenant_scan_remote_storage(tenant_id)
|
||||||
@@ -7138,6 +7147,7 @@ impl Service {
|
|||||||
units,
|
units,
|
||||||
gate_guard,
|
gate_guard,
|
||||||
&self.reconcilers_cancel,
|
&self.reconcilers_cancel,
|
||||||
|
self.http_client.clone(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7545,8 +7555,8 @@ impl Service {
|
|||||||
match attached_node
|
match attached_node
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.tenant_heatmap_upload(tenant_shard_id).await },
|
|client| async move { client.tenant_heatmap_upload(tenant_shard_id).await },
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
3,
|
3,
|
||||||
10,
|
10,
|
||||||
SHORT_RECONCILE_TIMEOUT,
|
SHORT_RECONCILE_TIMEOUT,
|
||||||
@@ -7582,8 +7592,8 @@ impl Service {
|
|||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
},
|
},
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
3,
|
3,
|
||||||
10,
|
10,
|
||||||
SHORT_RECONCILE_TIMEOUT,
|
SHORT_RECONCILE_TIMEOUT,
|
||||||
@@ -7856,8 +7866,8 @@ impl Service {
|
|||||||
futures.push(async move {
|
futures.push(async move {
|
||||||
node.with_client_retries(
|
node.with_client_retries(
|
||||||
|client| async move { client.top_tenant_shards(request.clone()).await },
|
|client| async move { client.top_tenant_shards(request.clone()).await },
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
3,
|
3,
|
||||||
3,
|
3,
|
||||||
Duration::from_secs(5),
|
Duration::from_secs(5),
|
||||||
@@ -7976,8 +7986,8 @@ impl Service {
|
|||||||
match node
|
match node
|
||||||
.with_client_retries(
|
.with_client_retries(
|
||||||
|client| async move { client.tenant_secondary_status(tenant_shard_id).await },
|
|client| async move { client.tenant_secondary_status(tenant_shard_id).await },
|
||||||
|
&self.http_client,
|
||||||
&self.config.pageserver_jwt_token,
|
&self.config.pageserver_jwt_token,
|
||||||
&self.config.ssl_ca_cert,
|
|
||||||
1,
|
1,
|
||||||
3,
|
3,
|
||||||
Duration::from_millis(250),
|
Duration::from_millis(250),
|
||||||
|
|||||||
@@ -338,7 +338,6 @@ impl SafekeeperReconciler {
|
|||||||
.safekeeper_jwt_token
|
.safekeeper_jwt_token
|
||||||
.clone()
|
.clone()
|
||||||
.map(SecretString::from);
|
.map(SecretString::from);
|
||||||
let ssl_ca_cert = self.service.config.ssl_ca_cert.clone();
|
|
||||||
loop {
|
loop {
|
||||||
let res = req
|
let res = req
|
||||||
.safekeeper
|
.safekeeper
|
||||||
@@ -347,8 +346,8 @@ impl SafekeeperReconciler {
|
|||||||
let closure = &closure;
|
let closure = &closure;
|
||||||
async move { closure(client).await }
|
async move { closure(client).await }
|
||||||
},
|
},
|
||||||
|
self.service.get_http_client(),
|
||||||
&jwt,
|
&jwt,
|
||||||
&ssl_ca_cert,
|
|
||||||
3,
|
3,
|
||||||
10,
|
10,
|
||||||
Duration::from_secs(10),
|
Duration::from_secs(10),
|
||||||
|
|||||||
@@ -78,8 +78,8 @@ impl Service {
|
|||||||
for sk in timeline_persistence.sk_set.iter() {
|
for sk in timeline_persistence.sk_set.iter() {
|
||||||
let sk_id = NodeId(*sk as u64);
|
let sk_id = NodeId(*sk as u64);
|
||||||
let safekeepers = safekeepers.clone();
|
let safekeepers = safekeepers.clone();
|
||||||
|
let http_client = self.http_client.clone();
|
||||||
let jwt = jwt.clone();
|
let jwt = jwt.clone();
|
||||||
let ssl_ca_cert = self.config.ssl_ca_cert.clone();
|
|
||||||
let req = req.clone();
|
let req = req.clone();
|
||||||
joinset.spawn(async move {
|
joinset.spawn(async move {
|
||||||
// Unwrap is fine as we already would have returned error above
|
// Unwrap is fine as we already would have returned error above
|
||||||
@@ -90,8 +90,8 @@ impl Service {
|
|||||||
let req = req.clone();
|
let req = req.clone();
|
||||||
async move { client.create_timeline(&req).await }
|
async move { client.create_timeline(&req).await }
|
||||||
},
|
},
|
||||||
|
&http_client,
|
||||||
&jwt,
|
&jwt,
|
||||||
&ssl_ca_cert,
|
|
||||||
3,
|
3,
|
||||||
3,
|
3,
|
||||||
SK_CREATE_TIMELINE_RECONCILE_TIMEOUT,
|
SK_CREATE_TIMELINE_RECONCILE_TIMEOUT,
|
||||||
|
|||||||
@@ -1588,6 +1588,7 @@ impl TenantShard {
|
|||||||
units: ReconcileUnits,
|
units: ReconcileUnits,
|
||||||
gate_guard: GateGuard,
|
gate_guard: GateGuard,
|
||||||
cancel: &CancellationToken,
|
cancel: &CancellationToken,
|
||||||
|
http_client: reqwest::Client,
|
||||||
) -> Option<ReconcilerWaiter> {
|
) -> Option<ReconcilerWaiter> {
|
||||||
// Reconcile in flight for a stale sequence? Our sequence's task will wait for it before
|
// Reconcile in flight for a stale sequence? Our sequence's task will wait for it before
|
||||||
// doing our sequence's work.
|
// doing our sequence's work.
|
||||||
@@ -1633,6 +1634,7 @@ impl TenantShard {
|
|||||||
cancel: reconciler_cancel.clone(),
|
cancel: reconciler_cancel.clone(),
|
||||||
persistence: persistence.clone(),
|
persistence: persistence.clone(),
|
||||||
compute_notify_failure: false,
|
compute_notify_failure: false,
|
||||||
|
http_client,
|
||||||
};
|
};
|
||||||
|
|
||||||
let reconcile_seq = self.sequence;
|
let reconcile_seq = self.sequence;
|
||||||
|
|||||||
@@ -1599,6 +1599,12 @@ def test_storage_controller_heartbeats(
|
|||||||
env.storage_controller.allowed_errors.append(
|
env.storage_controller.allowed_errors.append(
|
||||||
".*Call to node.*management API.*failed.*failpoint.*"
|
".*Call to node.*management API.*failed.*failpoint.*"
|
||||||
)
|
)
|
||||||
|
# The server starts listening to the socket before sending re-attach request,
|
||||||
|
# but it starts serving HTTP only when re-attach is completed.
|
||||||
|
# If re-attach is slow (last scenario), storcon's heartbeat requests will time out.
|
||||||
|
env.storage_controller.allowed_errors.append(
|
||||||
|
".*Call to node.*management API.*failed.* Timeout.*"
|
||||||
|
)
|
||||||
|
|
||||||
# Initially we have two online pageservers
|
# Initially we have two online pageservers
|
||||||
nodes = env.storage_controller.node_list()
|
nodes = env.storage_controller.node_list()
|
||||||
|
|||||||
Reference in New Issue
Block a user