mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 21:42:56 +00:00
storcon: infrastructure for safekeeper specific JWT tokens (#10905)
Safekeepers only respond to requests with the per-token scope, or the `safekeeperdata` JWT scope. Therefore, add infrastructure in the storage controller for safekeeper JWTs. Also, rename the ambiguous `jwt_token` to `pageserver_jwt_token`. Part of #9011 Related: https://github.com/neondatabase/cloud/issues/24727
This commit is contained in:
@@ -598,7 +598,10 @@ async fn handle_tenant_timeline_passthrough(
|
||||
|
||||
let _timer = latency.start_timer(labels.clone());
|
||||
|
||||
let client = mgmt_api::Client::new(node.base_url(), service.get_config().jwt_token.as_deref());
|
||||
let client = mgmt_api::Client::new(
|
||||
node.base_url(),
|
||||
service.get_config().pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
let resp = client.get_raw(path).await.map_err(|e|
|
||||
// We return 503 here because if we can't successfully send a request to the pageserver,
|
||||
// either we aren't available or the pageserver is unavailable.
|
||||
|
||||
@@ -53,6 +53,10 @@ struct Cli {
|
||||
#[arg(long)]
|
||||
jwt_token: Option<String>,
|
||||
|
||||
/// Token for authenticating this service with the safekeepers it controls
|
||||
#[arg(long)]
|
||||
safekeeper_jwt_token: Option<String>,
|
||||
|
||||
/// Token for authenticating this service with the control plane, when calling
|
||||
/// the compute notification endpoint
|
||||
#[arg(long)]
|
||||
@@ -153,7 +157,8 @@ impl Default for StrictMode {
|
||||
struct Secrets {
|
||||
database_url: String,
|
||||
public_key: Option<JwtAuth>,
|
||||
jwt_token: Option<String>,
|
||||
pageserver_jwt_token: Option<String>,
|
||||
safekeeper_jwt_token: Option<String>,
|
||||
control_plane_jwt_token: Option<String>,
|
||||
peer_jwt_token: Option<String>,
|
||||
}
|
||||
@@ -161,6 +166,7 @@ struct Secrets {
|
||||
impl Secrets {
|
||||
const DATABASE_URL_ENV: &'static str = "DATABASE_URL";
|
||||
const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN";
|
||||
const SAFEKEEPER_JWT_TOKEN_ENV: &'static str = "SAFEKEEPER_JWT_TOKEN";
|
||||
const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN";
|
||||
const PEER_JWT_TOKEN_ENV: &'static str = "PEER_JWT_TOKEN";
|
||||
const PUBLIC_KEY_ENV: &'static str = "PUBLIC_KEY";
|
||||
@@ -184,7 +190,14 @@ impl Secrets {
|
||||
let this = Self {
|
||||
database_url,
|
||||
public_key,
|
||||
jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV),
|
||||
pageserver_jwt_token: Self::load_secret(
|
||||
&args.jwt_token,
|
||||
Self::PAGESERVER_JWT_TOKEN_ENV,
|
||||
),
|
||||
safekeeper_jwt_token: Self::load_secret(
|
||||
&args.safekeeper_jwt_token,
|
||||
Self::SAFEKEEPER_JWT_TOKEN_ENV,
|
||||
),
|
||||
control_plane_jwt_token: Self::load_secret(
|
||||
&args.control_plane_jwt_token,
|
||||
Self::CONTROL_PLANE_JWT_TOKEN_ENV,
|
||||
@@ -264,11 +277,17 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
|
||||
let secrets = Secrets::load(&args).await?;
|
||||
|
||||
// TODO: once we've rolled out the safekeeper JWT token everywhere, put it into the validation code below
|
||||
tracing::info!(
|
||||
"safekeeper_jwt_token set: {:?}",
|
||||
secrets.safekeeper_jwt_token.is_some()
|
||||
);
|
||||
|
||||
// Validate required secrets and arguments are provided in strict mode
|
||||
match strict_mode {
|
||||
StrictMode::Strict
|
||||
if (secrets.public_key.is_none()
|
||||
|| secrets.jwt_token.is_none()
|
||||
|| secrets.pageserver_jwt_token.is_none()
|
||||
|| secrets.control_plane_jwt_token.is_none()) =>
|
||||
{
|
||||
// Production systems should always have secrets configured: if public_key was not set
|
||||
@@ -293,7 +312,8 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
let config = Config {
|
||||
jwt_token: secrets.jwt_token,
|
||||
pageserver_jwt_token: secrets.pageserver_jwt_token,
|
||||
safekeeper_jwt_token: secrets.safekeeper_jwt_token,
|
||||
control_plane_jwt_token: secrets.control_plane_jwt_token,
|
||||
peer_jwt_token: secrets.peer_jwt_token,
|
||||
compute_hook_url: args.compute_hook_url,
|
||||
|
||||
@@ -296,7 +296,7 @@ impl Reconciler {
|
||||
.location_config(tenant_shard_id, config.clone(), flush_ms, lazy)
|
||||
.await
|
||||
},
|
||||
&self.service_config.jwt_token,
|
||||
&self.service_config.pageserver_jwt_token,
|
||||
1,
|
||||
3,
|
||||
timeout,
|
||||
@@ -417,7 +417,7 @@ impl Reconciler {
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
node.base_url(),
|
||||
self.service_config.jwt_token.as_deref(),
|
||||
self.service_config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
|
||||
client
|
||||
@@ -440,7 +440,7 @@ impl Reconciler {
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
node.base_url(),
|
||||
self.service_config.jwt_token.as_deref(),
|
||||
self.service_config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
|
||||
let timelines = client.timeline_list(&tenant_shard_id).await?;
|
||||
@@ -478,7 +478,7 @@ impl Reconciler {
|
||||
)
|
||||
.await
|
||||
},
|
||||
&self.service_config.jwt_token,
|
||||
&self.service_config.pageserver_jwt_token,
|
||||
1,
|
||||
3,
|
||||
request_download_timeout * 2,
|
||||
@@ -771,7 +771,7 @@ impl Reconciler {
|
||||
let observed_conf = match attached_node
|
||||
.with_client_retries(
|
||||
|client| async move { client.get_location_config(tenant_shard_id).await },
|
||||
&self.service_config.jwt_token,
|
||||
&self.service_config.pageserver_jwt_token,
|
||||
1,
|
||||
1,
|
||||
Duration::from_secs(5),
|
||||
@@ -1099,7 +1099,7 @@ impl Reconciler {
|
||||
match origin
|
||||
.with_client_retries(
|
||||
|client| async move { client.get_location_config(tenant_shard_id).await },
|
||||
&self.service_config.jwt_token,
|
||||
&self.service_config.pageserver_jwt_token,
|
||||
1,
|
||||
3,
|
||||
Duration::from_secs(5),
|
||||
|
||||
@@ -348,7 +348,12 @@ pub struct Config {
|
||||
// All pageservers managed by one instance of this service must have
|
||||
// the same public key. This JWT token will be used to authenticate
|
||||
// this service to the pageservers it manages.
|
||||
pub jwt_token: Option<String>,
|
||||
pub pageserver_jwt_token: Option<String>,
|
||||
|
||||
// All safekeepers managed by one instance of this service must have
|
||||
// the same public key. This JWT token will be used to authenticate
|
||||
// this service to the safekeepers it manages.
|
||||
pub safekeeper_jwt_token: Option<String>,
|
||||
|
||||
// This JWT token will be used to authenticate this service to the control plane.
|
||||
pub control_plane_jwt_token: Option<String>,
|
||||
@@ -882,7 +887,7 @@ impl Service {
|
||||
let response = node
|
||||
.with_client_retries(
|
||||
|client| async move { client.list_location_config().await },
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
1,
|
||||
5,
|
||||
timeout,
|
||||
@@ -983,7 +988,7 @@ impl Service {
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
node.base_url(),
|
||||
self.config.jwt_token.as_deref(),
|
||||
self.config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
match client
|
||||
.location_config(
|
||||
@@ -1553,14 +1558,14 @@ impl Service {
|
||||
let reconcilers_cancel = cancel.child_token();
|
||||
|
||||
let heartbeater_ps = Heartbeater::new(
|
||||
config.jwt_token.clone(),
|
||||
config.pageserver_jwt_token.clone(),
|
||||
config.max_offline_interval,
|
||||
config.max_warming_up_interval,
|
||||
cancel.clone(),
|
||||
);
|
||||
|
||||
let heartbeater_sk = Heartbeater::new(
|
||||
config.jwt_token.clone(),
|
||||
config.safekeeper_jwt_token.clone(),
|
||||
config.max_offline_interval,
|
||||
config.max_warming_up_interval,
|
||||
cancel.clone(),
|
||||
@@ -1907,7 +1912,7 @@ impl Service {
|
||||
let configs = match node
|
||||
.with_client_retries(
|
||||
|client| async move { client.list_location_config().await },
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
1,
|
||||
5,
|
||||
SHORT_RECONCILE_TIMEOUT,
|
||||
@@ -1965,7 +1970,7 @@ impl Service {
|
||||
.location_config(tenant_shard_id, config, None, false)
|
||||
.await
|
||||
},
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
1,
|
||||
5,
|
||||
SHORT_RECONCILE_TIMEOUT,
|
||||
@@ -3100,7 +3105,7 @@ impl Service {
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
node.base_url(),
|
||||
self.config.jwt_token.as_deref(),
|
||||
self.config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
|
||||
tracing::info!("Doing time travel recovery for shard {tenant_shard_id}",);
|
||||
@@ -3161,7 +3166,7 @@ impl Service {
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
node.base_url(),
|
||||
self.config.jwt_token.as_deref(),
|
||||
self.config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
futs.push(async move {
|
||||
let result = client
|
||||
@@ -3284,7 +3289,7 @@ impl Service {
|
||||
.tenant_delete(TenantShardId::unsharded(tenant_id))
|
||||
.await
|
||||
},
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
1,
|
||||
3,
|
||||
RECONCILE_TIMEOUT,
|
||||
@@ -3503,7 +3508,7 @@ impl Service {
|
||||
let timeline_info = create_one(
|
||||
shard_zero_tid,
|
||||
shard_zero_locations,
|
||||
self.config.jwt_token.clone(),
|
||||
self.config.pageserver_jwt_token.clone(),
|
||||
create_req.clone(),
|
||||
)
|
||||
.await?;
|
||||
@@ -3519,7 +3524,7 @@ impl Service {
|
||||
// Create timeline on remaining shards with number >0
|
||||
if !targets.0.is_empty() {
|
||||
// If we had multiple shards, issue requests for the remainder now.
|
||||
let jwt = &self.config.jwt_token;
|
||||
let jwt = &self.config.pageserver_jwt_token;
|
||||
self.tenant_for_shards(
|
||||
targets
|
||||
.0
|
||||
@@ -3602,7 +3607,7 @@ impl Service {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
node,
|
||||
self.config.jwt_token.clone(),
|
||||
self.config.pageserver_jwt_token.clone(),
|
||||
req.clone(),
|
||||
))
|
||||
})
|
||||
@@ -3683,7 +3688,7 @@ impl Service {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
node,
|
||||
self.config.jwt_token.clone(),
|
||||
self.config.pageserver_jwt_token.clone(),
|
||||
))
|
||||
})
|
||||
.await?;
|
||||
@@ -3757,7 +3762,7 @@ impl Service {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
node,
|
||||
self.config.jwt_token.clone(),
|
||||
self.config.pageserver_jwt_token.clone(),
|
||||
dir,
|
||||
))
|
||||
})
|
||||
@@ -3872,7 +3877,7 @@ impl Service {
|
||||
futs.push(async move {
|
||||
node.with_client_retries(
|
||||
|client| op(tenant_shard_id, client),
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
warn_threshold,
|
||||
max_retries,
|
||||
timeout,
|
||||
@@ -4121,7 +4126,7 @@ impl Service {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
node,
|
||||
self.config.jwt_token.clone(),
|
||||
self.config.pageserver_jwt_token.clone(),
|
||||
))
|
||||
})
|
||||
.await?;
|
||||
@@ -4143,7 +4148,7 @@ impl Service {
|
||||
shard_zero_tid,
|
||||
timeline_id,
|
||||
shard_zero_locations.latest.node,
|
||||
self.config.jwt_token.clone(),
|
||||
self.config.pageserver_jwt_token.clone(),
|
||||
)
|
||||
.await?;
|
||||
Ok(shard_zero_status)
|
||||
@@ -4542,7 +4547,7 @@ impl Service {
|
||||
|
||||
client.location_config(child_id, config, None, false).await
|
||||
},
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
1,
|
||||
10,
|
||||
Duration::from_secs(5),
|
||||
@@ -5142,7 +5147,7 @@ impl Service {
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
node.base_url(),
|
||||
self.config.jwt_token.as_deref(),
|
||||
self.config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
let response = client
|
||||
.tenant_shard_split(
|
||||
@@ -5468,7 +5473,7 @@ impl Service {
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
node.base_url(),
|
||||
self.config.jwt_token.as_deref(),
|
||||
self.config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
|
||||
let scan_result = client
|
||||
@@ -7094,7 +7099,7 @@ impl Service {
|
||||
match attached_node
|
||||
.with_client_retries(
|
||||
|client| async move { client.tenant_heatmap_upload(tenant_shard_id).await },
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
3,
|
||||
10,
|
||||
SHORT_RECONCILE_TIMEOUT,
|
||||
@@ -7130,7 +7135,7 @@ impl Service {
|
||||
)
|
||||
.await
|
||||
},
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
3,
|
||||
10,
|
||||
SHORT_RECONCILE_TIMEOUT,
|
||||
@@ -7185,7 +7190,7 @@ impl Service {
|
||||
let request = request_ref.clone();
|
||||
client.top_tenant_shards(request.clone()).await
|
||||
},
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
3,
|
||||
3,
|
||||
Duration::from_secs(5),
|
||||
@@ -7358,7 +7363,7 @@ impl Service {
|
||||
match node
|
||||
.with_client_retries(
|
||||
|client| async move { client.tenant_secondary_status(tenant_shard_id).await },
|
||||
&self.config.jwt_token,
|
||||
&self.config.pageserver_jwt_token,
|
||||
1,
|
||||
3,
|
||||
Duration::from_millis(250),
|
||||
|
||||
Reference in New Issue
Block a user