From fa0750a37e01cee2e909d91be9b556ee2f128406 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 20 Aug 2024 15:25:21 +0100 Subject: [PATCH] storcon: add peer jwt token (#8764) ## Problem Storage controllers did not have the right token to speak to their peers for leadership transitions. ## Summary of changes Accept a peer jwt token for the storage controller. Epic: https://github.com/neondatabase/cloud/issues/14701 --- control_plane/src/storage_controller.rs | 5 +++++ storage_controller/src/leadership.rs | 3 +-- storage_controller/src/main.rs | 20 +++++++++++-------- storage_controller/src/service.rs | 3 +++ .../regress/test_storage_controller.py | 2 ++ 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index f6539ad5b0..27d8e2de0c 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -453,6 +453,11 @@ impl StorageController { let jwt_token = encode_from_key_file(&claims, private_key).expect("failed to generate jwt token"); args.push(format!("--jwt-token={jwt_token}")); + + let peer_claims = Claims::new(None, Scope::Admin); + let peer_jwt_token = encode_from_key_file(&peer_claims, private_key) + .expect("failed to generate jwt token"); + args.push(format!("--peer-jwt-token={peer_jwt_token}")); } if let Some(public_key) = &self.public_key { diff --git a/storage_controller/src/leadership.rs b/storage_controller/src/leadership.rs index a171bab451..5fae8991ec 100644 --- a/storage_controller/src/leadership.rs +++ b/storage_controller/src/leadership.rs @@ -110,10 +110,9 @@ impl Leadership { ) -> Option { tracing::info!("Sending step down request to {leader:?}"); - // TODO: jwt token let client = PeerClient::new( Uri::try_from(leader.address.as_str()).expect("Failed to build leader URI"), - self.config.jwt_token.clone(), + self.config.peer_jwt_token.clone(), ); let state = client.step_down(&self.cancel).await; match state { diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 17685b1140..e3f29b84e7 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -47,6 +47,9 @@ struct Cli { #[arg(long)] control_plane_jwt_token: Option, + #[arg(long)] + peer_jwt_token: Option, + /// URL to control plane compute notification endpoint #[arg(long)] compute_hook_url: Option, @@ -126,28 +129,28 @@ struct Secrets { public_key: Option, jwt_token: Option, control_plane_jwt_token: Option, + peer_jwt_token: Option, } impl Secrets { const DATABASE_URL_ENV: &'static str = "DATABASE_URL"; const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN"; const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN"; + const PEER_JWT_TOKEN_ENV: &'static str = "PEER_JWT_TOKEN"; const PUBLIC_KEY_ENV: &'static str = "PUBLIC_KEY"; /// Load secrets from, in order of preference: /// - CLI args if database URL is provided on the CLI /// - Environment variables if DATABASE_URL is set. - /// - AWS Secrets Manager secrets async fn load(args: &Cli) -> anyhow::Result { - let Some(database_url) = - Self::load_secret(&args.database_url, Self::DATABASE_URL_ENV).await + let Some(database_url) = Self::load_secret(&args.database_url, Self::DATABASE_URL_ENV) else { anyhow::bail!( "Database URL is not set (set `--database-url`, or `DATABASE_URL` environment)" ) }; - let public_key = match Self::load_secret(&args.public_key, Self::PUBLIC_KEY_ENV).await { + let public_key = match Self::load_secret(&args.public_key, Self::PUBLIC_KEY_ENV) { Some(v) => Some(JwtAuth::from_key(v).context("Loading public key")?), None => None, }; @@ -155,18 +158,18 @@ impl Secrets { let this = Self { database_url, public_key, - jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV).await, + jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV), control_plane_jwt_token: Self::load_secret( &args.control_plane_jwt_token, Self::CONTROL_PLANE_JWT_TOKEN_ENV, - ) - .await, + ), + peer_jwt_token: Self::load_secret(&args.peer_jwt_token, Self::PEER_JWT_TOKEN_ENV), }; Ok(this) } - async fn load_secret(cli: &Option, env_name: &str) -> Option { + fn load_secret(cli: &Option, env_name: &str) -> Option { if let Some(v) = cli { Some(v.clone()) } else if let Ok(v) = std::env::var(env_name) { @@ -266,6 +269,7 @@ async fn async_main() -> anyhow::Result<()> { let config = Config { jwt_token: secrets.jwt_token, control_plane_jwt_token: secrets.control_plane_jwt_token, + peer_jwt_token: secrets.peer_jwt_token, compute_hook_url: args.compute_hook_url, max_offline_interval: args .max_offline_interval diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 780f4a7ee5..453e96bad3 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -288,6 +288,9 @@ pub struct Config { // This JWT token will be used to authenticate this service to the control plane. pub control_plane_jwt_token: Option, + // This JWT token will be used to authenticate with other storage controller instances + pub peer_jwt_token: Option, + /// Where the compute hook should send notifications of pageserver attachment locations /// (this URL points to the control plane in prod). If this is None, the compute hook will /// assume it is running in a test environment and try to update neon_local. diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 95c35e9641..94d71a7677 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -2144,6 +2144,8 @@ def test_storage_controller_leadership_transfer( port_distributor: PortDistributor, step_down_times_out: bool, ): + neon_env_builder.auth_enabled = True + neon_env_builder.num_pageservers = 3 neon_env_builder.storage_controller_config = {