From ea118a238a31187248b8ca44bb77849e63bd3fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 8 Nov 2023 17:56:53 +0100 Subject: [PATCH] JWT logging improvements (#5823) * lower level on auth success from info to debug (fixes #5820) * don't log stacktraces on auth errors (as requested on slack). we do this by introducing an `AuthError` type instead of using `anyhow` and `bail`. * return errors that have been censored for improved security. --- libs/postgres_backend/src/lib.rs | 12 ++++++-- libs/utils/src/auth.rs | 52 ++++++++++++++++++++------------ libs/utils/src/http/endpoint.rs | 17 ++++++----- libs/utils/src/http/error.rs | 5 ++- pageserver/src/auth.rs | 19 ++++++------ pageserver/src/http/routes.rs | 2 ++ pageserver/src/page_service.rs | 17 ++++++----- safekeeper/src/auth.rs | 17 ++++++----- safekeeper/src/handler.rs | 27 +++++++++-------- test_runner/regress/test_auth.py | 12 ++++---- 10 files changed, 105 insertions(+), 75 deletions(-) diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 1e25c6ed6a..92a0ec7c73 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -17,7 +17,7 @@ use std::{fmt, io}; use std::{future::Future, str::FromStr}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio_rustls::TlsAcceptor; -use tracing::{debug, error, info, trace}; +use tracing::{debug, error, info, trace, warn}; use pq_proto::framed::{ConnectionError, Framed, FramedReader, FramedWriter}; use pq_proto::{ @@ -35,6 +35,9 @@ pub enum QueryError { /// We were instructed to shutdown while processing the query #[error("Shutting down")] Shutdown, + /// Authentication failure + #[error("Unauthorized: {0}")] + Unauthorized(std::borrow::Cow<'static, str>), /// Some other error #[error(transparent)] Other(#[from] anyhow::Error), @@ -51,6 +54,7 @@ impl QueryError { match self { Self::Disconnected(_) => b"08006", // connection failure Self::Shutdown => SQLSTATE_ADMIN_SHUTDOWN, + Self::Unauthorized(_) => SQLSTATE_INTERNAL_ERROR, Self::Other(_) => SQLSTATE_INTERNAL_ERROR, // internal error } } @@ -610,7 +614,7 @@ impl PostgresBackend { if let Err(e) = handler.check_auth_jwt(self, jwt_response) { self.write_message_noflush(&BeMessage::ErrorResponse( - &e.to_string(), + &short_error(&e), Some(e.pg_error_code()), ))?; return Err(e); @@ -966,6 +970,7 @@ pub fn short_error(e: &QueryError) -> String { match e { QueryError::Disconnected(connection_error) => connection_error.to_string(), QueryError::Shutdown => "shutdown".to_string(), + QueryError::Unauthorized(_e) => "JWT authentication error".to_string(), QueryError::Other(e) => format!("{e:#}"), } } @@ -985,6 +990,9 @@ fn log_query_error(query: &str, e: &QueryError) { QueryError::Shutdown => { info!("query handler for '{query}' cancelled during tenant shutdown") } + QueryError::Unauthorized(e) => { + warn!("query handler for '{query}' failed with authentication error: {e}"); + } QueryError::Other(e) => { error!("query handler for '{query}' failed: {e:?}"); } diff --git a/libs/utils/src/auth.rs b/libs/utils/src/auth.rs index 6a26f17115..66b1f6e866 100644 --- a/libs/utils/src/auth.rs +++ b/libs/utils/src/auth.rs @@ -2,7 +2,7 @@ use arc_swap::ArcSwap; use serde; -use std::{fs, sync::Arc}; +use std::{borrow::Cow, fmt::Display, fs, sync::Arc}; use anyhow::Result; use camino::Utf8Path; @@ -11,7 +11,7 @@ use jsonwebtoken::{ }; use serde::{Deserialize, Serialize}; -use crate::id::TenantId; +use crate::{http::error::ApiError, id::TenantId}; /// Algorithm to use. We require EdDSA. const STORAGE_TOKEN_ALGORITHM: Algorithm = Algorithm::EdDSA; @@ -54,7 +54,7 @@ impl SwappableJwtAuth { pub fn swap(&self, jwt_auth: JwtAuth) { self.0.swap(Arc::new(jwt_auth)); } - pub fn decode(&self, token: &str) -> Result> { + pub fn decode(&self, token: &str) -> std::result::Result, AuthError> { self.0.load().decode(token) } } @@ -65,6 +65,24 @@ impl std::fmt::Debug for SwappableJwtAuth { } } +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub struct AuthError(pub Cow<'static, str>); + +impl Display for AuthError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for ApiError { + fn from(_value: AuthError) -> Self { + // Don't pass on the value of the AuthError as a precautionary measure. + // Being intentionally vague in public error communication hurts debugability + // but it is more secure. + ApiError::Forbidden("JWT authentication error".to_string()) + } +} + pub struct JwtAuth { decoding_keys: Vec, validation: Validation, @@ -114,7 +132,7 @@ impl JwtAuth { /// The function tries the stored decoding keys in succession, /// and returns the first yielding a successful result. /// If there is no working decoding key, it returns the last error. - pub fn decode(&self, token: &str) -> Result> { + pub fn decode(&self, token: &str) -> std::result::Result, AuthError> { let mut res = None; for decoding_key in &self.decoding_keys { res = Some(decode(token, decoding_key, &self.validation)); @@ -123,9 +141,9 @@ impl JwtAuth { } } if let Some(res) = res { - res.map_err(anyhow::Error::new) + res.map_err(|e| AuthError(Cow::Owned(e.to_string()))) } else { - anyhow::bail!("no JWT decoding keys configured") + Err(AuthError(Cow::Borrowed("no JWT decoding keys configured"))) } } } @@ -166,9 +184,9 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH "#; #[test] - fn test_decode() -> Result<(), anyhow::Error> { + fn test_decode() { let expected_claims = Claims { - tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?), + tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()), scope: Scope::Tenant, }; @@ -187,28 +205,24 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH let encoded_eddsa = "eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzY29wZSI6InRlbmFudCIsInRlbmFudF9pZCI6IjNkMWY3NTk1YjQ2ODIzMDMwNGUwYjczY2VjYmNiMDgxIiwiaXNzIjoibmVvbi5jb250cm9scGxhbmUiLCJleHAiOjE3MDkyMDA4NzksImlhdCI6MTY3ODQ0MjQ3OX0.U3eA8j-uU-JnhzeO3EDHRuXLwkAUFCPxtGHEgw6p7Ccc3YRbFs2tmCdbD9PZEXP-XsxSeBQi1FY0YPcT3NXADw"; // Check it can be validated with the public key - let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?]); - let claims_from_token = auth.decode(encoded_eddsa)?.claims; + let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap()]); + let claims_from_token = auth.decode(encoded_eddsa).unwrap().claims; assert_eq!(claims_from_token, expected_claims); - - Ok(()) } #[test] - fn test_encode() -> Result<(), anyhow::Error> { + fn test_encode() { let claims = Claims { - tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?), + tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()), scope: Scope::Tenant, }; - let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519)?; + let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519).unwrap(); // decode it back - let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519)?]); - let decoded = auth.decode(&encoded)?; + let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap()]); + let decoded = auth.decode(&encoded).unwrap(); assert_eq!(decoded.claims, claims); - - Ok(()) } } diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index a8a635b6fd..550ab10700 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -1,4 +1,4 @@ -use crate::auth::{Claims, SwappableJwtAuth}; +use crate::auth::{AuthError, Claims, SwappableJwtAuth}; use crate::http::error::{api_error_handler, route_error_handler, ApiError}; use anyhow::Context; use hyper::header::{HeaderName, AUTHORIZATION}; @@ -400,9 +400,11 @@ pub fn auth_middleware( })?; let token = parse_token(header_value)?; - let data = auth - .decode(token) - .map_err(|_| ApiError::Unauthorized("malformed jwt token".to_string()))?; + let data = auth.decode(token).map_err(|err| { + warn!("Authentication error: {err}"); + // Rely on From for ApiError impl + err + })?; req.set_context(data.claims); } None => { @@ -450,12 +452,11 @@ where pub fn check_permission_with( req: &Request, - check_permission: impl Fn(&Claims) -> Result<(), anyhow::Error>, + check_permission: impl Fn(&Claims) -> Result<(), AuthError>, ) -> Result<(), ApiError> { match req.context::() { - Some(claims) => { - Ok(check_permission(&claims).map_err(|err| ApiError::Forbidden(err.to_string()))?) - } + Some(claims) => Ok(check_permission(&claims) + .map_err(|_err| ApiError::Forbidden("JWT authentication error".to_string()))?), None => Ok(()), // claims is None because auth is disabled } } diff --git a/libs/utils/src/http/error.rs b/libs/utils/src/http/error.rs index 7233d3a662..ac68b04888 100644 --- a/libs/utils/src/http/error.rs +++ b/libs/utils/src/http/error.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use std::borrow::Cow; use std::error::Error as StdError; use thiserror::Error; -use tracing::{error, info}; +use tracing::{error, info, warn}; #[derive(Debug, Error)] pub enum ApiError { @@ -118,6 +118,9 @@ pub fn api_error_handler(api_error: ApiError) -> Response { // Print a stack trace for Internal Server errors match api_error { + ApiError::Forbidden(_) | ApiError::Unauthorized(_) => { + warn!("Error processing HTTP request: {api_error:#}") + } ApiError::ResourceUnavailable(_) => info!("Error processing HTTP request: {api_error:#}"), ApiError::NotFound(_) => info!("Error processing HTTP request: {api_error:#}"), ApiError::InternalServerError(_) => error!("Error processing HTTP request: {api_error:?}"), diff --git a/pageserver/src/auth.rs b/pageserver/src/auth.rs index 268117cae2..2cb661863d 100644 --- a/pageserver/src/auth.rs +++ b/pageserver/src/auth.rs @@ -1,22 +1,21 @@ -use anyhow::{bail, Result}; -use utils::auth::{Claims, Scope}; +use utils::auth::{AuthError, Claims, Scope}; use utils::id::TenantId; -pub fn check_permission(claims: &Claims, tenant_id: Option) -> Result<()> { +pub fn check_permission(claims: &Claims, tenant_id: Option) -> Result<(), AuthError> { match (&claims.scope, tenant_id) { - (Scope::Tenant, None) => { - bail!("Attempt to access management api with tenant scope. Permission denied") - } + (Scope::Tenant, None) => Err(AuthError( + "Attempt to access management api with tenant scope. Permission denied".into(), + )), (Scope::Tenant, Some(tenant_id)) => { if claims.tenant_id.unwrap() != tenant_id { - bail!("Tenant id mismatch. Permission denied") + return Err(AuthError("Tenant id mismatch. Permission denied".into())); } Ok(()) } (Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope (Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope - (Scope::SafekeeperData, _) => { - bail!("SafekeeperData scope makes no sense for Pageserver") - } + (Scope::SafekeeperData, _) => Err(AuthError( + "SafekeeperData scope makes no sense for Pageserver".into(), + )), } } diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index b215b3016e..7a8f37f923 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1671,6 +1671,8 @@ where ); match handle.await { + // TODO: never actually return Err from here, always Ok(...) so that we can log + // spanned errors. Call api_error_handler instead and return appropriate Body. Ok(result) => result, Err(e) => { // The handler task panicked. We have a global panic handler that logs the diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 5193b3c5ff..5487a9d7c1 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -898,7 +898,7 @@ impl PageServerHandler { // when accessing management api supply None as an argument // when using to authorize tenant pass corresponding tenant id - fn check_permission(&self, tenant_id: Option) -> anyhow::Result<()> { + fn check_permission(&self, tenant_id: Option) -> Result<(), QueryError> { if self.auth.is_none() { // auth is set to Trust, nothing to check so just return ok return Ok(()); @@ -910,7 +910,7 @@ impl PageServerHandler { .claims .as_ref() .expect("claims presence already checked"); - check_permission(claims, tenant_id) + check_permission(claims, tenant_id).map_err(|e| QueryError::Unauthorized(e.0)) } /// Shorthand for getting a reference to a Timeline of an Active tenant. @@ -949,16 +949,17 @@ where .auth .as_ref() .unwrap() - .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)?; + .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?) + .map_err(|e| QueryError::Unauthorized(e.0))?; if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() { - return Err(QueryError::Other(anyhow::anyhow!( - "jwt token scope is Tenant, but tenant id is missing" - ))); + return Err(QueryError::Unauthorized( + "jwt token scope is Tenant, but tenant id is missing".into(), + )); } - info!( - "jwt auth succeeded for scope: {:#?} by tenant id: {:?}", + debug!( + "jwt scope check succeeded for scope: {:#?} by tenant id: {:?}", data.claims.scope, data.claims.tenant_id, ); diff --git a/safekeeper/src/auth.rs b/safekeeper/src/auth.rs index 2684d82365..bf4905aaa7 100644 --- a/safekeeper/src/auth.rs +++ b/safekeeper/src/auth.rs @@ -1,19 +1,20 @@ -use anyhow::{bail, Result}; -use utils::auth::{Claims, Scope}; +use utils::auth::{AuthError, Claims, Scope}; use utils::id::TenantId; -pub fn check_permission(claims: &Claims, tenant_id: Option) -> Result<()> { +pub fn check_permission(claims: &Claims, tenant_id: Option) -> Result<(), AuthError> { match (&claims.scope, tenant_id) { - (Scope::Tenant, None) => { - bail!("Attempt to access management api with tenant scope. Permission denied") - } + (Scope::Tenant, None) => Err(AuthError( + "Attempt to access management api with tenant scope. Permission denied".into(), + )), (Scope::Tenant, Some(tenant_id)) => { if claims.tenant_id.unwrap() != tenant_id { - bail!("Tenant id mismatch. Permission denied") + return Err(AuthError("Tenant id mismatch. Permission denied".into())); } Ok(()) } - (Scope::PageServerApi, _) => bail!("PageServerApi scope makes no sense for Safekeeper"), + (Scope::PageServerApi, _) => Err(AuthError( + "PageServerApi scope makes no sense for Safekeeper".into(), + )), (Scope::SafekeeperData, _) => Ok(()), } } diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index 6d0ed8650d..d5333abae6 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -6,7 +6,7 @@ use std::str::FromStr; use std::str::{self}; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{info, info_span, Instrument}; +use tracing::{debug, info, info_span, Instrument}; use crate::auth::check_permission; use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage}; @@ -165,26 +165,27 @@ impl postgres_backend::Handler .auth .as_ref() .expect("auth_type is configured but .auth of handler is missing"); - let data = - auth.decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)?; + let data = auth + .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?) + .map_err(|e| QueryError::Unauthorized(e.0))?; // The handler might be configured to allow only tenant scope tokens. if matches!(allowed_auth_scope, Scope::Tenant) && !matches!(data.claims.scope, Scope::Tenant) { - return Err(QueryError::Other(anyhow::anyhow!( - "passed JWT token is for full access, but only tenant scope is allowed" - ))); + return Err(QueryError::Unauthorized( + "passed JWT token is for full access, but only tenant scope is allowed".into(), + )); } if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() { - return Err(QueryError::Other(anyhow::anyhow!( - "jwt token scope is Tenant, but tenant id is missing" - ))); + return Err(QueryError::Unauthorized( + "jwt token scope is Tenant, but tenant id is missing".into(), + )); } - info!( - "jwt auth succeeded for scope: {:#?} by tenant id: {:?}", + debug!( + "jwt scope check succeeded for scope: {:#?} by tenant id: {:?}", data.claims.scope, data.claims.tenant_id, ); @@ -263,7 +264,7 @@ impl SafekeeperPostgresHandler { // when accessing management api supply None as an argument // when using to authorize tenant pass corresponding tenant id - fn check_permission(&self, tenant_id: Option) -> anyhow::Result<()> { + fn check_permission(&self, tenant_id: Option) -> Result<(), QueryError> { if self.auth.is_none() { // auth is set to Trust, nothing to check so just return ok return Ok(()); @@ -275,7 +276,7 @@ impl SafekeeperPostgresHandler { .claims .as_ref() .expect("claims presence already checked"); - check_permission(claims, tenant_id) + check_permission(claims, tenant_id).map_err(|e| QueryError::Unauthorized(e.0)) } async fn handle_timeline_status( diff --git a/test_runner/regress/test_auth.py b/test_runner/regress/test_auth.py index f785dc0c8e..f729bdee98 100644 --- a/test_runner/regress/test_auth.py +++ b/test_runner/regress/test_auth.py @@ -25,7 +25,7 @@ def assert_client_authorized(env: NeonEnv, http_client: PageserverHttpClient): def assert_client_not_authorized(env: NeonEnv, http_client: PageserverHttpClient): with pytest.raises( PageserverApiException, - match="Unauthorized: malformed jwt token", + match="Forbidden: JWT authentication error", ): assert_client_authorized(env, http_client) @@ -56,9 +56,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): assert_client_authorized(env, pageserver_http_client) # fail to create branch using token with different tenant_id - with pytest.raises( - PageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied" - ): + with pytest.raises(PageserverApiException, match="Forbidden: JWT authentication error"): assert_client_authorized(env, invalid_tenant_http_client) # create tenant using management token @@ -67,7 +65,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): # fail to create tenant using tenant token with pytest.raises( PageserverApiException, - match="Forbidden: Attempt to access management api with tenant scope. Permission denied", + match="Forbidden: JWT authentication error", ): tenant_http_client.tenant_create(TenantId.generate()) @@ -94,6 +92,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder): def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder): neon_env_builder.auth_enabled = True env = neon_env_builder.init_start() + env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*") env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*") pageserver_token_old = env.auth_keys.generate_pageserver_token() @@ -146,6 +145,7 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder): def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder): neon_env_builder.auth_enabled = True env = neon_env_builder.init_start() + env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*") env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*") pageserver_token_old = env.auth_keys.generate_pageserver_token() @@ -162,7 +162,7 @@ def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder): # Next attempt fails as we use the old auth token with pytest.raises( PageserverApiException, - match="Unauthorized: malformed jwt token", + match="Forbidden: JWT authentication error", ): pageserver_http_client_old.reload_auth_validation_keys()