mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-17 13:10:38 +00:00
## Problem

1. In the `CacheInvalid` state loop, we weren't checking `num_retries`. If this managed to get up to 32, the `retry_after` procedure would compute 2^32, which would overflow to 0 and trigger a division by zero.
2. When fixing the above, I started working on a flow diagram for the state machine logic and realised it was more complex than it had to be:
   a. We start in a `Cached` state.
   b. `Cached`: call `connect_once`. After the first `connect_once` error, we always move to the `CacheInvalid` state; otherwise, we return the connection.
   c. `CacheInvalid`: we attempt to `wake_compute` and we either switch to `Cached` or we retry this step (or we error).
   d. `Cached`: call `connect_once`. We either retry this step or we have a connection (or we error).
   - After `num_retries > 1` we never switch back to `CacheInvalid`.

## Summary of changes

1. Insert a `num_retries` check in the `handle_try_wake` procedure. Also use floats in the `retry_after` procedure to prevent the overflow entirely.
2. Refactor `connect_to_compute` to be more linear in design.
74 lines
2.6 KiB
Rust
74 lines
2.6 KiB
Rust
use std::ops::ControlFlow;
|
|
|
|
use super::AuthSuccess;
|
|
use crate::{
|
|
auth::{self, AuthFlow, ClientCredentials},
|
|
compute,
|
|
console::{self, AuthInfo, CachedNodeInfo, ConsoleReqExtra},
|
|
proxy::handle_try_wake,
|
|
sasl, scram,
|
|
stream::PqStream,
|
|
};
|
|
use tokio::io::{AsyncRead, AsyncWrite};
|
|
use tracing::info;
|
|
|
|
pub(super) async fn authenticate(
|
|
api: &impl console::Api,
|
|
extra: &ConsoleReqExtra<'_>,
|
|
creds: &ClientCredentials<'_>,
|
|
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
|
) -> auth::Result<AuthSuccess<CachedNodeInfo>> {
|
|
info!("fetching user's authentication info");
|
|
let info = api.get_auth_info(extra, creds).await?.unwrap_or_else(|| {
|
|
// If we don't have an authentication secret, we mock one to
|
|
// prevent malicious probing (possible due to missing protocol steps).
|
|
// This mocked secret will never lead to successful authentication.
|
|
info!("authentication info not found, mocking it");
|
|
AuthInfo::Scram(scram::ServerSecret::mock(creds.user, rand::random()))
|
|
});
|
|
|
|
let flow = AuthFlow::new(client);
|
|
let scram_keys = match info {
|
|
AuthInfo::Md5(_) => {
|
|
info!("auth endpoint chooses MD5");
|
|
return Err(auth::AuthError::bad_auth_method("MD5"));
|
|
}
|
|
AuthInfo::Scram(secret) => {
|
|
info!("auth endpoint chooses SCRAM");
|
|
let scram = auth::Scram(&secret);
|
|
let client_key = match flow.begin(scram).await?.authenticate().await? {
|
|
sasl::Outcome::Success(key) => key,
|
|
sasl::Outcome::Failure(reason) => {
|
|
info!("auth backend failed with an error: {reason}");
|
|
return Err(auth::AuthError::auth_failed(creds.user));
|
|
}
|
|
};
|
|
|
|
Some(compute::ScramKeys {
|
|
client_key: client_key.as_bytes(),
|
|
server_key: secret.server_key.as_bytes(),
|
|
})
|
|
}
|
|
};
|
|
|
|
info!("compute node's state has likely changed; requesting a wake-up");
|
|
let mut num_retries = 0;
|
|
let mut node = loop {
|
|
let wake_res = api.wake_compute(extra, creds).await;
|
|
match handle_try_wake(wake_res, num_retries)? {
|
|
ControlFlow::Continue(_) => num_retries += 1,
|
|
ControlFlow::Break(n) => break n,
|
|
}
|
|
info!(num_retries, "retrying wake compute");
|
|
};
|
|
if let Some(keys) = scram_keys {
|
|
use tokio_postgres::config::AuthKeys;
|
|
node.config.auth_keys(AuthKeys::ScramSha256(keys));
|
|
}
|
|
|
|
Ok(AuthSuccess {
|
|
reported_auth_ok: false,
|
|
value: node,
|
|
})
|
|
}
|