[proxy] Classify "database not found" errors as user errors (#12603)

## Problem

If a user provides a wrong database name in the connection string, it
should be logged as a user error, not postgres error.

I found 4 different places where we log such errors:
1. `proxy/src/stream.rs:193`, e.g.:
```
{"timestamp":"2025-07-15T11:33:35.660026Z","level":"INFO","message":"forwarding error to user","fields":{"kind":"postgres","msg":"database \"[redacted]\" does not exist"},"spans":{"connect_request#9":{"protocol":"tcp","session_id":"ce1f2c90-dfb5-44f7-b9e9-8b8535e8b9b8","conn_info":"[redacted]","ep":"[redacted]","role":"[redacted]"}},"thread_id":22,"task_id":"370407867","target":"proxy::stream","src":"proxy/src/stream.rs:193","extract":{"ep":"[redacted]","session_id":"ce1f2c90-dfb5-44f7-b9e9-8b8535e8b9b8"}}
```
2. `proxy/src/pglb/mod.rs:137`, e.g.:
```
{"timestamp":"2025-07-15T11:37:44.340497Z","level":"WARN","message":"per-client task finished with an error: Couldn't connect to compute node: db error: FATAL: database \"[redacted]\" does not exist","spans":{"connect_request#8":{"protocol":"tcp","session_id":"763baaac-d039-4f4d-9446-c149e32660eb","conn_info":"[redacted]","ep":"[redacted]","role":"[redacted]"}},"thread_id":14,"task_id":"866658139","target":"proxy::pglb","src":"proxy/src/pglb/mod.rs:137","extract":{"ep":"[redacted]","session_id":"763baaac-d039-4f4d-9446-c149e32660eb"}}
```
3. `proxy/src/serverless/mod.rs:451`, e.g. (note that the error is
repeated 4 times — retries?):
```
{"timestamp":"2025-07-15T11:37:54.515891Z","level":"WARN","message":"error in websocket connection: Couldn't connect to compute node: db error: FATAL: database \"[redacted]\" does not exist: Couldn't connect to compute node: db error: FATAL: database \"[redacted]\" does not exist: db error: FATAL: database \"[redacted]\" does not exist: FATAL: database \"[redacted]\" does not exist","spans":{"http_conn#8":{"conn_id":"ec7780db-a145-4f0e-90df-0ba35f41b828"},"connect_request#9":{"protocol":"ws","session_id":"1eaaeeec-b671-4153-b1f4-247839e4b1c7","conn_info":"[redacted]","ep":"[redacted]","role":"[redacted]"}},"thread_id":10,"task_id":"366331699","target":"proxy::serverless","src":"proxy/src/serverless/mod.rs:451","extract":{"conn_id":"ec7780db-a145-4f0e-90df-0ba35f41b828","ep":"[redacted]","session_id":"1eaaeeec-b671-4153-b1f4-247839e4b1c7"}}
```
4. `proxy/src/serverless/sql_over_http.rs:219`, e.g.
```
{"timestamp":"2025-07-15T10:32:34.866603Z","level":"INFO","message":"forwarding error to user","fields":{"kind":"postgres","error":"could not connect to postgres in compute","msg":"database \"[redacted]\" does not exist"},"spans":{"http_conn#19":{"conn_id":"7da08203-5dab-45e8-809f-503c9019ec6b"},"connect_request#5":{"protocol":"http","session_id":"68387f1c-cbc8-45b3-a7db-8bb1c55ca809","conn_info":"[redacted]","ep":"[redacted]","role":"[redacted]"}},"thread_id":17,"task_id":"16432250","target":"proxy::serverless::sql_over_http","src":"proxy/src/serverless/sql_over_http.rs:219","extract":{"conn_id":"7da08203-5dab-45e8-809f-503c9019ec6b","ep":"[redacted]","session_id":"68387f1c-cbc8-45b3-a7db-8bb1c55ca809"}}
```

This PR directly addresses 1 and 4. I _think_ it _should_ also help with
2 and 3, although in those places we don't seem to log `kind`, so I'm
not quite sure. I'm also confused why in 3 the error is repeated
multiple times.

## Summary of changes

Resolves https://github.com/neondatabase/neon/issues/9440
This commit is contained in:
Krzysztof Szafrański
2025-07-29 17:25:22 +02:00
committed by GitHub
parent 62d844e657
commit 0ffdc98e20
4 changed files with 37 additions and 40 deletions

View File

@@ -9,7 +9,7 @@ use postgres_protocol2::message::backend::{ErrorFields, ErrorResponseBody};
pub use self::sqlstate::*;
#[allow(clippy::unreadable_literal)]
mod sqlstate;
pub mod sqlstate;
/// The severity of a Postgres error or notice.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]

View File

@@ -8,6 +8,7 @@ use futures::{FutureExt, TryFutureExt};
use itertools::Itertools;
use postgres_client::config::{AuthKeys, ChannelBinding, SslMode};
use postgres_client::connect_raw::StartupStream;
use postgres_client::error::SqlState;
use postgres_client::maybe_tls_stream::MaybeTlsStream;
use postgres_client::tls::MakeTlsConnect;
use thiserror::Error;
@@ -22,7 +23,7 @@ use crate::context::RequestContext;
use crate::control_plane::client::ApiLockError;
use crate::control_plane::errors::WakeComputeError;
use crate::control_plane::messages::MetricsAuxInfo;
use crate::error::{ReportableError, UserFacingError};
use crate::error::{ErrorKind, ReportableError, UserFacingError};
use crate::metrics::{Metrics, NumDbConnectionsGuard};
use crate::pqproto::StartupMessageParams;
use crate::proxy::connect_compute::TlsNegotiation;
@@ -65,12 +66,13 @@ impl UserFacingError for PostgresError {
}
impl ReportableError for PostgresError {
fn get_error_kind(&self) -> crate::error::ErrorKind {
fn get_error_kind(&self) -> ErrorKind {
match self {
PostgresError::Postgres(e) if e.as_db_error().is_some() => {
crate::error::ErrorKind::Postgres
}
PostgresError::Postgres(_) => crate::error::ErrorKind::Compute,
PostgresError::Postgres(err) => match err.as_db_error() {
Some(err) if err.code() == &SqlState::INVALID_CATALOG_NAME => ErrorKind::User,
Some(_) => ErrorKind::Postgres,
None => ErrorKind::Compute,
},
}
}
}
@@ -110,9 +112,9 @@ impl UserFacingError for ConnectionError {
}
impl ReportableError for ConnectionError {
fn get_error_kind(&self) -> crate::error::ErrorKind {
fn get_error_kind(&self) -> ErrorKind {
match self {
ConnectionError::TlsError(_) => crate::error::ErrorKind::Compute,
ConnectionError::TlsError(_) => ErrorKind::Compute,
ConnectionError::WakeComputeError(e) => e.get_error_kind(),
ConnectionError::TooManyConnectionAttempts(e) => e.get_error_kind(),
#[cfg(test)]

View File

@@ -4,6 +4,7 @@ use std::time::Duration;
use ed25519_dalek::SigningKey;
use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer};
use jose_jwk::jose_b64;
use postgres_client::error::SqlState;
use postgres_client::maybe_tls_stream::MaybeTlsStream;
use rand_core::OsRng;
use tracing::field::display;
@@ -459,15 +460,14 @@ impl ReportableError for HttpConnError {
match self {
HttpConnError::ConnectError(_) => ErrorKind::Compute,
HttpConnError::ConnectionClosedAbruptly(_) => ErrorKind::Compute,
HttpConnError::PostgresConnectionError(p) => {
if p.as_db_error().is_some() {
// postgres rejected the connection
ErrorKind::Postgres
} else {
// couldn't even reach postgres
ErrorKind::Compute
}
}
HttpConnError::PostgresConnectionError(p) => match p.as_db_error() {
// user provided a wrong database name
Some(err) if err.code() == &SqlState::INVALID_CATALOG_NAME => ErrorKind::User,
// postgres rejected the connection
Some(_) => ErrorKind::Postgres,
// couldn't even reach postgres
None => ErrorKind::Compute,
},
HttpConnError::LocalProxyConnectionError(_) => ErrorKind::Compute,
HttpConnError::ComputeCtl(_) => ErrorKind::Service,
HttpConnError::JwtPayloadError(_) => ErrorKind::User,

View File

@@ -192,34 +192,29 @@ pub(crate) async fn handle(
let line = get(db_error, |db| db.line().map(|l| l.to_string()));
let routine = get(db_error, |db| db.routine());
match &e {
SqlOverHttpError::Postgres(e)
if e.as_db_error().is_some() && error_kind == ErrorKind::User =>
{
// this error contains too much info, and it's not an error we care about.
if tracing::enabled!(Level::DEBUG) {
tracing::debug!(
kind=error_kind.to_metric_label(),
error=%e,
msg=message,
"forwarding error to user"
);
} else {
tracing::info!(
kind = error_kind.to_metric_label(),
error = "bad query",
"forwarding error to user"
);
}
}
_ => {
tracing::info!(
if db_error.is_some() && error_kind == ErrorKind::User {
// this error contains too much info, and it's not an error we care about.
if tracing::enabled!(Level::DEBUG) {
debug!(
kind=error_kind.to_metric_label(),
error=%e,
msg=message,
"forwarding error to user"
);
} else {
info!(
kind = error_kind.to_metric_label(),
error = "bad query",
"forwarding error to user"
);
}
} else {
info!(
kind=error_kind.to_metric_label(),
error=%e,
msg=message,
"forwarding error to user"
);
}
json_response(