From a32e8871acc1922f8bfd8057c08a97e504b1dacc Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 14 Feb 2025 21:11:42 +0100 Subject: [PATCH] compute/pageserver: correlation of logs through backend PID (via `application_name`) (#10810) This PR makes compute set the `application_name` field to the PG backend process PID which is also included in each compute log line. This allows correlation of Pageserver connection logs with compute logs in a way that was guesswork before this PR. In future, we can switch for a more unique identifier for a page_service session. Refs - discussion in https://neondb.slack.com/archives/C08DE6Q9C3B/p1739465208296169?thread_ts=1739462628.361019&cid=C08DE6Q9C3B - fixes https://github.com/neondatabase/neon/issues/10808 --- pageserver/src/page_service.rs | 11 +++++++++-- pgxn/neon/libpagestore.c | 31 ++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index e9d87dec71..53a6a7124d 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -237,7 +237,7 @@ pub async fn libpq_listener_main( type ConnectionHandlerResult = anyhow::Result<()>; -#[instrument(skip_all, fields(peer_addr))] +#[instrument(skip_all, fields(peer_addr, application_name))] #[allow(clippy::too_many_arguments)] async fn page_service_conn_main( conf: &'static PageServerConf, @@ -2463,9 +2463,16 @@ where fn startup( &mut self, _pgb: &mut PostgresBackend, - _sm: &FeStartupPacket, + sm: &FeStartupPacket, ) -> Result<(), QueryError> { fail::fail_point!("ps::connection-start::startup-packet"); + + if let FeStartupPacket::StartupMessage { params, .. } = sm { + if let Some(app_name) = params.get("application_name") { + Span::current().record("application_name", field::display(app_name)); + } + }; + Ok(()) } diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 22aeb2e2d6..fc1aecd340 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -378,8 +378,9 @@ pageserver_connect(shardno_t shard_no, int elevel) { case PS_Disconnected: { - const char *keywords[3]; - const char *values[3]; + const char *keywords[4]; + const char *values[4]; + char pid_str[16]; int n_pgsql_params; TimestampTz now; int64 us_since_last_attempt; @@ -424,14 +425,30 @@ pageserver_connect(shardno_t shard_no, int elevel) * can override the password from the env variable. Seems useful, although * we don't currently use that capability anywhere. */ - keywords[0] = "dbname"; - values[0] = connstr; - n_pgsql_params = 1; + n_pgsql_params = 0; + + /* + * Pageserver logs include this in the connection's tracing span. + * This allows for reasier log correlation between compute and pageserver. + */ + keywords[n_pgsql_params] = "application_name"; + { + int ret = snprintf(pid_str, sizeof(pid_str), "%d", MyProcPid); + if (ret < 0 || ret >= (int)(sizeof(pid_str))) + elog(FATAL, "stack-allocated buffer too small to hold pid"); + } + /* lifetime: PQconnectStartParams strdups internally */ + values[n_pgsql_params] = (const char*) pid_str; + n_pgsql_params++; + + keywords[n_pgsql_params] = "dbname"; + values[n_pgsql_params] = connstr; + n_pgsql_params++; if (neon_auth_token) { - keywords[1] = "password"; - values[1] = neon_auth_token; + keywords[n_pgsql_params] = "password"; + values[n_pgsql_params] = neon_auth_token; n_pgsql_params++; }