From 6ee84d985a9cccc128aed7aceccde2a210c3110a Mon Sep 17 00:00:00 2001
From: Christian Schwarz <christian@neon.tech>
Date: Fri, 4 Apr 2025 17:13:54 +0200
Subject: [PATCH] impr(perf tracing): ability to correlate with page_service
 logs (#11398)

# Problem

Current perf tracing fields do not allow answering the question what a
specific Postgres backend was waiting for.

# Background

For Pageserver logs, we set the backend PID as the libpq
`application_name` on the compute side, and funnel that into the a
tracing field for the spans that emit to the global tracing subscriber.

# Solution

Funnel `application_name`, and the other fields that we use in the
logging spans, into the root span for perf tracing.

# Refs

- fixes https://github.com/neondatabase/neon/issues/11393
- stacked atop https://github.com/neondatabase/neon/pull/11433
- epic: https://github.com/neondatabase/neon/issues/9873
---
 pageserver/src/page_service.rs | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index f9bf45bb71..7e3991dbdc 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -247,6 +247,15 @@ pub async fn libpq_listener_main(
 
 type ConnectionHandlerResult = anyhow::Result<()>;
 
+/// Perf root spans start at the per-request level, after shard routing.
+/// This struct carries connection-level information to the root perf span definition.
+#[derive(Clone)]
+struct ConnectionPerfSpanFields {
+    peer_addr: String,
+    application_name: Option<String>,
+    compute_mode: Option<String>,
+}
+
 #[instrument(skip_all, fields(peer_addr, application_name, compute_mode))]
 #[allow(clippy::too_many_arguments)]
 async fn page_service_conn_main(
@@ -271,6 +280,12 @@ async fn page_service_conn_main(
     let socket_fd = socket.as_raw_fd();
 
     let peer_addr = socket.peer_addr().context("get peer address")?;
+
+    let perf_span_fields = ConnectionPerfSpanFields {
+        peer_addr: peer_addr.to_string(),
+        application_name: None, // filled in later
+        compute_mode: None,     // filled in later
+    };
     tracing::Span::current().record("peer_addr", field::display(peer_addr));
 
     // setup read timeout of 10 minutes. the timeout is rather arbitrary for requirements:
@@ -314,6 +329,7 @@ async fn page_service_conn_main(
         tenant_manager,
         auth,
         pipelining_config,
+        perf_span_fields,
         connection_ctx,
         cancel.clone(),
         gate_guard,
@@ -358,6 +374,8 @@ struct PageServerHandler {
     /// `process_query` creates a child context from this one.
     connection_ctx: RequestContext,
 
+    perf_span_fields: ConnectionPerfSpanFields,
+
     cancel: CancellationToken,
 
     /// None only while pagestream protocol is being processed.
@@ -703,11 +721,13 @@ impl BatchedFeMessage {
 }
 
 impl PageServerHandler {
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         conf: &'static PageServerConf,
         tenant_manager: Arc<TenantManager>,
         auth: Option<Arc<SwappableJwtAuth>>,
         pipelining_config: PageServicePipeliningConfig,
+        perf_span_fields: ConnectionPerfSpanFields,
         connection_ctx: RequestContext,
         cancel: CancellationToken,
         gate_guard: GateGuard,
@@ -717,6 +737,7 @@ impl PageServerHandler {
             auth,
             claims: None,
             connection_ctx,
+            perf_span_fields,
             timeline_handles: Some(TimelineHandles::new(tenant_manager)),
             cancel,
             pipelining_config,
@@ -754,6 +775,7 @@ impl PageServerHandler {
         tenant_id: TenantId,
         timeline_id: TimelineId,
         timeline_handles: &mut TimelineHandles,
+        conn_perf_span_fields: &ConnectionPerfSpanFields,
         cancel: &CancellationToken,
         ctx: &RequestContext,
         protocol_version: PagestreamProtocolVersion,
@@ -952,6 +974,9 @@ impl PageServerHandler {
                             info_span!(
                             target: PERF_TRACE_TARGET,
                             "GET_PAGE",
+                            peer_addr = conn_perf_span_fields.peer_addr,
+                            application_name = conn_perf_span_fields.application_name,
+                            compute_mode = conn_perf_span_fields.compute_mode,
                             tenant_id = %tenant_id,
                             shard_id = %shard.get_shard_identity().shard_slug(),
                             timeline_id = %timeline_id,
@@ -1581,6 +1606,7 @@ impl PageServerHandler {
                 tenant_id,
                 timeline_id,
                 &mut timeline_handles,
+                &self.perf_span_fields,
                 &cancel,
                 ctx,
                 protocol_version,
@@ -1714,6 +1740,8 @@ impl PageServerHandler {
         // Batcher
         //
 
+        let perf_span_fields = self.perf_span_fields.clone();
+
         let cancel_batcher = self.cancel.child_token();
         let (mut batch_tx, mut batch_rx) = spsc_fold::channel();
         let batcher = pipeline_stage!("batcher", cancel_batcher.clone(), move |cancel_batcher| {
@@ -1727,6 +1755,7 @@ impl PageServerHandler {
                         tenant_id,
                         timeline_id,
                         &mut timeline_handles,
+                        &perf_span_fields,
                         &cancel_batcher,
                         &ctx,
                         protocol_version,
@@ -2669,12 +2698,14 @@ where
 
         if let FeStartupPacket::StartupMessage { params, .. } = sm {
             if let Some(app_name) = params.get("application_name") {
+                self.perf_span_fields.application_name = Some(app_name.to_string());
                 Span::current().record("application_name", field::display(app_name));
             }
             if let Some(options) = params.get("options") {
                 let (config, _) = parse_options(options);
                 for (key, value) in config {
                     if key == "neon.compute_mode" {
+                        self.perf_span_fields.compute_mode = Some(value.clone());
                         Span::current().record("compute_mode", field::display(value));
                     }
                 }