From d0102a473adbb00886bba2a1169a8be0160ec00e Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Thu, 20 Mar 2025 16:06:00 +0000 Subject: [PATCH] pgxn: Include local port in no-response log messages (#11321) ## Problem Now that stuck connections are quickly terminated it's not easy to quickly find the right port from the pid to correlate the connection with the one seen on pageserver side. ## Summary of changes Call getsockname() and include the local port number in the no-response-from-pageserver log messages. --- pgxn/neon/libpagestore.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 6f54180a4d..511790ccb0 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -15,6 +15,7 @@ #include "postgres.h" #include +#include #include "libpq-int.h" @@ -763,6 +764,24 @@ get_socket_stats(int socketfd, int *sndbuf, int *recvbuf) #endif } +/* + * Tries to get the local port of a socket. Sets 'port' to -1 on error. + */ +static void +get_local_port(int socketfd, int *port) +{ + struct sockaddr_in addr; + socklen_t addr_len = sizeof(addr); + + memset(&addr, 0, addr_len); + if (getsockname(socketfd, (struct sockaddr*) &addr, &addr_len) == 0) + { + *port = ntohs(addr.sin_port); + } else { + *port = -1; + } +} + /* * A wrapper around PQgetCopyData that checks for interrupts while sleeping. */ @@ -852,15 +871,17 @@ retry: */ if (INSTR_TIME_GET_MILLISEC(since_last_log) >= pageserver_response_log_timeout) { + int port; int sndbuf; int recvbuf; + get_local_port(PQsocket(pageserver_conn), &port); get_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf); neon_shard_log(shard_no, LOG, - "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d) (conn start=%d end=%d)", + "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket port=%d sndbuf=%d recvbuf=%d) (conn start=%d end=%d)", INSTR_TIME_GET_DOUBLE(since_start), - shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf, + shard->nrequests_sent, shard->nresponses_received, port, sndbuf, recvbuf, pageserver_conn->inStart, pageserver_conn->inEnd); shard->receive_last_log_time = now; shard->receive_logged = true; @@ -882,8 +903,10 @@ retry: */ if (INSTR_TIME_GET_MILLISEC(since_start) >= pageserver_response_disconnect_timeout) { - neon_shard_log(shard_no, LOG, "no response from pageserver for %0.3f s, disconnecting", - INSTR_TIME_GET_DOUBLE(since_start)); + int port; + get_local_port(PQsocket(pageserver_conn), &port); + neon_shard_log(shard_no, LOG, "no response from pageserver for %0.3f s, disconnecting (socket port=%d)", + INSTR_TIME_GET_DOUBLE(since_start), port); pageserver_disconnect(shard_no); return -1; }