From ea32f1d0a36a4d77c1181d623f14a91f2a06d6dd Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Wed, 2 Oct 2024 11:12:50 +0200 Subject: [PATCH] Expose more granular wait event data to the user (#9163) In PG17, there is this newfangled custom wait events system. This commit adds that feature to Neon, so that users can see what their backends may be waiting for when a PostgreSQL backend is playing the waiting game in Neon code. --- pgxn/neon/file_cache.c | 8 +++++++ pgxn/neon/libpagestore.c | 13 ++++++----- pgxn/neon/neon.c | 45 +++++++++++++++++++++++++++++++++++++ pgxn/neon/neon.h | 23 +++++++++++++++++++ pgxn/neon/walsender_hooks.c | 2 +- 5 files changed, 85 insertions(+), 6 deletions(-) diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 2b461c8641..892a272252 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -42,6 +42,7 @@ #include "hll.h" #include "bitmap.h" +#include "neon.h" #define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0) @@ -173,7 +174,9 @@ lfc_disable(char const *op) * If the reason of error is ENOSPC, then truncation of file may * help to reclaim some space */ + pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE); int rc = ftruncate(lfc_desc, 0); + pgstat_report_wait_end(); if (rc < 0) elog(WARNING, "Failed to truncate local file cache %s: %m", lfc_path); @@ -769,8 +772,10 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, if (iteration_hits != 0) { + pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_READ); rc = preadv(lfc_desc, iov, blocks_in_chunk, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ); + pgstat_report_wait_end(); if (rc != (BLCKSZ * blocks_in_chunk)) { @@ -944,8 +949,11 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, lfc_ctl->writes += blocks_in_chunk; LWLockRelease(lfc_lock); + pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE); rc = pwritev(lfc_desc, 
iov, blocks_in_chunk, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ); + pgstat_report_wait_end(); + if (rc != BLCKSZ * blocks_in_chunk) { lfc_disable("write"); diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 6c6489277d..0ca8a70d6d 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -490,7 +490,7 @@ pageserver_connect(shardno_t shard_no, int elevel) WL_EXIT_ON_PM_DEATH | WL_LATCH_SET | WL_SOCKET_READABLE, PQsocket(shard->conn), 0, - PG_WAIT_EXTENSION); + WAIT_EVENT_NEON_PS_STARTING); elog(DEBUG5, "PGRES_POLLING_READING=>%d", rc); if (rc & WL_LATCH_SET) { @@ -512,7 +512,7 @@ pageserver_connect(shardno_t shard_no, int elevel) WL_EXIT_ON_PM_DEATH | WL_LATCH_SET | WL_SOCKET_WRITEABLE, PQsocket(shard->conn), 0, - PG_WAIT_EXTENSION); + WAIT_EVENT_NEON_PS_STARTING); elog(DEBUG5, "PGRES_POLLING_WRITING=>%d", rc); if (rc & WL_LATCH_SET) { @@ -608,7 +608,8 @@ pageserver_connect(shardno_t shard_no, int elevel) WaitEvent event; /* Sleep until there's something to do */ - (void) WaitEventSetWait(shard->wes_read, -1L, &event, 1, PG_WAIT_EXTENSION); + (void) WaitEventSetWait(shard->wes_read, -1L, &event, 1, + WAIT_EVENT_NEON_PS_CONFIGURING); ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); @@ -656,7 +657,8 @@ static int call_PQgetCopyData(shardno_t shard_no, char **buffer) { int ret; - PGconn *pageserver_conn = page_servers[shard_no].conn; + PageServer *shard = &page_servers[shard_no]; + PGconn *pageserver_conn = shard->conn; retry: ret = PQgetCopyData(pageserver_conn, buffer, 1 /* async */ ); @@ -666,7 +668,8 @@ retry: WaitEvent event; /* Sleep until there's something to do */ - (void) WaitEventSetWait(page_servers[shard_no].wes_read, -1L, &event, 1, PG_WAIT_EXTENSION); + (void) WaitEventSetWait(shard->wes_read, -1L, &event, 1, + WAIT_EVENT_NEON_PS_READ); ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index fe8e276d1c..c3ed96710a 100644 --- a/pgxn/neon/neon.c +++ 
b/pgxn/neon/neon.c @@ -41,6 +41,9 @@ #include "pagestore_client.h" #include "control_plane_connector.h" #include "walsender_hooks.h" +#if PG_MAJORVERSION_NUM >= 16 +#include "storage/ipc.h" +#endif PG_MODULE_MAGIC; void _PG_init(void); @@ -49,6 +52,23 @@ static int logical_replication_max_snap_files = 300; static int running_xacts_overflow_policy; +#if PG_MAJORVERSION_NUM >= 16 +static shmem_startup_hook_type prev_shmem_startup_hook; + +static void neon_shmem_startup_hook(void); +#endif +#if PG_MAJORVERSION_NUM >= 17 +uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE; +uint32 WAIT_EVENT_NEON_LFC_READ; +uint32 WAIT_EVENT_NEON_LFC_TRUNCATE; +uint32 WAIT_EVENT_NEON_LFC_WRITE; +uint32 WAIT_EVENT_NEON_PS_STARTING; +uint32 WAIT_EVENT_NEON_PS_CONFIGURING; +uint32 WAIT_EVENT_NEON_PS_SEND; +uint32 WAIT_EVENT_NEON_PS_READ; +uint32 WAIT_EVENT_NEON_WAL_DL; +#endif + enum RunningXactsOverflowPolicies { OP_IGNORE, OP_SKIP, @@ -635,6 +655,9 @@ _PG_init(void) */ #if PG_VERSION_NUM >= 160000 load_file("$libdir/neon_rmgr", false); + + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = neon_shmem_startup_hook; #endif pg_init_libpagestore(); @@ -721,3 +744,25 @@ backpressure_throttling_time(PG_FUNCTION_ARGS) { PG_RETURN_UINT64(BackpressureThrottlingTime()); } + +#if PG_MAJORVERSION_NUM >= 16 +static void +neon_shmem_startup_hook(void) +{ + /* Run any previously installed shmem_startup_hook before registering Neon's wait events */ + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + +#if PG_MAJORVERSION_NUM >= 17 + WAIT_EVENT_NEON_LFC_MAINTENANCE = WaitEventExtensionNew("Neon/FileCache_Maintenance"); + WAIT_EVENT_NEON_LFC_READ = WaitEventExtensionNew("Neon/FileCache_Read"); + WAIT_EVENT_NEON_LFC_TRUNCATE = WaitEventExtensionNew("Neon/FileCache_Truncate"); + WAIT_EVENT_NEON_LFC_WRITE = WaitEventExtensionNew("Neon/FileCache_Write"); + WAIT_EVENT_NEON_PS_STARTING = WaitEventExtensionNew("Neon/PS_Starting"); + WAIT_EVENT_NEON_PS_CONFIGURING = WaitEventExtensionNew("Neon/PS_Configuring"); + WAIT_EVENT_NEON_PS_SEND = 
WaitEventExtensionNew("Neon/PS_SendIO"); + WAIT_EVENT_NEON_PS_READ = WaitEventExtensionNew("Neon/PS_ReadIO"); + WAIT_EVENT_NEON_WAL_DL = WaitEventExtensionNew("Neon/WAL_Download"); +#endif +} +#endif diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h index 5c653fc6c6..79aa88b8d3 100644 --- a/pgxn/neon/neon.h +++ b/pgxn/neon/neon.h @@ -12,6 +12,7 @@ #ifndef NEON_H #define NEON_H #include "access/xlogreader.h" +#include "utils/wait_event.h" /* GUCs */ extern char *neon_auth_token; @@ -22,6 +23,28 @@ extern char *wal_acceptors_list; extern int wal_acceptor_reconnect_timeout; extern int wal_acceptor_connection_timeout; +#if PG_MAJORVERSION_NUM >= 17 +extern uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE; +extern uint32 WAIT_EVENT_NEON_LFC_READ; +extern uint32 WAIT_EVENT_NEON_LFC_TRUNCATE; +extern uint32 WAIT_EVENT_NEON_LFC_WRITE; +extern uint32 WAIT_EVENT_NEON_PS_STARTING; +extern uint32 WAIT_EVENT_NEON_PS_CONFIGURING; +extern uint32 WAIT_EVENT_NEON_PS_SEND; +extern uint32 WAIT_EVENT_NEON_PS_READ; +extern uint32 WAIT_EVENT_NEON_WAL_DL; +#else +#define WAIT_EVENT_NEON_LFC_MAINTENANCE PG_WAIT_EXTENSION +#define WAIT_EVENT_NEON_LFC_READ WAIT_EVENT_BUFFILE_READ +#define WAIT_EVENT_NEON_LFC_TRUNCATE WAIT_EVENT_BUFFILE_TRUNCATE +#define WAIT_EVENT_NEON_LFC_WRITE WAIT_EVENT_BUFFILE_WRITE +#define WAIT_EVENT_NEON_PS_STARTING PG_WAIT_EXTENSION +#define WAIT_EVENT_NEON_PS_CONFIGURING PG_WAIT_EXTENSION +#define WAIT_EVENT_NEON_PS_SEND PG_WAIT_EXTENSION +#define WAIT_EVENT_NEON_PS_READ PG_WAIT_EXTENSION +#define WAIT_EVENT_NEON_WAL_DL WAIT_EVENT_WAL_READ +#endif + extern void pg_init_libpagestore(void); extern void pg_init_walproposer(void); diff --git a/pgxn/neon/walsender_hooks.c b/pgxn/neon/walsender_hooks.c index bd3856e9d9..575dddef02 100644 --- a/pgxn/neon/walsender_hooks.c +++ b/pgxn/neon/walsender_hooks.c @@ -160,7 +160,7 @@ NeonWALPageRead( WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | reader_events, sock, timeout_ms, - WAIT_EVENT_WAL_SENDER_MAIN); + WAIT_EVENT_NEON_WAL_DL); } } }