From e1935f42a1d1de4948fa6f689f2fe4cd83d15549 Mon Sep 17 00:00:00 2001
From: Arseny Sher
Date: Mon, 27 Nov 2023 15:40:01 +0300
Subject: [PATCH] Don't generate core dump when walproposer intentionally panics.

Walproposer sometimes intentionally PANICs when its term is defeated as
the basebackup is likely spoiled by that time. We don't want core dumped
in this case.
---
 pgxn/neon/neon_utils.c  | 25 +++++++++++++++++++++++++
 pgxn/neon/neon_utils.h  |  1 +
 pgxn/neon/walproposer.c | 15 ++++++++++++++-
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/pgxn/neon/neon_utils.c b/pgxn/neon/neon_utils.c
index 06faea7490..807d2decf6 100644
--- a/pgxn/neon/neon_utils.c
+++ b/pgxn/neon/neon_utils.c
@@ -1,3 +1,6 @@
+
+#include <sys/resource.h>
+
 #include "postgres.h"
 
 #include "access/timeline.h"
@@ -114,3 +117,25 @@ pq_sendint64_le(StringInfo buf, uint64 i)
 	memcpy(buf->data + buf->len, &i, sizeof(uint64));
 	buf->len += sizeof(uint64);
 }
+
+/*
+ * Disables core dumps for the current process by zeroing both the soft and
+ * hard RLIMIT_CORE limits. On failure only a warning is printed to stderr.
+ */
+void
+disable_core_dump(void)
+{
+#ifndef WALPROPOSER_LIB /* skip in simulation mode */
+	struct rlimit rlim;
+
+	rlim.rlim_cur = 0;
+	rlim.rlim_max = 0;
+	if (setrlimit(RLIMIT_CORE, &rlim))
+	{
+		int save_errno = errno;	/* keep errno before any further libc call */
+
+		fprintf(stderr, "WARNING: disable cores setrlimit failed: %s\n",
+				strerror(save_errno));
+	}
+#endif
+}
diff --git a/pgxn/neon/neon_utils.h b/pgxn/neon/neon_utils.h
index e3fafc8d0f..20745d8b26 100644
--- a/pgxn/neon/neon_utils.h
+++ b/pgxn/neon/neon_utils.h
@@ -8,5 +8,6 @@ uint32 pq_getmsgint32_le(StringInfo msg);
 uint64 pq_getmsgint64_le(StringInfo msg);
 void pq_sendint32_le(StringInfo buf, uint32 i);
 void pq_sendint64_le(StringInfo buf, uint64 i);
+extern void disable_core_dump(void);
 
 #endif /* __NEON_UTILS_H__ */
diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c
index 7d9dbfdb7f..fc3332612c 100644
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -35,6 +35,8 @@
  *
  *-------------------------------------------------------------------------
  */
+#include <sys/resource.h>
+
 #include "postgres.h"
 #include "libpq/pqformat.h"
 #include "neon.h"
@@ -1069,6 +1071,12 @@ DetermineEpochStartLsn(WalProposer *wp)
 		if (!((dth->n_entries >= 1) && (dth->entries[dth->n_entries - 1].term ==
 										 walprop_shared->mineLastElectedTerm)))
 		{
+			/*
+			 * Panic to restart PG as we need to retake basebackup.
+			 * However, don't dump core as this is a somewhat expected
+			 * scenario.
+			 */
+			disable_core_dump();
 			walprop_log(PANIC,
 						"collected propEpochStartLsn %X/%X, but basebackup LSN %X/%X",
 						LSN_FORMAT_ARGS(wp->propEpochStartLsn),
@@ -1445,7 +1453,12 @@ RecvAppendResponses(Safekeeper *sk)
 
 		if (sk->appendResponse.term > wp->propTerm)
 		{
-			/* Another compute with higher term is running. */
+			/*
+			 * Another compute with higher term is running. Panic to restart
+			 * PG as we likely need to retake basebackup. However, don't dump
+			 * core as this is a somewhat expected scenario.
+			 */
+			disable_core_dump();
 			walprop_log(PANIC, "WAL acceptor %s:%s with term " INT64_FORMAT " rejected our request, our term " INT64_FORMAT "",
 						sk->host, sk->port,
 						sk->appendResponse.term, wp->propTerm);