From 8b15252f988b2fc77218dea0c4af47cf1fb82c84 Mon Sep 17 00:00:00 2001
From: Arthur Petukhovsky <petuhovskiy@yandex.ru>
Date: Thu, 5 Oct 2023 20:48:01 +0300
Subject: [PATCH] Move walproposer state into struct (#5364)

This patch extracts all postgres-dependent functions into a separate
`walproposer_api` struct of functions. This makes it possible to compile
walproposer as a static library without compiling the rest of the postgres
server code, which is useful for calling walproposer C code from Rust or
for linking this library with anything else.

All global variables containing walproposer state were extracted to a
separate `WalProposer` struct. This makes it possible to run several
walproposers in the same process, in separate threads.

There were no logic changes; the PR mostly consists of moving functions
between several files. We have good test coverage for the walproposer
code and I saw no issues with the tests while refactoring it, so I don't
expect any issues after the merge.

ref https://github.com/neondatabase/neon/issues/547

---------

Co-authored-by: Arseny Sher <sher-ars@yandex.ru>
---
 pgxn/neon/Makefile            |    4 +-
 pgxn/neon/libpagestore.c      |    2 +-
 pgxn/neon/libpqwalproposer.c  |  424 --------
 pgxn/neon/neon.h              |   10 +
 pgxn/neon/neon_utils.c        |  116 +++
 pgxn/neon/neon_utils.h        |   12 +
 pgxn/neon/walproposer.c       | 1777 ++++++++++++---------------------
 pgxn/neon/walproposer.h       |  367 +++++--
 pgxn/neon/walproposer_pg.c    | 1667 +++++++++++++++++++++++++++++++
 pgxn/neon/walproposer_utils.c |  659 ------------
 pgxn/neon/walproposer_utils.h |   19 -
 11 files changed, 2701 insertions(+), 2356 deletions(-)
 delete mode 100644 pgxn/neon/libpqwalproposer.c
 create mode 100644 pgxn/neon/neon_utils.c
 create mode 100644 pgxn/neon/neon_utils.h
 create mode 100644 pgxn/neon/walproposer_pg.c
 delete mode 100644 pgxn/neon/walproposer_utils.c
 delete mode 100644 pgxn/neon/walproposer_utils.h

diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile
index 53917d8bc4..e88901ed78 100644
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -7,12 +7,12 @@ OBJS = \
 	extension_server.o \
 	file_cache.o \
 	libpagestore.o \
-	libpqwalproposer.o \
 	neon.o \
+	neon_utils.o \
 	pagestore_smgr.o \
 	relsize_cache.o \
 	walproposer.o \
-	walproposer_utils.o \
+	walproposer_pg.o \
 	control_plane_connector.o
 
 PG_CPPFLAGS = -I$(libpq_srcdir)
diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
index c89de11594..ca24ec7586 100644
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -30,7 +30,7 @@
 
 #include "neon.h"
 #include "walproposer.h"
-#include "walproposer_utils.h"
+#include "neon_utils.h"
 
 #define PageStoreTrace DEBUG5
 
diff --git a/pgxn/neon/libpqwalproposer.c b/pgxn/neon/libpqwalproposer.c
deleted file mode 100644
index ce9a1475d3..0000000000
--- a/pgxn/neon/libpqwalproposer.c
+++ /dev/null
@@ -1,424 +0,0 @@
-#include "postgres.h"
-
-#include "libpq-fe.h"
-#include "neon.h"
-#include "walproposer.h"
-
-/* Header in walproposer.h -- Wrapper struct to abstract away the libpq connection */
-struct WalProposerConn
-{
-	PGconn	   *pg_conn;
-	bool		is_nonblocking; /* whether the connection is non-blocking */
-	char	   *recvbuf;		/* last received data from
-								 * walprop_async_read */
-};
-
-/* Helper function */
-static bool
-ensure_nonblocking_status(WalProposerConn *conn, bool is_nonblocking)
-{
-	/* If we're already correctly blocking or nonblocking, all good */
-	if (is_nonblocking == conn->is_nonblocking)
-		return true;
-
-	/* Otherwise, set it appropriately */
-	if (PQsetnonblocking(conn->pg_conn, is_nonblocking) == -1)
-		return false;
-
-	conn->is_nonblocking = is_nonblocking;
-	return true;
-}
-
-/* Exported function definitions */
-char *
-walprop_error_message(WalProposerConn *conn)
-{
-	return PQerrorMessage(conn->pg_conn);
-}
-
-WalProposerConnStatusType
-walprop_status(WalProposerConn *conn)
-{
-	switch (PQstatus(conn->pg_conn))
-	{
-		case CONNECTION_OK:
-			return WP_CONNECTION_OK;
-		case CONNECTION_BAD:
-			return WP_CONNECTION_BAD;
-		default:
-			return WP_CONNECTION_IN_PROGRESS;
-	}
-}
-
-WalProposerConn *
-walprop_connect_start(char *conninfo, char *password)
-{
-	WalProposerConn *conn;
-	PGconn	   *pg_conn;
-	const char *keywords[3];
-	const char *values[3];
-	int			n;
-
-	/*
-	 * Connect using the given connection string. If the
-	 * NEON_AUTH_TOKEN environment variable was set, use that as
-	 * the password.
-	 *
-	 * The connection options are parsed in the order they're given, so
-	 * when we set the password before the connection string, the
-	 * connection string can override the password from the env variable.
-	 * Seems useful, although we don't currently use that capability
-	 * anywhere.
-	 */
-	n = 0;
-	if (password)
-	{
-		keywords[n] = "password";
-		values[n] = password;
-		n++;
-	}
-	keywords[n] = "dbname";
-	values[n] = conninfo;
-	n++;
-	keywords[n] = NULL;
-	values[n] = NULL;
-	n++;
-	pg_conn = PQconnectStartParams(keywords, values, 1);
-
-	/*
-	 * Allocation of a PQconn can fail, and will return NULL. We want to fully
-	 * replicate the behavior of PQconnectStart here.
-	 */
-	if (!pg_conn)
-		return NULL;
-
-	/*
-	 * And in theory this allocation can fail as well, but it's incredibly
-	 * unlikely if we just successfully allocated a PGconn.
-	 *
-	 * palloc will exit on failure though, so there's not much we could do if
-	 * it *did* fail.
-	 */
-	conn = palloc(sizeof(WalProposerConn));
-	conn->pg_conn = pg_conn;
-	conn->is_nonblocking = false;	/* connections always start in blocking
-									 * mode */
-	conn->recvbuf = NULL;
-	return conn;
-}
-
-WalProposerConnectPollStatusType
-walprop_connect_poll(WalProposerConn *conn)
-{
-	WalProposerConnectPollStatusType return_val;
-
-	switch (PQconnectPoll(conn->pg_conn))
-	{
-		case PGRES_POLLING_FAILED:
-			return_val = WP_CONN_POLLING_FAILED;
-			break;
-		case PGRES_POLLING_READING:
-			return_val = WP_CONN_POLLING_READING;
-			break;
-		case PGRES_POLLING_WRITING:
-			return_val = WP_CONN_POLLING_WRITING;
-			break;
-		case PGRES_POLLING_OK:
-			return_val = WP_CONN_POLLING_OK;
-			break;
-
-			/*
-			 * There's a comment at its source about this constant being
-			 * unused. We'll expect it's never returned.
-			 */
-		case PGRES_POLLING_ACTIVE:
-			elog(FATAL, "Unexpected PGRES_POLLING_ACTIVE returned from PQconnectPoll");
-
-			/*
-			 * This return is never actually reached, but it's here to make
-			 * the compiler happy
-			 */
-			return WP_CONN_POLLING_FAILED;
-
-		default:
-			Assert(false);
-			return_val = WP_CONN_POLLING_FAILED;	/* keep the compiler quiet */
-	}
-
-	return return_val;
-}
-
-bool
-walprop_send_query(WalProposerConn *conn, char *query)
-{
-	/*
-	 * We need to be in blocking mode for sending the query to run without
-	 * requiring a call to PQflush
-	 */
-	if (!ensure_nonblocking_status(conn, false))
-		return false;
-
-	/* PQsendQuery returns 1 on success, 0 on failure */
-	if (!PQsendQuery(conn->pg_conn, query))
-		return false;
-
-	return true;
-}
-
-WalProposerExecStatusType
-walprop_get_query_result(WalProposerConn *conn)
-{
-	PGresult   *result;
-	WalProposerExecStatusType return_val;
-
-	/* Marker variable if we need to log an unexpected success result */
-	char	   *unexpected_success = NULL;
-
-	/* Consume any input that we might be missing */
-	if (!PQconsumeInput(conn->pg_conn))
-		return WP_EXEC_FAILED;
-
-	if (PQisBusy(conn->pg_conn))
-		return WP_EXEC_NEEDS_INPUT;
-
-
-	result = PQgetResult(conn->pg_conn);
-
-	/*
-	 * PQgetResult returns NULL only if getting the result was successful &
-	 * there's no more of the result to get.
-	 */
-	if (!result)
-	{
-		elog(WARNING, "[libpqwalproposer] Unexpected successful end of command results");
-		return WP_EXEC_UNEXPECTED_SUCCESS;
-	}
-
-	/* Helper macro to reduce boilerplate */
-#define UNEXPECTED_SUCCESS(msg) \
-		return_val = WP_EXEC_UNEXPECTED_SUCCESS; \
-		unexpected_success = msg; \
-		break;
-
-
-	switch (PQresultStatus(result))
-	{
-			/* "true" success case */
-		case PGRES_COPY_BOTH:
-			return_val = WP_EXEC_SUCCESS_COPYBOTH;
-			break;
-
-			/* Unexpected success case */
-		case PGRES_EMPTY_QUERY:
-			UNEXPECTED_SUCCESS("empty query return");
-		case PGRES_COMMAND_OK:
-			UNEXPECTED_SUCCESS("data-less command end");
-		case PGRES_TUPLES_OK:
-			UNEXPECTED_SUCCESS("tuples return");
-		case PGRES_COPY_OUT:
-			UNEXPECTED_SUCCESS("'Copy Out' response");
-		case PGRES_COPY_IN:
-			UNEXPECTED_SUCCESS("'Copy In' response");
-		case PGRES_SINGLE_TUPLE:
-			UNEXPECTED_SUCCESS("single tuple return");
-		case PGRES_PIPELINE_SYNC:
-			UNEXPECTED_SUCCESS("pipeline sync point");
-
-			/* Failure cases */
-		case PGRES_BAD_RESPONSE:
-		case PGRES_NONFATAL_ERROR:
-		case PGRES_FATAL_ERROR:
-		case PGRES_PIPELINE_ABORTED:
-			return_val = WP_EXEC_FAILED;
-			break;
-
-		default:
-			Assert(false);
-			return_val = WP_EXEC_FAILED;	/* keep the compiler quiet */
-	}
-
-	if (unexpected_success)
-		elog(WARNING, "[libpqwalproposer] Unexpected successful %s", unexpected_success);
-
-	return return_val;
-}
-
-pgsocket
-walprop_socket(WalProposerConn *conn)
-{
-	return PQsocket(conn->pg_conn);
-}
-
-int
-walprop_flush(WalProposerConn *conn)
-{
-	return (PQflush(conn->pg_conn));
-}
-
-void
-walprop_finish(WalProposerConn *conn)
-{
-	if (conn->recvbuf != NULL)
-		PQfreemem(conn->recvbuf);
-	PQfinish(conn->pg_conn);
-	pfree(conn);
-}
-
-/*
- * Receive a message from the safekeeper.
- *
- * On success, the data is placed in *buf. It is valid until the next call
- * to this function.
- */
-PGAsyncReadResult
-walprop_async_read(WalProposerConn *conn, char **buf, int *amount)
-{
-	int			result;
-
-	if (conn->recvbuf != NULL)
-	{
-		PQfreemem(conn->recvbuf);
-		conn->recvbuf = NULL;
-	}
-
-	/* Call PQconsumeInput so that we have the data we need */
-	if (!PQconsumeInput(conn->pg_conn))
-	{
-		*amount = 0;
-		*buf = NULL;
-		return PG_ASYNC_READ_FAIL;
-	}
-
-	/*
-	 * The docs for PQgetCopyData list the return values as: 0 if the copy is
-	 * still in progress, but no "complete row" is available -1 if the copy is
-	 * done -2 if an error occurred (> 0) if it was successful; that value is
-	 * the amount transferred.
-	 *
-	 * The protocol we use between walproposer and safekeeper means that we
-	 * *usually* wouldn't expect to see that the copy is done, but this can
-	 * sometimes be triggered by the server returning an ErrorResponse (which
-	 * also happens to have the effect that the copy is done).
-	 */
-	switch (result = PQgetCopyData(conn->pg_conn, &conn->recvbuf, true))
-	{
-		case 0:
-			*amount = 0;
-			*buf = NULL;
-			return PG_ASYNC_READ_TRY_AGAIN;
-		case -1:
-			{
-				/*
-				 * If we get -1, it's probably because of a server error; the
-				 * safekeeper won't normally send a CopyDone message.
-				 *
-				 * We can check PQgetResult to make sure that the server
-				 * failed; it'll always result in PGRES_FATAL_ERROR
-				 */
-				ExecStatusType status = PQresultStatus(PQgetResult(conn->pg_conn));
-
-				if (status != PGRES_FATAL_ERROR)
-					elog(FATAL, "unexpected result status %d after failed PQgetCopyData", status);
-
-				/*
-				 * If there was actually an error, it'll be properly reported
-				 * by calls to PQerrorMessage -- we don't have to do anything
-				 * else
-				 */
-				*amount = 0;
-				*buf = NULL;
-				return PG_ASYNC_READ_FAIL;
-			}
-		case -2:
-			*amount = 0;
-			*buf = NULL;
-			return PG_ASYNC_READ_FAIL;
-		default:
-			/* Positive values indicate the size of the returned result */
-			*amount = result;
-			*buf = conn->recvbuf;
-			return PG_ASYNC_READ_SUCCESS;
-	}
-}
-
-PGAsyncWriteResult
-walprop_async_write(WalProposerConn *conn, void const *buf, size_t size)
-{
-	int			result;
-
-	/* If we aren't in non-blocking mode, switch to it. */
-	if (!ensure_nonblocking_status(conn, true))
-		return PG_ASYNC_WRITE_FAIL;
-
-	/*
-	 * The docs for PQputcopyData list the return values as: 1 if the data was
-	 * queued, 0 if it was not queued because of full buffers, or -1 if an
-	 * error occurred
-	 */
-	result = PQputCopyData(conn->pg_conn, buf, size);
-
-	/*
-	 * We won't get a result of zero because walproposer always empties the
-	 * connection's buffers before sending more
-	 */
-	Assert(result != 0);
-
-	switch (result)
-	{
-		case 1:
-			/* good -- continue */
-			break;
-		case -1:
-			return PG_ASYNC_WRITE_FAIL;
-		default:
-			elog(FATAL, "invalid return %d from PQputCopyData", result);
-	}
-
-	/*
-	 * After queueing the data, we still need to flush to get it to send. This
-	 * might take multiple tries, but we don't want to wait around until it's
-	 * done.
-	 *
-	 * PQflush has the following returns (directly quoting the docs): 0 if
-	 * sucessful, 1 if it was unable to send all the data in the send queue
-	 * yet -1 if it failed for some reason
-	 */
-	switch (result = PQflush(conn->pg_conn))
-	{
-		case 0:
-			return PG_ASYNC_WRITE_SUCCESS;
-		case 1:
-			return PG_ASYNC_WRITE_TRY_FLUSH;
-		case -1:
-			return PG_ASYNC_WRITE_FAIL;
-		default:
-			elog(FATAL, "invalid return %d from PQflush", result);
-	}
-}
-
-/*
- * This function is very similar to walprop_async_write. For more
- * information, refer to the comments there.
- */
-bool
-walprop_blocking_write(WalProposerConn *conn, void const *buf, size_t size)
-{
-	int			result;
-
-	/* If we are in non-blocking mode, switch out of it. */
-	if (!ensure_nonblocking_status(conn, false))
-		return false;
-
-	if ((result = PQputCopyData(conn->pg_conn, buf, size)) == -1)
-		return false;
-
-	Assert(result == 1);
-
-	/* Because the connection is non-blocking, flushing returns 0 or -1 */
-
-	if ((result = PQflush(conn->pg_conn)) == -1)
-		return false;
-
-	Assert(result == 0);
-	return true;
-}
diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h
index 2610da4311..3300c67456 100644
--- a/pgxn/neon/neon.h
+++ b/pgxn/neon/neon.h
@@ -18,6 +18,10 @@ extern char *neon_auth_token;
 extern char *neon_timeline;
 extern char *neon_tenant;
 
+extern char *wal_acceptors_list;
+extern int	wal_acceptor_reconnect_timeout;
+extern int	wal_acceptor_connection_timeout;
+
 extern void pg_init_libpagestore(void);
 extern void pg_init_walproposer(void);
 
@@ -30,4 +34,10 @@ extern void pg_init_extension_server(void);
 extern bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
 extern bool	(*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id);
 
+extern uint64 BackpressureThrottlingTime(void);
+extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
+
+extern void PGDLLEXPORT WalProposerSync(int argc, char *argv[]);
+extern void PGDLLEXPORT WalProposerMain(Datum main_arg);
+
 #endif							/* NEON_H */
diff --git a/pgxn/neon/neon_utils.c b/pgxn/neon/neon_utils.c
new file mode 100644
index 0000000000..06faea7490
--- /dev/null
+++ b/pgxn/neon/neon_utils.c
@@ -0,0 +1,116 @@
+#include "postgres.h"
+
+#include "access/timeline.h"
+#include "access/xlogutils.h"
+#include "common/logging.h"
+#include "common/ip.h"
+#include "funcapi.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "postmaster/interrupt.h"
+#include "replication/slot.h"
+#include "replication/walsender_private.h"
+
+#include "storage/ipc.h"
+#include "utils/builtins.h"
+#include "utils/ps_status.h"
+
+#include "libpq-fe.h"
+#include <netinet/tcp.h>
+#include <unistd.h>
+
+#if PG_VERSION_NUM >= 150000
+#include "access/xlogutils.h"
+#include "access/xlogrecovery.h"
+#endif
+#if PG_MAJORVERSION_NUM >= 16
+#include "utils/guc.h"
+#endif
+
+/*
+ * Convert a character which represents a hexadecimal digit to an integer.
+ *
+ * Returns -1 if the character is not a hexadecimal digit.
+ */
+int
+HexDecodeChar(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+
+	return -1;
+}
+
+/*
+ * Decode a hex string into a byte string, 2 hex chars per byte.
+ *
+ * Returns false if invalid characters are encountered; otherwise true.
+ */
+bool
+HexDecodeString(uint8 *result, char *input, int nbytes)
+{
+	int			i;
+
+	for (i = 0; i < nbytes; ++i)
+	{
+		int			n1 = HexDecodeChar(input[i * 2]);
+		int			n2 = HexDecodeChar(input[i * 2 + 1]);
+
+		if (n1 < 0 || n2 < 0)
+			return false;
+		result[i] = n1 * 16 + n2;
+	}
+
+	return true;
+}
+
+/* --------------------------------
+ *		pq_getmsgint32_le	- get a binary 4-byte int from a message buffer in native (LE) order
+ * --------------------------------
+ */
+uint32
+pq_getmsgint32_le(StringInfo msg)
+{
+	uint32		n32;
+
+	pq_copymsgbytes(msg, (char *) &n32, sizeof(n32));
+
+	return n32;
+}
+
+/* --------------------------------
+ *		pq_getmsgint64_le	- get a binary 8-byte int from a message buffer in native (LE) order
+ * --------------------------------
+ */
+uint64
+pq_getmsgint64_le(StringInfo msg)
+{
+	uint64		n64;
+
+	pq_copymsgbytes(msg, (char *) &n64, sizeof(n64));
+
+	return n64;
+}
+
+/* append a binary [u]int32 to a StringInfo buffer in native (LE) order */
+void
+pq_sendint32_le(StringInfo buf, uint32 i)
+{
+	enlargeStringInfo(buf, sizeof(uint32));
+	memcpy(buf->data + buf->len, &i, sizeof(uint32));
+	buf->len += sizeof(uint32);
+}
+
+/* append a binary [u]int64 to a StringInfo buffer in native (LE) order */
+void
+pq_sendint64_le(StringInfo buf, uint64 i)
+{
+	enlargeStringInfo(buf, sizeof(uint64));
+	memcpy(buf->data + buf->len, &i, sizeof(uint64));
+	buf->len += sizeof(uint64);
+}
diff --git a/pgxn/neon/neon_utils.h b/pgxn/neon/neon_utils.h
new file mode 100644
index 0000000000..e3fafc8d0f
--- /dev/null
+++ b/pgxn/neon/neon_utils.h
@@ -0,0 +1,12 @@
+#ifndef __NEON_UTILS_H__
+#define __NEON_UTILS_H__
+
+#include "postgres.h"
+
+bool		HexDecodeString(uint8 *result, char *input, int nbytes);
+uint32		pq_getmsgint32_le(StringInfo msg);
+uint64		pq_getmsgint64_le(StringInfo msg);
+void		pq_sendint32_le(StringInfo buf, uint32 i);
+void		pq_sendint64_le(StringInfo buf, uint64 i);
+
+#endif							/* __NEON_UTILS_H__ */
diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c
index a9342bd984..c1fd5e3ef3 100644
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -7,9 +7,9 @@
  *
  * We have two ways of launching WalProposer:
  *
- *   1. As a background worker which will run physical WalSender with
- *      am_wal_proposer flag set to true. WalSender in turn would handle WAL
- *      reading part and call WalProposer when ready to scatter WAL.
+ *   1. As a background worker which will pretend to be a physical WalSender.
+ *      WalProposer will receive notifications about newly available WAL and
+ *      will immediately broadcast it to the live safekeepers.
  *
  *   2. As a standalone utility by running `postgres --sync-safekeepers`. That
  *      is needed to create LSN from which it is safe to start postgres. More
@@ -29,107 +29,25 @@
  *         safekeepers, learn start LSN of future epoch and run basebackup'
  *         won't work.
  *
+ * Both ways are implemented in walproposer_pg.c. This file contains the
+ * generic part of walproposer, which is used in both cases and can also be
+ * used as an independent library.
+ *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
-
-#include <signal.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include "access/xact.h"
-#include "access/xlogdefs.h"
-#include "access/xlogutils.h"
-#include "access/xloginsert.h"
-#if PG_VERSION_NUM >= 150000
-#include "access/xlogrecovery.h"
-#endif
-#include "storage/fd.h"
-#include "storage/latch.h"
-#include "miscadmin.h"
-#include "pgstat.h"
-#include "access/xlog.h"
 #include "libpq/pqformat.h"
-#include "replication/slot.h"
-#include "replication/walreceiver.h"
-#if PG_VERSION_NUM >= 160000
-#include "replication/walsender_private.h"
-#endif
-#include "postmaster/bgworker.h"
-#include "postmaster/interrupt.h"
-#include "postmaster/postmaster.h"
-#include "storage/pmsignal.h"
-#include "storage/proc.h"
-#include "storage/ipc.h"
-#include "storage/lwlock.h"
-#include "storage/shmem.h"
-#include "storage/spin.h"
-#include "tcop/tcopprot.h"
-#include "utils/builtins.h"
-#include "utils/guc.h"
-#include "utils/memutils.h"
-#include "utils/ps_status.h"
-#include "utils/timestamp.h"
-
 #include "neon.h"
 #include "walproposer.h"
-#include "walproposer_utils.h"
-
-static bool syncSafekeepers = false;
-
-char	   *wal_acceptors_list = "";
-int			wal_acceptor_reconnect_timeout = 1000;
-int			wal_acceptor_connection_timeout = 10000;
-bool		am_wal_proposer = false;
-
-#define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot"
-
-static int	n_safekeepers = 0;
-static int	quorum = 0;
-static Safekeeper safekeeper[MAX_SAFEKEEPERS];
-static XLogRecPtr availableLsn; /* WAL has been generated up to this point */
-static XLogRecPtr lastSentCommitLsn;	/* last commitLsn broadcast to*
-										 * safekeepers */
-static ProposerGreeting greetRequest;
-static VoteRequest voteRequest; /* Vote request for safekeeper */
-static WaitEventSet *waitEvents;
-static AppendResponse quorumFeedback;
-/*
- *  Minimal LSN which may be needed for recovery of some safekeeper,
- *  record-aligned (first record which might not yet received by someone).
- */
-static XLogRecPtr truncateLsn;
-
-/*
- * Term of the proposer. We want our term to be highest and unique,
- * so we collect terms from safekeepers quorum, choose max and +1.
- * After that our term is fixed and must not change. If we observe
- * that some safekeeper has higher term, it means that we have another
- * running compute, so we must stop immediately.
- */
-static term_t propTerm;
-static TermHistory propTermHistory; /* term history of the proposer */
-static XLogRecPtr propEpochStartLsn;	/* epoch start lsn of the proposer */
-static term_t donorEpoch;		/* Most advanced acceptor epoch */
-static int	donor;				/* Most advanced acceptor */
-static XLogRecPtr timelineStartLsn; /* timeline globally starts at this LSN */
-static int	n_votes = 0;
-static int	n_connected = 0;
-static TimestampTz last_reconnect_attempt;
-
-static WalproposerShmemState * walprop_shared;
+#include "neon_utils.h"
 
 /* Prototypes for private functions */
-static void WalProposerRegister(void);
-static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
-static void WalProposerStart(void);
-static void WalProposerLoop(void);
-static void InitEventSet(void);
-static void UpdateEventSet(Safekeeper *sk, uint32 events);
+static void WalProposerLoop(WalProposer *wp);
 static void HackyRemoveWalProposerEvent(Safekeeper *to_remove);
 static void ShutdownConnection(Safekeeper *sk);
 static void ResetConnection(Safekeeper *sk);
-static long TimeToReconnect(TimestampTz now);
-static void ReconnectSafekeepers(void);
+static long TimeToReconnect(WalProposer *wp, TimestampTz now);
+static void ReconnectSafekeepers(WalProposer *wp);
 static void AdvancePollState(Safekeeper *sk, uint32 events);
 static void HandleConnectionEvent(Safekeeper *sk);
 static void SendStartWALPush(Safekeeper *sk);
@@ -138,403 +56,44 @@ static void SendProposerGreeting(Safekeeper *sk);
 static void RecvAcceptorGreeting(Safekeeper *sk);
 static void SendVoteRequest(Safekeeper *sk);
 static void RecvVoteResponse(Safekeeper *sk);
-static void HandleElectedProposer(void);
-static term_t GetHighestTerm(TermHistory * th);
+static void HandleElectedProposer(WalProposer *wp);
+static term_t GetHighestTerm(TermHistory *th);
 static term_t GetEpoch(Safekeeper *sk);
-static void DetermineEpochStartLsn(void);
-static bool WalProposerRecovery(int donor, TimeLineID timeline, XLogRecPtr startpos, XLogRecPtr endpos);
+static void DetermineEpochStartLsn(WalProposer *wp);
 static void SendProposerElected(Safekeeper *sk);
-static void WalProposerStartStreaming(XLogRecPtr startpos);
 static void StartStreaming(Safekeeper *sk);
 static void SendMessageToNode(Safekeeper *sk);
-static void BroadcastAppendRequest(void);
+static void BroadcastAppendRequest(WalProposer *wp);
 static void HandleActiveState(Safekeeper *sk, uint32 events);
 static bool SendAppendRequests(Safekeeper *sk);
 static bool RecvAppendResponses(Safekeeper *sk);
-static void CombineHotStanbyFeedbacks(HotStandbyFeedback * hs);
-static XLogRecPtr CalculateMinFlushLsn(void);
-static XLogRecPtr GetAcknowledgedByQuorumWALPosition(void);
-static void HandleSafekeeperResponse(void);
+static XLogRecPtr CalculateMinFlushLsn(WalProposer *wp);
+static XLogRecPtr GetAcknowledgedByQuorumWALPosition(WalProposer *wp);
+static void HandleSafekeeperResponse(WalProposer *wp);
 static bool AsyncRead(Safekeeper *sk, char **buf, int *buf_size);
-static bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage * anymsg);
+static bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg);
 static bool BlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState success_state);
 static bool AsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_state);
 static bool AsyncFlush(Safekeeper *sk);
+static int	CompareLsn(const void *a, const void *b);
+static char *FormatSafekeeperState(SafekeeperState state);
+static void AssertEventsOkForState(uint32 events, Safekeeper *sk);
+static uint32 SafekeeperStateDesiredEvents(SafekeeperState state);
+static char *FormatEvents(uint32 events);
 
-static void nwp_shmem_startup_hook(void);
-static void nwp_register_gucs(void);
-static void nwp_prepare_shmem(void);
-static uint64 backpressure_lag_impl(void);
-static bool backpressure_throttling_impl(void);
-
-static process_interrupts_callback_t PrevProcessInterruptsCallback;
-static shmem_startup_hook_type prev_shmem_startup_hook_type;
-#if PG_VERSION_NUM >= 150000
-static shmem_request_hook_type prev_shmem_request_hook = NULL;
-static void walproposer_shmem_request(void);
-#endif
-
-void
-pg_init_walproposer(void)
-{
-	if (!process_shared_preload_libraries_in_progress)
-		return;
-
-	nwp_register_gucs();
-
-	nwp_prepare_shmem();
-
-	delay_backend_us = &backpressure_lag_impl;
-	PrevProcessInterruptsCallback = ProcessInterruptsCallback;
-	ProcessInterruptsCallback = backpressure_throttling_impl;
-
-	WalProposerRegister();
-}
-
-/*
- * Entry point for `postgres --sync-safekeepers`.
- */
-PGDLLEXPORT void
-WalProposerSync(int argc, char *argv[])
-{
-	struct stat stat_buf;
-
-	syncSafekeepers = true;
-#if PG_VERSION_NUM < 150000
-	ThisTimeLineID = 1;
-#endif
-
-	/*
-	 * Initialize postmaster_alive_fds as WaitEventSet checks them.
-	 *
-	 * Copied from InitPostmasterDeathWatchHandle()
-	 */
-	if (pipe(postmaster_alive_fds) < 0)
-		ereport(FATAL,
-				(errcode_for_file_access(),
-					errmsg_internal("could not create pipe to monitor postmaster death: %m")));
-	if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
-		ereport(FATAL,
-				(errcode_for_socket_access(),
-					errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
-
-	ChangeToDataDir();
-
-	/* Create pg_wal directory, if it doesn't exist */
-	if (stat(XLOGDIR, &stat_buf) != 0)
-	{
-		ereport(LOG, (errmsg("creating missing WAL directory \"%s\"", XLOGDIR)));
-		if (MakePGDirectory(XLOGDIR) < 0)
-		{
-			ereport(ERROR,
-					(errcode_for_file_access(),
-						errmsg("could not create directory \"%s\": %m",
-							   XLOGDIR)));
-			exit(1);
-		}
-	}
-
-	WalProposerInit(0, 0);
-
-	BackgroundWorkerUnblockSignals();
-
-	WalProposerStart();
-}
-
-static void
-nwp_register_gucs(void)
-{
-	DefineCustomStringVariable(
-							   "neon.safekeepers",
-							   "List of Neon WAL acceptors (host:port)",
-							   NULL,	/* long_desc */
-							   &wal_acceptors_list, /* valueAddr */
-							   "",	/* bootValue */
-							   PGC_POSTMASTER,
-							   GUC_LIST_INPUT,	/* extensions can't use*
-												 * GUC_LIST_QUOTE */
-							   NULL, NULL, NULL);
-
-	DefineCustomIntVariable(
-							"neon.safekeeper_reconnect_timeout",
-							"Walproposer reconnects to offline safekeepers once in this interval.",
-							NULL,
-							&wal_acceptor_reconnect_timeout,
-							1000, 0, INT_MAX,	/* default, min, max */
-							PGC_SIGHUP, /* context */
-							GUC_UNIT_MS,	/* flags */
-							NULL, NULL, NULL);
-
-	DefineCustomIntVariable(
-							"neon.safekeeper_connect_timeout",
-							"Connection or connection attempt to safekeeper is terminated if no message is received (or connection attempt doesn't finish) within this period.",
-							NULL,
-							&wal_acceptor_connection_timeout,
-							10000, 0, INT_MAX,
-							PGC_SIGHUP,
-							GUC_UNIT_MS,
-							NULL, NULL, NULL);
-}
-
-/* shmem handling */
-
-static void
-nwp_prepare_shmem(void)
-{
-#if PG_VERSION_NUM >= 150000
-	prev_shmem_request_hook = shmem_request_hook;
-	shmem_request_hook = walproposer_shmem_request;
-#else
-	RequestAddinShmemSpace(WalproposerShmemSize());
-#endif
-	prev_shmem_startup_hook_type = shmem_startup_hook;
-	shmem_startup_hook = nwp_shmem_startup_hook;
-}
-
-#if PG_VERSION_NUM >= 150000
-/*
- * shmem_request hook: request additional shared resources.  We'll allocate or
- * attach to the shared resources in nwp_shmem_startup_hook().
- */
-static void
-walproposer_shmem_request(void)
-{
-	if (prev_shmem_request_hook)
-		prev_shmem_request_hook();
-
-	RequestAddinShmemSpace(WalproposerShmemSize());
-}
-#endif
-
-static void
-nwp_shmem_startup_hook(void)
-{
-	if (prev_shmem_startup_hook_type)
-		prev_shmem_startup_hook_type();
-
-	WalproposerShmemInit();
-}
-
-/*
- * WAL proposer bgworker entry point.
- */
-PGDLLEXPORT void
-WalProposerMain(Datum main_arg)
-{
-#if PG_VERSION_NUM >= 150000
-	TimeLineID	tli;
-#endif
-
-	/* Establish signal handlers. */
-	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
-	pqsignal(SIGHUP, SignalHandlerForConfigReload);
-	pqsignal(SIGTERM, die);
-
-	BackgroundWorkerUnblockSignals();
-
-#if PG_VERSION_NUM >= 150000
-	/* FIXME pass proper tli to WalProposerInit ? */
-	GetXLogReplayRecPtr(&tli);
-	WalProposerInit(GetFlushRecPtr(NULL), GetSystemIdentifier());
-#else
-	GetXLogReplayRecPtr(&ThisTimeLineID);
-	WalProposerInit(GetFlushRecPtr(), GetSystemIdentifier());
-#endif
-
-	last_reconnect_attempt = GetCurrentTimestamp();
-
-	application_name = (char *) "walproposer";	/* for
-												 * synchronous_standby_names */
-	am_wal_proposer = true;
-	am_walsender = true;
-	InitWalSender();
-	InitProcessPhase2();
-
-	/* Create replication slot for WAL proposer if not exists */
-	if (SearchNamedReplicationSlot(WAL_PROPOSER_SLOT_NAME, false) == NULL)
-	{
-		ReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT, false);
-		ReplicationSlotReserveWal();
-		/* Write this slot to disk */
-		ReplicationSlotMarkDirty();
-		ReplicationSlotSave();
-		ReplicationSlotRelease();
-	}
-
-	WalProposerStart();
-}
-
-/*
- * Create new AppendRequest message and start sending it. This function is
- * called from walsender every time the new WAL is available.
- */
-void
-WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos)
-{
-	Assert(startpos == availableLsn && endpos >= availableLsn);
-	availableLsn = endpos;
-	BroadcastAppendRequest();
-}
-
-/*
- * Advance the WAL proposer state machine, waiting each time for events to occur.
- * Will exit only when latch is set, i.e. new WAL should be pushed from walsender
- * to walproposer.
- */
-void
-WalProposerPoll(void)
-{
-	while (true)
-	{
-		Safekeeper *sk = NULL;
-		bool		wait_timeout = false;
-		bool		late_cv_trigger = false;
-		WaitEvent	event = {0};
-		int			rc = 0;
-		TimestampTz now = GetCurrentTimestamp();
-		long		timeout = TimeToReconnect(now);
-
-#if PG_MAJORVERSION_NUM >= 16
-		if (WalSndCtl != NULL)
-			ConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);
-#endif
-
-		/*
-		 * Wait for a wait event to happen, or timeout:
-		 *  - Safekeeper socket can become available for READ or WRITE
-		 *  - Our latch got set, because
-		 *     * PG15-: We got woken up by a process triggering the WalSender
-		 *     * PG16+: WalSndCtl->wal_flush_cv was triggered
-		 */
-		rc = WaitEventSetWait(waitEvents, timeout,
-							  &event, 1, WAIT_EVENT_WAL_SENDER_MAIN);
-#if PG_MAJORVERSION_NUM >= 16
-		if (WalSndCtl != NULL)
-			late_cv_trigger = ConditionVariableCancelSleep();
-#endif
-
-		/*
-		 * If wait is terminated by latch set (walsenders' latch is set on
-		 * each wal flush), then exit loop. (no need for pm death check due to
-		 * WL_EXIT_ON_PM_DEATH)
-		 */
-		if ((rc == 1 && event.events & WL_LATCH_SET) || late_cv_trigger)
-		{
-			/* Reset our latch */
-			ResetLatch(MyLatch);
-
-			break;
-		}
-		
-		/*
-		 * If the event contains something that one of our safekeeper states
-		 * was waiting for, we'll advance its state.
-		 */
-		if (rc == 1 && (event.events & (WL_SOCKET_MASK)))
-		{
-			sk = (Safekeeper *) event.user_data;
-			AdvancePollState(sk, event.events);
-		}
-
-		/*
-		 * If the timeout expired, attempt to reconnect to any safekeepers
-		 * that we dropped
-		 */
-		ReconnectSafekeepers();
-
-		if (rc == 0) /* timeout expired */
-		{
-			wait_timeout = true;
-
-			/*
-			 * Ensure flushrecptr is set to a recent value. This fixes a case
-			 * where we've not been notified of new WAL records when we were
-			 * planning on consuming them.
-			 */
-			if (!syncSafekeepers) {
-				XLogRecPtr flushed;
-
-#if PG_MAJORVERSION_NUM < 15
-				flushed = GetFlushRecPtr();
-#else
-				flushed = GetFlushRecPtr(NULL);
-#endif
-				if (flushed > availableLsn)
-					break;
-			}
-		}
-
-		now = GetCurrentTimestamp();
-		if (rc == 0 || TimeToReconnect(now) <= 0)			/* timeout expired: poll state */
-		{
-			TimestampTz now;
-
-			/*
-			 * If no WAL was generated during timeout (and we have already
-			 * collected the quorum), then send pool message
-			 */
-			if (availableLsn != InvalidXLogRecPtr)
-			{
-				BroadcastAppendRequest();
-			}
-
-			/*
-			 * Abandon connection attempts which take too long.
-			 */
-			now = GetCurrentTimestamp();
-			for (int i = 0; i < n_safekeepers; i++)
-			{
-				Safekeeper *sk = &safekeeper[i];
-
-				if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now,
-											   wal_acceptor_connection_timeout))
-				{
-					elog(WARNING, "terminating connection to safekeeper '%s:%s' in '%s' state: no messages received during the last %dms or connection attempt took longer than that",
-						 sk->host, sk->port, FormatSafekeeperState(sk->state), wal_acceptor_connection_timeout);
-					ShutdownConnection(sk);
-				}
-			}
-		}
-	}
-}
-
-/*
- * Register a background worker proposing WAL to wal acceptors.
- */
-static void
-WalProposerRegister(void)
-{
-	BackgroundWorker bgw;
-
-	if (*wal_acceptors_list == '\0')
-		return;
-
-	memset(&bgw, 0, sizeof(bgw));
-	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
-	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
-	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "neon");
-	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "WalProposerMain");
-	snprintf(bgw.bgw_name, BGW_MAXLEN, "WAL proposer");
-	snprintf(bgw.bgw_type, BGW_MAXLEN, "WAL proposer");
-	bgw.bgw_restart_time = 5;
-	bgw.bgw_notify_pid = 0;
-	bgw.bgw_main_arg = (Datum) 0;
-
-	RegisterBackgroundWorker(&bgw);
-}
-
-static void
-WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId)
+WalProposer *
+WalProposerCreate(WalProposerConfig *config, walproposer_api api)
 {
 	char	   *host;
 	char	   *sep;
 	char	   *port;
+	WalProposer *wp;
 
-	load_file("libpqwalreceiver", false);
-	if (WalReceiverFunctions == NULL)
-		elog(ERROR, "libpqwalreceiver didn't initialize correctly");
+	wp = palloc0(sizeof(WalProposer));
+	wp->config = config;
+	wp->api = api;
 
-	for (host = wal_acceptors_list; host != NULL && *host != '\0'; host = sep)
+	for (host = wp->config->safekeepers_list; host != NULL && *host != '\0'; host = sep)
 	{
 		port = strchr(host, ':');
 		if (port == NULL)
@@ -545,118 +104,186 @@ WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId)
 		sep = strchr(port, ',');
 		if (sep != NULL)
 			*sep++ = '\0';
-		if (n_safekeepers + 1 >= MAX_SAFEKEEPERS)
+		if (wp->n_safekeepers + 1 >= MAX_SAFEKEEPERS)
 		{
 			elog(FATAL, "Too many safekeepers");
 		}
-		safekeeper[n_safekeepers].host = host;
-		safekeeper[n_safekeepers].port = port;
-		safekeeper[n_safekeepers].state = SS_OFFLINE;
-		safekeeper[n_safekeepers].conn = NULL;
+		wp->safekeeper[wp->n_safekeepers].host = host;
+		wp->safekeeper[wp->n_safekeepers].port = port;
+		wp->safekeeper[wp->n_safekeepers].state = SS_OFFLINE;
+		wp->safekeeper[wp->n_safekeepers].conn = NULL;
+		wp->safekeeper[wp->n_safekeepers].wp = wp;
 
 		{
-			Safekeeper *sk = &safekeeper[n_safekeepers];
-			int written = 0;
+			Safekeeper *sk = &wp->safekeeper[wp->n_safekeepers];
+			int			written = 0;
 
 			written = snprintf((char *) &sk->conninfo, MAXCONNINFO,
 							   "host=%s port=%s dbname=replication options='-c timeline_id=%s tenant_id=%s'",
-							   sk->host, sk->port, neon_timeline, neon_tenant);
+							   sk->host, sk->port, wp->config->neon_timeline, wp->config->neon_tenant);
 			if (written > MAXCONNINFO || written < 0)
 				elog(FATAL, "could not create connection string for safekeeper %s:%s", sk->host, sk->port);
 		}
 
-		initStringInfo(&safekeeper[n_safekeepers].outbuf);
-		safekeeper[n_safekeepers].xlogreader = XLogReaderAllocate(wal_segment_size, NULL, XL_ROUTINE(.segment_open = wal_segment_open,.segment_close = wal_segment_close), NULL);
-		if (safekeeper[n_safekeepers].xlogreader == NULL)
+		initStringInfo(&wp->safekeeper[wp->n_safekeepers].outbuf);
+		wp->safekeeper[wp->n_safekeepers].xlogreader = wp->api.wal_reader_allocate();
+		if (wp->safekeeper[wp->n_safekeepers].xlogreader == NULL)
 			elog(FATAL, "Failed to allocate xlog reader");
-		safekeeper[n_safekeepers].flushWrite = false;
-		safekeeper[n_safekeepers].startStreamingAt = InvalidXLogRecPtr;
-		safekeeper[n_safekeepers].streamingAt = InvalidXLogRecPtr;
-		n_safekeepers += 1;
+		wp->safekeeper[wp->n_safekeepers].flushWrite = false;
+		wp->safekeeper[wp->n_safekeepers].startStreamingAt = InvalidXLogRecPtr;
+		wp->safekeeper[wp->n_safekeepers].streamingAt = InvalidXLogRecPtr;
+		wp->n_safekeepers += 1;
 	}
-	if (n_safekeepers < 1)
+	if (wp->n_safekeepers < 1)
 	{
 		elog(FATAL, "Safekeepers addresses are not specified");
 	}
-	quorum = n_safekeepers / 2 + 1;
+	wp->quorum = wp->n_safekeepers / 2 + 1;
 
 	/* Fill the greeting package */
-	greetRequest.tag = 'g';
-	greetRequest.protocolVersion = SK_PROTOCOL_VERSION;
-	greetRequest.pgVersion = PG_VERSION_NUM;
-	pg_strong_random(&greetRequest.proposerId, sizeof(greetRequest.proposerId));
-	greetRequest.systemId = systemId;
-	if (!neon_timeline)
+	wp->greetRequest.tag = 'g';
+	wp->greetRequest.protocolVersion = SK_PROTOCOL_VERSION;
+	wp->greetRequest.pgVersion = PG_VERSION_NUM;
+	wp->api.strong_random(&wp->greetRequest.proposerId, sizeof(wp->greetRequest.proposerId));
+	wp->greetRequest.systemId = wp->config->systemId;
+	if (!wp->config->neon_timeline)
 		elog(FATAL, "neon.timeline_id is not provided");
-	if (*neon_timeline != '\0' &&
-		!HexDecodeString(greetRequest.timeline_id, neon_timeline, 16))
-		elog(FATAL, "Could not parse neon.timeline_id, %s", neon_timeline);
-	if (!neon_tenant)
+	if (*wp->config->neon_timeline != '\0' &&
+		!HexDecodeString(wp->greetRequest.timeline_id, wp->config->neon_timeline, 16))
+		elog(FATAL, "Could not parse neon.timeline_id, %s", wp->config->neon_timeline);
+	if (!wp->config->neon_tenant)
 		elog(FATAL, "neon.tenant_id is not provided");
-	if (*neon_tenant != '\0' &&
-		!HexDecodeString(greetRequest.tenant_id, neon_tenant, 16))
-		elog(FATAL, "Could not parse neon.tenant_id, %s", neon_tenant);
+	if (*wp->config->neon_tenant != '\0' &&
+		!HexDecodeString(wp->greetRequest.tenant_id, wp->config->neon_tenant, 16))
+		elog(FATAL, "Could not parse neon.tenant_id, %s", wp->config->neon_tenant);
 
-#if PG_VERSION_NUM >= 150000
-	/* FIXME don't use hardcoded timeline id */
-	greetRequest.timeline = 1;
-#else
-	greetRequest.timeline = ThisTimeLineID;
-#endif
-	greetRequest.walSegSize = wal_segment_size;
+	wp->greetRequest.timeline = wp->api.get_timeline_id();
+	wp->greetRequest.walSegSize = wp->config->wal_segment_size;
 
-	InitEventSet();
-}
+	wp->api.init_event_set(wp->n_safekeepers);
 
-static void
-WalProposerStart(void)
-{
-
-	/* Initiate connections to all safekeeper nodes */
-	for (int i = 0; i < n_safekeepers; i++)
-	{
-		ResetConnection(&safekeeper[i]);
-	}
-
-	WalProposerLoop();
-}
-
-static void
-WalProposerLoop(void)
-{
-	while (true)
-		WalProposerPoll();
-}
-
-/* Initializes the internal event set, provided that it is currently null */
-static void
-InitEventSet(void)
-{
-	if (waitEvents)
-		elog(FATAL, "double-initialization of event set");
-
-	waitEvents = CreateWaitEventSet(TopMemoryContext, 2 + n_safekeepers);
-	AddWaitEventToSet(waitEvents, WL_LATCH_SET, PGINVALID_SOCKET,
-					  MyLatch, NULL);
-	AddWaitEventToSet(waitEvents, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
-					  NULL, NULL);
+	return wp;
 }
 
 /*
- * Updates the events we're already waiting on for the safekeeper, setting it to
- * the provided `events`
- *
- * This function is called any time the safekeeper's state switches to one where
- * it has to wait to continue. This includes the full body of AdvancePollState
- * and calls to IO helper functions.
+ * Create a new AppendRequest message and start sending it. This function is
+ * called from walsender every time new WAL is available.
  */
-static void
-UpdateEventSet(Safekeeper *sk, uint32 events)
+void
+WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPtr endpos)
 {
-	/* eventPos = -1 when we don't have an event */
-	Assert(sk->eventPos != -1);
+	Assert(startpos == wp->availableLsn && endpos >= wp->availableLsn);
+	wp->availableLsn = endpos;
+	BroadcastAppendRequest(wp);
+}
 
-	ModifyWaitEvent(waitEvents, sk->eventPos, events, NULL);
+/*
+ * Advance the WAL proposer state machine, waiting each time for events to occur.
+ *
+ * Returns when the latch is set, i.e. new WAL should be pushed from walsender
+ * to walproposer. Unless wp->config->syncSafekeepers is set, it also returns
+ * when a wait times out and the flush pointer is already ahead of
+ * wp->availableLsn.
+ */
+void
+WalProposerPoll(WalProposer *wp)
+{
+	while (true)
+	{
+		Safekeeper *sk = NULL;
+		int			rc = 0;
+		uint32		events = 0;
+		TimestampTz now = wp->api.get_current_timestamp();
+		long		timeout = TimeToReconnect(wp, now);
+
+		rc = wp->api.wait_event_set(timeout, &sk, &events);
+
+		/* Exit loop if latch is set (we got new WAL) */
+		if ((rc == 1 && events & WL_LATCH_SET))
+			break;
+
+		/*
+		 * If the event contains something that one of our safekeeper states
+		 * was waiting for, we'll advance its state.
+		 */
+		if (rc == 1 && (events & WL_SOCKET_MASK))
+		{
+			Assert(sk != NULL);
+			AdvancePollState(sk, events);
+		}
+
+		/* Retry offline safekeepers if the reconnect timeout has expired */
+		ReconnectSafekeepers(wp);
+
+		if (rc == 0)			/* timeout expired */
+		{
+			/*
+			 * Ensure flushrecptr is set to a recent value. This fixes a case
+			 * where we've not been notified of new WAL records when we were
+			 * planning on consuming them.
+			 */
+			if (!wp->config->syncSafekeepers)
+			{
+				XLogRecPtr	flushed = wp->api.get_flush_rec_ptr();
+
+				if (flushed > wp->availableLsn)
+					break;
+			}
+		}
+
+		now = wp->api.get_current_timestamp();
+		/* timeout expired: poll state */
+		if (rc == 0 || TimeToReconnect(wp, now) <= 0)
+		{
+			/*
+			 * If no WAL was generated during timeout (and we have already
+			 * collected the quorum), then send empty keepalive message
+			 */
+			if (wp->availableLsn != InvalidXLogRecPtr)
+			{
+				BroadcastAppendRequest(wp);
+			}
+
+			/*
+			 * Abandon connection attempts which take too long. The timestamp
+			 * is re-read after the broadcast into the loop-level 'now'; no
+			 * second, shadowing variable is needed for it.
+			 */
+			now = wp->api.get_current_timestamp();
+			for (int i = 0; i < wp->n_safekeepers; i++)
+			{
+				Safekeeper *cur = &wp->safekeeper[i];
+
+				if (TimestampDifferenceExceeds(cur->latestMsgReceivedAt, now,
+											   wp->config->safekeeper_connection_timeout))
+				{
+					elog(WARNING, "terminating connection to safekeeper '%s:%s' in '%s' state: no messages received during the last %dms or connection attempt took longer than that",
+						 cur->host, cur->port, FormatSafekeeperState(cur->state), wp->config->safekeeper_connection_timeout);
+					ShutdownConnection(cur);
+				}
+			}
+		}
+	}
+}
+
+void
+WalProposerStart(WalProposer *wp)
+{
+	int			idx = 0;
+
+	/* Kick off a connection attempt to every configured safekeeper */
+	while (idx < wp->n_safekeepers)
+		ResetConnection(&wp->safekeeper[idx++]);
+
+	/* Then hand control over to the polling loop */
+	WalProposerLoop(wp);
+}
+
+static void
+WalProposerLoop(WalProposer *wp)
+{
+	for (;;)					/* no exit condition: poll again after each return */
+		WalProposerPoll(wp);
 }
 
 /*
@@ -667,24 +294,22 @@ UpdateEventSet(Safekeeper *sk, uint32 events)
 static void
 HackyRemoveWalProposerEvent(Safekeeper *to_remove)
 {
+	WalProposer *wp = to_remove->wp;
+
 	/* Remove the existing event set */
-	if (waitEvents)
-	{
-		FreeWaitEventSet(waitEvents);
-		waitEvents = NULL;
-	}
+	wp->api.free_event_set();
 	/* Re-initialize it without adding any safekeeper events */
-	InitEventSet();
+	wp->api.init_event_set(wp->n_safekeepers);
 
 	/*
 	 * loop through the existing safekeepers. If they aren't the one we're
 	 * removing, and if they have a socket we can use, re-add the applicable
 	 * events.
 	 */
-	for (int i = 0; i < n_safekeepers; i++)
+	for (int i = 0; i < wp->n_safekeepers; i++)
 	{
 		uint32		desired_events = WL_NO_EVENTS;
-		Safekeeper *sk = &safekeeper[i];
+		Safekeeper *sk = &wp->safekeeper[i];
 
 		sk->eventPos = -1;
 
@@ -695,7 +320,8 @@ HackyRemoveWalProposerEvent(Safekeeper *to_remove)
 		if (sk->conn != NULL)
 		{
 			desired_events = SafekeeperStateDesiredEvents(sk->state);
-			sk->eventPos = AddWaitEventToSet(waitEvents, desired_events, walprop_socket(sk->conn), NULL, sk);
+			/* will set sk->eventPos */
+			wp->api.add_safekeeper_event_set(sk, desired_events);
 		}
 	}
 }
@@ -705,7 +331,7 @@ static void
 ShutdownConnection(Safekeeper *sk)
 {
 	if (sk->conn)
-		walprop_finish(sk->conn);
+		sk->wp->api.conn_finish(sk->conn);
 	sk->conn = NULL;
 	sk->state = SS_OFFLINE;
 	sk->flushWrite = false;
@@ -727,7 +353,7 @@ ShutdownConnection(Safekeeper *sk)
 static void
 ResetConnection(Safekeeper *sk)
 {
-	pgsocket	sock;			/* socket of the new connection */
+	WalProposer *wp = sk->wp;
 
 	if (sk->state != SS_OFFLINE)
 	{
@@ -737,7 +363,7 @@ ResetConnection(Safekeeper *sk)
 	/*
 	 * Try to establish new connection
 	 */
-	sk->conn = walprop_connect_start((char *) &sk->conninfo, neon_auth_token);
+	sk->conn = wp->api.conn_connect_start((char *) &sk->conninfo);
 
 	/*
 	 * "If the result is null, then libpq has been unable to allocate a new
@@ -751,7 +377,7 @@ ResetConnection(Safekeeper *sk)
 	 * PQconnectPoll. Before we do that though, we need to check that it
 	 * didn't immediately fail.
 	 */
-	if (walprop_status(sk->conn) == WP_CONNECTION_BAD)
+	if (wp->api.conn_status(sk->conn) == WP_CONNECTION_BAD)
 	{
 		/*---
 		 * According to libpq docs:
@@ -763,13 +389,13 @@ ResetConnection(Safekeeper *sk)
 		 * https://www.postgresql.org/docs/devel/libpq-connect.html#LIBPQ-PQCONNECTSTARTPARAMS
 		 */
 		elog(WARNING, "Immediate failure to connect with node '%s:%s':\n\terror: %s",
-			 sk->host, sk->port, walprop_error_message(sk->conn));
+			 sk->host, sk->port, wp->api.conn_error_message(sk->conn));
 
 		/*
 		 * Even though the connection failed, we still need to clean up the
 		 * object
 		 */
-		walprop_finish(sk->conn);
+		wp->api.conn_finish(sk->conn);
 		sk->conn = NULL;
 		return;
 	}
@@ -790,10 +416,9 @@ ResetConnection(Safekeeper *sk)
 	elog(LOG, "connecting with node %s:%s", sk->host, sk->port);
 
 	sk->state = SS_CONNECTING_WRITE;
-	sk->latestMsgReceivedAt = GetCurrentTimestamp();
+	sk->latestMsgReceivedAt = wp->api.get_current_timestamp();
 
-	sock = walprop_socket(sk->conn);
-	sk->eventPos = AddWaitEventToSet(waitEvents, WL_SOCKET_WRITEABLE, sock, NULL, sk);
+	wp->api.add_safekeeper_event_set(sk, WL_SOCKET_WRITEABLE);
 	return;
 }
 
@@ -803,16 +428,16 @@ ResetConnection(Safekeeper *sk)
  * (do we actually need this?).
  */
 static long
-TimeToReconnect(TimestampTz now)
+TimeToReconnect(WalProposer *wp, TimestampTz now)
 {
 	TimestampTz passed;
 	TimestampTz till_reconnect;
 
-	if (wal_acceptor_reconnect_timeout <= 0)
+	if (wp->config->safekeeper_reconnect_timeout <= 0)
 		return -1;
 
-	passed = now - last_reconnect_attempt;
-	till_reconnect = wal_acceptor_reconnect_timeout * 1000 - passed;
+	passed = now - wp->last_reconnect_attempt;
+	till_reconnect = wp->config->safekeeper_reconnect_timeout * 1000 - passed;
 	if (till_reconnect <= 0)
 		return 0;
 	return (long) (till_reconnect / 1000);
@@ -820,17 +445,17 @@ TimeToReconnect(TimestampTz now)
 
 /* If the timeout has expired, attempt to reconnect to all offline safekeepers */
 static void
-ReconnectSafekeepers(void)
+ReconnectSafekeepers(WalProposer *wp)
 {
-	TimestampTz now = GetCurrentTimestamp();
+	TimestampTz now = wp->api.get_current_timestamp();
 
-	if (TimeToReconnect(now) == 0)
+	if (TimeToReconnect(wp, now) == 0)
 	{
-		last_reconnect_attempt = now;
-		for (int i = 0; i < n_safekeepers; i++)
+		wp->last_reconnect_attempt = now;
+		for (int i = 0; i < wp->n_safekeepers; i++)
 		{
-			if (safekeeper[i].state == SS_OFFLINE)
-				ResetConnection(&safekeeper[i]);
+			if (wp->safekeeper[i].state == SS_OFFLINE)
+				ResetConnection(&wp->safekeeper[i]);
 		}
 	}
 }
@@ -938,7 +563,8 @@ AdvancePollState(Safekeeper *sk, uint32 events)
 static void
 HandleConnectionEvent(Safekeeper *sk)
 {
-	WalProposerConnectPollStatusType result = walprop_connect_poll(sk->conn);
+	WalProposer *wp = sk->wp;
+	WalProposerConnectPollStatusType result = wp->api.conn_connect_poll(sk->conn);
 
 	/* The new set of events we'll wait on, after updating */
 	uint32		new_events = WL_NO_EVENTS;
@@ -948,7 +574,8 @@ HandleConnectionEvent(Safekeeper *sk)
 		case WP_CONN_POLLING_OK:
 			elog(LOG, "connected with node %s:%s", sk->host,
 				 sk->port);
-			sk->latestMsgReceivedAt = GetCurrentTimestamp();
+			sk->latestMsgReceivedAt = wp->api.get_current_timestamp();
+
 			/*
 			 * We have to pick some event to update event set. We'll
 			 * eventually need the socket to be readable, so we go with that.
@@ -970,7 +597,7 @@ HandleConnectionEvent(Safekeeper *sk)
 
 		case WP_CONN_POLLING_FAILED:
 			elog(WARNING, "failed to connect to node '%s:%s': %s",
-				 sk->host, sk->port, walprop_error_message(sk->conn));
+				 sk->host, sk->port, wp->api.conn_error_message(sk->conn));
 
 			/*
 			 * If connecting failed, we don't want to restart the connection
@@ -987,7 +614,7 @@ HandleConnectionEvent(Safekeeper *sk)
 	 * old event and re-register an event on the new socket.
 	 */
 	HackyRemoveWalProposerEvent(sk);
-	sk->eventPos = AddWaitEventToSet(waitEvents, new_events, walprop_socket(sk->conn), NULL, sk);
+	wp->api.add_safekeeper_event_set(sk, new_events);
 
 	/* If we successfully connected, send START_WAL_PUSH query */
 	if (result == WP_CONN_POLLING_OK)
@@ -1002,21 +629,25 @@ HandleConnectionEvent(Safekeeper *sk)
 static void
 SendStartWALPush(Safekeeper *sk)
 {
-	if (!walprop_send_query(sk->conn, "START_WAL_PUSH"))
+	WalProposer *wp = sk->wp;
+
+	if (!wp->api.conn_send_query(sk->conn, "START_WAL_PUSH"))
 	{
 		elog(WARNING, "Failed to send 'START_WAL_PUSH' query to safekeeper %s:%s: %s",
-			 sk->host, sk->port, walprop_error_message(sk->conn));
+			 sk->host, sk->port, wp->api.conn_error_message(sk->conn));
 		ShutdownConnection(sk);
 		return;
 	}
 	sk->state = SS_WAIT_EXEC_RESULT;
-	UpdateEventSet(sk, WL_SOCKET_READABLE);
+	wp->api.update_event_set(sk, WL_SOCKET_READABLE);
 }
 
 static void
 RecvStartWALPushResult(Safekeeper *sk)
 {
-	switch (walprop_get_query_result(sk->conn))
+	WalProposer *wp = sk->wp;
+
+	switch (wp->api.conn_get_query_result(sk->conn))
 	{
 			/*
 			 * Successful result, move on to starting the handshake
@@ -1040,7 +671,7 @@ RecvStartWALPushResult(Safekeeper *sk)
 
 		case WP_EXEC_FAILED:
 			elog(WARNING, "Failed to send query to safekeeper %s:%s: %s",
-				 sk->host, sk->port, walprop_error_message(sk->conn));
+				 sk->host, sk->port, wp->api.conn_error_message(sk->conn));
 			ShutdownConnection(sk);
 			return;
 
@@ -1069,19 +700,21 @@ SendProposerGreeting(Safekeeper *sk)
 	 * On failure, logging & resetting the connection is handled. We just need
 	 * to handle the control flow.
 	 */
-	BlockingWrite(sk, &greetRequest, sizeof(greetRequest), SS_HANDSHAKE_RECV);
+	BlockingWrite(sk, &sk->wp->greetRequest, sizeof(sk->wp->greetRequest), SS_HANDSHAKE_RECV);
 }
 
 static void
 RecvAcceptorGreeting(Safekeeper *sk)
 {
+	WalProposer *wp = sk->wp;
+
 	/*
 	 * If our reading doesn't immediately succeed, any necessary error
 	 * handling or state setting is taken care of. We can leave any other work
 	 * until later.
 	 */
 	sk->greetResponse.apm.tag = 'g';
-	if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) & sk->greetResponse))
+	if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->greetResponse))
 		return;
 
 	elog(LOG, "received AcceptorGreeting from safekeeper %s:%s", sk->host, sk->port);
@@ -1089,37 +722,37 @@ RecvAcceptorGreeting(Safekeeper *sk)
 	/* Protocol is all good, move to voting. */
 	sk->state = SS_VOTING;
 
-	/* 
+	/*
 	 * Note: it would be better to track the counter on per safekeeper basis,
-	 * but at worst walproposer would restart with 'term rejected', so leave as
-	 * is for now.
+	 * but at worst walproposer would restart with 'term rejected', so leave
+	 * as is for now.
 	 */
-	++n_connected;
-	if (n_connected <= quorum)
+	++wp->n_connected;
+	if (wp->n_connected <= wp->quorum)
 	{
 		/* We're still collecting terms from the majority. */
-		propTerm = Max(sk->greetResponse.term, propTerm);
+		wp->propTerm = Max(sk->greetResponse.term, wp->propTerm);
 
 		/* Quorum is acquried, prepare the vote request. */
-		if (n_connected == quorum)
+		if (wp->n_connected == wp->quorum)
 		{
-			propTerm++;
-			elog(LOG, "proposer connected to quorum (%d) safekeepers, propTerm=" INT64_FORMAT, quorum, propTerm);
+			wp->propTerm++;
+			elog(LOG, "proposer connected to quorum (%d) safekeepers, propTerm=" INT64_FORMAT, wp->quorum, wp->propTerm);
 
-			voteRequest = (VoteRequest)
+			wp->voteRequest = (VoteRequest)
 			{
 				.tag = 'v',
-					.term = propTerm
+					.term = wp->propTerm
 			};
-			memcpy(voteRequest.proposerId.data, greetRequest.proposerId.data, UUID_LEN);
+			memcpy(wp->voteRequest.proposerId.data, wp->greetRequest.proposerId.data, UUID_LEN);
 		}
 	}
-	else if (sk->greetResponse.term > propTerm)
+	else if (sk->greetResponse.term > wp->propTerm)
 	{
 		/* Another compute with higher term is running. */
 		elog(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejects our connection request with term " INT64_FORMAT "",
 			 sk->host, sk->port,
-			 sk->greetResponse.term, propTerm);
+			 sk->greetResponse.term, wp->propTerm);
 	}
 
 	/*
@@ -1128,27 +761,27 @@ RecvAcceptorGreeting(Safekeeper *sk)
 	 *
 	 * If we do have quorum, we can start an election.
 	 */
-	if (n_connected < quorum)
+	if (wp->n_connected < wp->quorum)
 	{
 		/*
 		 * SS_VOTING is an idle state; read-ready indicates the connection
 		 * closed.
 		 */
-		UpdateEventSet(sk, WL_SOCKET_READABLE);
+		wp->api.update_event_set(sk, WL_SOCKET_READABLE);
 	}
 	else
 	{
 		/*
 		 * Now send voting request to the cohort and wait responses
 		 */
-		for (int j = 0; j < n_safekeepers; j++)
+		for (int j = 0; j < wp->n_safekeepers; j++)
 		{
 			/*
 			 * Remember: SS_VOTING indicates that the safekeeper is
 			 * participating in voting, but hasn't sent anything yet.
 			 */
-			if (safekeeper[j].state == SS_VOTING)
-				SendVoteRequest(&safekeeper[j]);
+			if (wp->safekeeper[j].state == SS_VOTING)
+				SendVoteRequest(&wp->safekeeper[j]);
 		}
 	}
 }
@@ -1156,10 +789,12 @@ RecvAcceptorGreeting(Safekeeper *sk)
 static void
 SendVoteRequest(Safekeeper *sk)
 {
+	WalProposer *wp = sk->wp;
+
 	/* We have quorum for voting, send our vote request */
-	elog(LOG, "requesting vote from %s:%s for term " UINT64_FORMAT, sk->host, sk->port, voteRequest.term);
+	elog(LOG, "requesting vote from %s:%s for term " UINT64_FORMAT, sk->host, sk->port, wp->voteRequest.term);
 	/* On failure, logging & resetting is handled */
-	if (!BlockingWrite(sk, &voteRequest, sizeof(voteRequest), SS_WAIT_VERDICT))
+	if (!BlockingWrite(sk, &wp->voteRequest, sizeof(wp->voteRequest), SS_WAIT_VERDICT))
 		return;
 
 	/* If successful, wait for read-ready with SS_WAIT_VERDICT */
@@ -1168,8 +803,10 @@ SendVoteRequest(Safekeeper *sk)
 static void
 RecvVoteResponse(Safekeeper *sk)
 {
+	WalProposer *wp = sk->wp;
+
 	sk->voteResponse.apm.tag = 'v';
-	if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) & sk->voteResponse))
+	if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->voteResponse))
 		return;
 
 	elog(LOG,
@@ -1185,21 +822,21 @@ RecvVoteResponse(Safekeeper *sk)
 	 * we are not elected yet and thus need the vote.
 	 */
 	if ((!sk->voteResponse.voteGiven) &&
-		(sk->voteResponse.term > propTerm || n_votes < quorum))
+		(sk->voteResponse.term > wp->propTerm || wp->n_votes < wp->quorum))
 	{
 		elog(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejects our connection request with term " INT64_FORMAT "",
 			 sk->host, sk->port,
-			 sk->voteResponse.term, propTerm);
+			 sk->voteResponse.term, wp->propTerm);
 	}
-	Assert(sk->voteResponse.term == propTerm);
+	Assert(sk->voteResponse.term == wp->propTerm);
 
 	/* Handshake completed, do we have quorum? */
-	n_votes++;
-	if (n_votes < quorum)
+	wp->n_votes++;
+	if (wp->n_votes < wp->quorum)
 	{
 		sk->state = SS_IDLE;	/* can't do much yet, no quorum */
 	}
-	else if (n_votes > quorum)
+	else if (wp->n_votes > wp->quorum)
 	{
 		/* recovery already performed, just start streaming */
 		SendProposerElected(sk);
@@ -1207,10 +844,10 @@ RecvVoteResponse(Safekeeper *sk)
 	else
 	{
 		sk->state = SS_IDLE;
-		UpdateEventSet(sk, WL_SOCKET_READABLE); /* Idle states wait for
-												 * read-ready */
+		/* Idle state waits for read-ready events */
+		wp->api.update_event_set(sk, WL_SOCKET_READABLE);
 
-		HandleElectedProposer();
+		HandleElectedProposer(sk->wp);
 	}
 }
 
@@ -1222,36 +859,36 @@ RecvVoteResponse(Safekeeper *sk)
  * replication from walsender.
  */
 static void
-HandleElectedProposer(void)
+HandleElectedProposer(WalProposer *wp)
 {
-	DetermineEpochStartLsn();
+	DetermineEpochStartLsn(wp);
 
 	/*
 	 * Check if not all safekeepers are up-to-date, we need to download WAL
 	 * needed to synchronize them
 	 */
-	if (truncateLsn < propEpochStartLsn)
+	if (wp->truncateLsn < wp->propEpochStartLsn)
 	{
 		elog(LOG,
 			 "start recovery because truncateLsn=%X/%X is not "
 			 "equal to epochStartLsn=%X/%X",
-			 LSN_FORMAT_ARGS(truncateLsn),
-			 LSN_FORMAT_ARGS(propEpochStartLsn));
+			 LSN_FORMAT_ARGS(wp->truncateLsn),
+			 LSN_FORMAT_ARGS(wp->propEpochStartLsn));
 		/* Perform recovery */
-		if (!WalProposerRecovery(donor, greetRequest.timeline, truncateLsn, propEpochStartLsn))
+		if (!wp->api.recovery_download(&wp->safekeeper[wp->donor], wp->greetRequest.timeline, wp->truncateLsn, wp->propEpochStartLsn))
 			elog(FATAL, "Failed to recover state");
 	}
-	else if (syncSafekeepers)
+	else if (wp->config->syncSafekeepers)
 	{
 		/* Sync is not needed: just exit */
-		fprintf(stdout, "%X/%X\n", LSN_FORMAT_ARGS(propEpochStartLsn));
-		exit(0);
+		wp->api.finish_sync_safekeepers(wp->propEpochStartLsn);
+		/* unreachable */
 	}
 
-	for (int i = 0; i < n_safekeepers; i++)
+	for (int i = 0; i < wp->n_safekeepers; i++)
 	{
-		if (safekeeper[i].state == SS_IDLE)
-			SendProposerElected(&safekeeper[i]);
+		if (wp->safekeeper[i].state == SS_IDLE)
+			SendProposerElected(&wp->safekeeper[i]);
 	}
 
 	/*
@@ -1260,7 +897,7 @@ HandleElectedProposer(void)
 	 * because that state is used only for quorum waiting.
 	 */
 
-	if (syncSafekeepers)
+	if (wp->config->syncSafekeepers)
 	{
 		/*
 		 * Send empty message to enforce receiving feedback even from nodes
@@ -1268,19 +905,19 @@ HandleElectedProposer(void)
 		 * epoch which finishes sync-safeekepers who doesn't generate any real
 		 * new records. Will go away once we switch to async acks.
 		 */
-		BroadcastAppendRequest();
+		BroadcastAppendRequest(wp);
 
 		/* keep polling until all safekeepers are synced */
 		return;
 	}
 
-	WalProposerStartStreaming(propEpochStartLsn);
+	wp->api.start_streaming(wp, wp->propEpochStartLsn);
 	/* Should not return here */
 }
 
 /* latest term in TermHistory, or 0 is there is no entries */
 static term_t
-GetHighestTerm(TermHistory * th)
+GetHighestTerm(TermHistory *th)
 {
 	return th->n_entries > 0 ? th->entries[th->n_entries - 1].term : 0;
 }
@@ -1294,9 +931,9 @@ GetEpoch(Safekeeper *sk)
 
 /* If LSN points to the page header, skip it */
 static XLogRecPtr
-SkipXLogPageHeader(XLogRecPtr lsn)
+SkipXLogPageHeader(WalProposer *wp, XLogRecPtr lsn)
 {
-	if (XLogSegmentOffset(lsn, wal_segment_size) == 0)
+	if (XLogSegmentOffset(lsn, wp->config->wal_segment_size) == 0)
 	{
 		lsn += SizeOfXLogLongPHD;
 	}
@@ -1316,41 +953,41 @@ SkipXLogPageHeader(XLogRecPtr lsn)
  * only for skipping recovery).
  */
 static void
-DetermineEpochStartLsn(void)
+DetermineEpochStartLsn(WalProposer *wp)
 {
 	TermHistory *dth;
 
-	propEpochStartLsn = InvalidXLogRecPtr;
-	donorEpoch = 0;
-	truncateLsn = InvalidXLogRecPtr;
-	timelineStartLsn = InvalidXLogRecPtr;
+	wp->propEpochStartLsn = InvalidXLogRecPtr;
+	wp->donorEpoch = 0;
+	wp->truncateLsn = InvalidXLogRecPtr;
+	wp->timelineStartLsn = InvalidXLogRecPtr;
 
-	for (int i = 0; i < n_safekeepers; i++)
+	for (int i = 0; i < wp->n_safekeepers; i++)
 	{
-		if (safekeeper[i].state == SS_IDLE)
+		if (wp->safekeeper[i].state == SS_IDLE)
 		{
-			if (GetEpoch(&safekeeper[i]) > donorEpoch ||
-				(GetEpoch(&safekeeper[i]) == donorEpoch &&
-				 safekeeper[i].voteResponse.flushLsn > propEpochStartLsn))
+			if (GetEpoch(&wp->safekeeper[i]) > wp->donorEpoch ||
+				(GetEpoch(&wp->safekeeper[i]) == wp->donorEpoch &&
+				 wp->safekeeper[i].voteResponse.flushLsn > wp->propEpochStartLsn))
 			{
-				donorEpoch = GetEpoch(&safekeeper[i]);
-				propEpochStartLsn = safekeeper[i].voteResponse.flushLsn;
-				donor = i;
+				wp->donorEpoch = GetEpoch(&wp->safekeeper[i]);
+				wp->propEpochStartLsn = wp->safekeeper[i].voteResponse.flushLsn;
+				wp->donor = i;
 			}
-			truncateLsn = Max(safekeeper[i].voteResponse.truncateLsn, truncateLsn);
+			wp->truncateLsn = Max(wp->safekeeper[i].voteResponse.truncateLsn, wp->truncateLsn);
 
-			if (safekeeper[i].voteResponse.timelineStartLsn != InvalidXLogRecPtr)
+			if (wp->safekeeper[i].voteResponse.timelineStartLsn != InvalidXLogRecPtr)
 			{
 				/* timelineStartLsn should be the same everywhere or unknown */
-				if (timelineStartLsn != InvalidXLogRecPtr &&
-					timelineStartLsn != safekeeper[i].voteResponse.timelineStartLsn)
+				if (wp->timelineStartLsn != InvalidXLogRecPtr &&
+					wp->timelineStartLsn != wp->safekeeper[i].voteResponse.timelineStartLsn)
 				{
 					elog(WARNING,
 						 "inconsistent timelineStartLsn: current %X/%X, received %X/%X",
-						 LSN_FORMAT_ARGS(timelineStartLsn),
-						 LSN_FORMAT_ARGS(safekeeper[i].voteResponse.timelineStartLsn));
+						 LSN_FORMAT_ARGS(wp->timelineStartLsn),
+						 LSN_FORMAT_ARGS(wp->safekeeper[i].voteResponse.timelineStartLsn));
 				}
-				timelineStartLsn = safekeeper[i].voteResponse.timelineStartLsn;
+				wp->timelineStartLsn = wp->safekeeper[i].voteResponse.timelineStartLsn;
 			}
 		}
 	}
@@ -1359,14 +996,14 @@ DetermineEpochStartLsn(void)
 	 * If propEpochStartLsn is 0 everywhere, we are bootstrapping -- nothing
 	 * was committed yet. Start streaming then from the basebackup LSN.
 	 */
-	if (propEpochStartLsn == InvalidXLogRecPtr && !syncSafekeepers)
+	if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers)
 	{
-		propEpochStartLsn = truncateLsn = GetRedoStartLsn();
-		if (timelineStartLsn == InvalidXLogRecPtr)
+		wp->propEpochStartLsn = wp->truncateLsn = wp->api.get_redo_start_lsn();
+		if (wp->timelineStartLsn == InvalidXLogRecPtr)
 		{
-			timelineStartLsn = GetRedoStartLsn();
+			wp->timelineStartLsn = wp->api.get_redo_start_lsn();
 		}
-		elog(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(propEpochStartLsn));
+		elog(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(wp->propEpochStartLsn));
 	}
 
 	/*
@@ -1374,46 +1011,48 @@ DetermineEpochStartLsn(void)
 	 * some connected safekeeper; it must have carried truncateLsn pointing to
 	 * the first record.
 	 */
-	Assert((truncateLsn != InvalidXLogRecPtr) ||
-		   (syncSafekeepers && truncateLsn == propEpochStartLsn));
+	Assert((wp->truncateLsn != InvalidXLogRecPtr) ||
+		   (wp->config->syncSafekeepers && wp->truncateLsn == wp->propEpochStartLsn));
 
 	/*
 	 * We will be generating WAL since propEpochStartLsn, so we should set
 	 * availableLsn to mark this LSN as the latest available position.
 	 */
-	availableLsn = propEpochStartLsn;
+	wp->availableLsn = wp->propEpochStartLsn;
 
 	/*
 	 * Proposer's term history is the donor's + its own entry.
 	 */
-	dth = &safekeeper[donor].voteResponse.termHistory;
-	propTermHistory.n_entries = dth->n_entries + 1;
-	propTermHistory.entries = palloc(sizeof(TermSwitchEntry) * propTermHistory.n_entries);
-	memcpy(propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries);
-	propTermHistory.entries[propTermHistory.n_entries - 1].term = propTerm;
-	propTermHistory.entries[propTermHistory.n_entries - 1].lsn = propEpochStartLsn;
+	dth = &wp->safekeeper[wp->donor].voteResponse.termHistory;
+	wp->propTermHistory.n_entries = dth->n_entries + 1;
+	wp->propTermHistory.entries = palloc(sizeof(TermSwitchEntry) * wp->propTermHistory.n_entries);
+	memcpy(wp->propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries);
+	wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].term = wp->propTerm;
+	wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].lsn = wp->propEpochStartLsn;
 
 	elog(LOG, "got votes from majority (%d) of nodes, term " UINT64_FORMAT ", epochStartLsn %X/%X, donor %s:%s, truncate_lsn %X/%X",
-		 quorum,
-		 propTerm,
-		 LSN_FORMAT_ARGS(propEpochStartLsn),
-		 safekeeper[donor].host, safekeeper[donor].port,
-		 LSN_FORMAT_ARGS(truncateLsn));
+		 wp->quorum,
+		 wp->propTerm,
+		 LSN_FORMAT_ARGS(wp->propEpochStartLsn),
+		 wp->safekeeper[wp->donor].host, wp->safekeeper[wp->donor].port,
+		 LSN_FORMAT_ARGS(wp->truncateLsn));
 
 	/*
 	 * Ensure the basebackup we are running (at RedoStartLsn) matches LSN
 	 * since which we are going to write according to the consensus. If not,
 	 * we must bail out, as clog and other non rel data is inconsistent.
 	 */
-	if (!syncSafekeepers)
+	if (!wp->config->syncSafekeepers)
 	{
+		WalproposerShmemState *walprop_shared = wp->api.get_shmem_state();
+
 		/*
 		 * Basebackup LSN always points to the beginning of the record (not
 		 * the page), as StartupXLOG most probably wants it this way.
 		 * Safekeepers don't skip header as they need continious stream of
 		 * data, so correct LSN for comparison.
 		 */
-		if (SkipXLogPageHeader(propEpochStartLsn) != GetRedoStartLsn())
+		if (SkipXLogPageHeader(wp, wp->propEpochStartLsn) != wp->api.get_redo_start_lsn())
 		{
 			/*
 			 * However, allow to proceed if previously elected leader was me;
@@ -1425,119 +1064,14 @@ DetermineEpochStartLsn(void)
 			{
 				elog(PANIC,
 					 "collected propEpochStartLsn %X/%X, but basebackup LSN %X/%X",
-					 LSN_FORMAT_ARGS(propEpochStartLsn),
-					 LSN_FORMAT_ARGS(GetRedoStartLsn()));
+					 LSN_FORMAT_ARGS(wp->propEpochStartLsn),
+					 LSN_FORMAT_ARGS(wp->api.get_redo_start_lsn()));
 			}
 		}
-		walprop_shared->mineLastElectedTerm = propTerm;
+		walprop_shared->mineLastElectedTerm = wp->propTerm;
 	}
 }
 
-/*
- * Receive WAL from most advanced safekeeper
- */
-static bool
-WalProposerRecovery(int donor, TimeLineID timeline, XLogRecPtr startpos, XLogRecPtr endpos)
-{
-	char	   *err;
-	WalReceiverConn *wrconn;
-	WalRcvStreamOptions options;
-	char conninfo[MAXCONNINFO];
-
-	if (!neon_auth_token)
-	{
-		memcpy(conninfo, safekeeper[donor].conninfo, MAXCONNINFO);
-	}
-	else
-	{
-		int written = 0;
-
-		written = snprintf((char *) conninfo, MAXCONNINFO, "password=%s %s", neon_auth_token, safekeeper[donor].conninfo);
-		if (written > MAXCONNINFO || written < 0)
-			elog(FATAL, "could not append password to the safekeeper connection string");
-	}
-
-#if PG_MAJORVERSION_NUM < 16
-	wrconn = walrcv_connect(conninfo, false, "wal_proposer_recovery", &err);
-#else
-	wrconn = walrcv_connect(conninfo, false, false, "wal_proposer_recovery", &err);
-#endif
-
-	if (!wrconn)
-	{
-		ereport(WARNING,
-				(errmsg("could not connect to WAL acceptor %s:%s: %s",
-						safekeeper[donor].host, safekeeper[donor].port,
-						err)));
-		return false;
-	}
-	elog(LOG,
-		 "start recovery from %s:%s starting from %X/%08X till %X/%08X timeline "
-		 "%d",
-		 safekeeper[donor].host, safekeeper[donor].port, (uint32) (startpos >> 32),
-		 (uint32) startpos, (uint32) (endpos >> 32), (uint32) endpos, timeline);
-
-	options.logical = false;
-	options.startpoint = startpos;
-	options.slotname = NULL;
-	options.proto.physical.startpointTLI = timeline;
-
-	if (walrcv_startstreaming(wrconn, &options))
-	{
-		XLogRecPtr	rec_start_lsn;
-		XLogRecPtr	rec_end_lsn = 0;
-		int			len;
-		char	   *buf;
-		pgsocket	wait_fd = PGINVALID_SOCKET;
-
-		while ((len = walrcv_receive(wrconn, &buf, &wait_fd)) >= 0)
-		{
-			if (len == 0)
-			{
-				(void) WaitLatchOrSocket(
-										 MyLatch, WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE, wait_fd,
-										 -1, WAIT_EVENT_WAL_RECEIVER_MAIN);
-			}
-			else
-			{
-				Assert(buf[0] == 'w' || buf[0] == 'k');
-				if (buf[0] == 'k')
-					continue;	/* keepalive */
-				memcpy(&rec_start_lsn, &buf[XLOG_HDR_START_POS],
-					   sizeof rec_start_lsn);
-				rec_start_lsn = pg_ntoh64(rec_start_lsn);
-				rec_end_lsn = rec_start_lsn + len - XLOG_HDR_SIZE;
-
-				/* write WAL to disk */
-				XLogWalPropWrite(&buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn);
-
-				ereport(DEBUG1,
-						(errmsg("Recover message %X/%X length %d",
-								LSN_FORMAT_ARGS(rec_start_lsn), len)));
-				if (rec_end_lsn >= endpos)
-					break;
-			}
-		}
-		ereport(LOG,
-				(errmsg("end of replication stream at %X/%X: %m",
-						LSN_FORMAT_ARGS(rec_end_lsn))));
-		walrcv_disconnect(wrconn);
-
-		/* failed to receive all WAL till endpos */
-		if (rec_end_lsn < endpos)
-			return false;
-	}
-	else
-	{
-		ereport(LOG,
-				(errmsg("primary server contains no more WAL on requested timeline %u LSN %X/%08X",
-						timeline, (uint32) (startpos >> 32), (uint32) startpos)));
-		return false;
-	}
-
-	return true;
-}
-
 /*
  * Determine for sk the starting streaming point and send it message
  * 1) Announcing we are elected proposer (which immediately advances epoch if
@@ -1550,6 +1084,7 @@ WalProposerRecovery(int donor, TimeLineID timeline, XLogRecPtr startpos, XLogRec
 static void
 SendProposerElected(Safekeeper *sk)
 {
+	WalProposer *wp = sk->wp;
 	ProposerElected msg;
 	TermHistory *th;
 	term_t		lastCommonTerm;
@@ -1567,22 +1102,22 @@ SendProposerElected(Safekeeper *sk)
 	th = &sk->voteResponse.termHistory;
 
 	/* We must start somewhere. */
-	Assert(propTermHistory.n_entries >= 1);
+	Assert(wp->propTermHistory.n_entries >= 1);
 
-	for (i = 0; i < Min(propTermHistory.n_entries, th->n_entries); i++)
+	for (i = 0; i < Min(wp->propTermHistory.n_entries, th->n_entries); i++)
 	{
-		if (propTermHistory.entries[i].term != th->entries[i].term)
+		if (wp->propTermHistory.entries[i].term != th->entries[i].term)
 			break;
 		/* term must begin everywhere at the same point */
-		Assert(propTermHistory.entries[i].lsn == th->entries[i].lsn);
+		Assert(wp->propTermHistory.entries[i].lsn == th->entries[i].lsn);
 	}
 	i--;						/* step back to the last common term */
 	if (i < 0)
 	{
 		/* safekeeper is empty or no common point, start from the beginning */
-		sk->startStreamingAt = propTermHistory.entries[0].lsn;
+		sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
 
-		if (sk->startStreamingAt < truncateLsn)
+		if (sk->startStreamingAt < wp->truncateLsn)
 		{
 			/*
 			 * There's a gap between the WAL starting point and a truncateLsn,
@@ -1603,10 +1138,10 @@ SendProposerElected(Safekeeper *sk)
 			 * safekeeper, and it's aligned to the WAL record, so we can
 			 * safely start streaming from this point.
 			 */
-			sk->startStreamingAt = truncateLsn;
+			sk->startStreamingAt = wp->truncateLsn;
 
 			elog(WARNING, "empty safekeeper joined cluster as %s:%s, historyStart=%X/%X, sk->startStreamingAt=%X/%X",
-				 sk->host, sk->port, LSN_FORMAT_ARGS(propTermHistory.entries[0].lsn),
+				 sk->host, sk->port, LSN_FORMAT_ARGS(wp->propTermHistory.entries[0].lsn),
 				 LSN_FORMAT_ARGS(sk->startStreamingAt));
 		}
 	}
@@ -1618,28 +1153,28 @@ SendProposerElected(Safekeeper *sk)
 		 * proposer, LSN it is currently writing, but then we just pick
 		 * safekeeper pos as it obviously can't be higher.
 		 */
-		if (propTermHistory.entries[i].term == propTerm)
+		if (wp->propTermHistory.entries[i].term == wp->propTerm)
 		{
 			sk->startStreamingAt = sk->voteResponse.flushLsn;
 		}
 		else
 		{
-			XLogRecPtr	propEndLsn = propTermHistory.entries[i + 1].lsn;
+			XLogRecPtr	propEndLsn = wp->propTermHistory.entries[i + 1].lsn;
 			XLogRecPtr	skEndLsn = (i + 1 < th->n_entries ? th->entries[i + 1].lsn : sk->voteResponse.flushLsn);
 
 			sk->startStreamingAt = Min(propEndLsn, skEndLsn);
 		}
 	}
 
-	Assert(sk->startStreamingAt >= truncateLsn && sk->startStreamingAt <= availableLsn);
+	Assert(sk->startStreamingAt >= wp->truncateLsn && sk->startStreamingAt <= wp->availableLsn);
 
 	msg.tag = 'e';
-	msg.term = propTerm;
+	msg.term = wp->propTerm;
 	msg.startStreamingAt = sk->startStreamingAt;
-	msg.termHistory = &propTermHistory;
-	msg.timelineStartLsn = timelineStartLsn;
+	msg.termHistory = &wp->propTermHistory;
+	msg.timelineStartLsn = wp->timelineStartLsn;
 
-	lastCommonTerm = i >= 0 ? propTermHistory.entries[i].term : 0;
+	lastCommonTerm = i >= 0 ? wp->propTermHistory.entries[i].term : 0;
 	elog(LOG,
 		 "sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X",
 		 sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn));
@@ -1662,22 +1197,6 @@ SendProposerElected(Safekeeper *sk)
 	StartStreaming(sk);
 }
 
-/*
- * Start walsender streaming replication
- */
-static void
-WalProposerStartStreaming(XLogRecPtr startpos)
-{
-	StartReplicationCmd cmd;
-
-	elog(LOG, "WAL proposer starts streaming at %X/%X",
-		 LSN_FORMAT_ARGS(startpos));
-	cmd.slotname = WAL_PROPOSER_SLOT_NAME;
-	cmd.timeline = greetRequest.timeline;
-	cmd.startpoint = startpos;
-	StartProposerReplication(&cmd);
-}
-
 /*
  * Start streaming to safekeeper sk, always updates state to SS_ACTIVE and sets
  * correct event set.
@@ -1719,25 +1238,25 @@ SendMessageToNode(Safekeeper *sk)
  * Broadcast new message to all caught-up safekeepers
  */
 static void
-BroadcastAppendRequest()
+BroadcastAppendRequest(WalProposer *wp)
 {
-	for (int i = 0; i < n_safekeepers; i++)
-		if (safekeeper[i].state == SS_ACTIVE)
-			SendMessageToNode(&safekeeper[i]);
+	for (int i = 0; i < wp->n_safekeepers; i++)
+		if (wp->safekeeper[i].state == SS_ACTIVE)
+			SendMessageToNode(&wp->safekeeper[i]);
 }
 
 static void
-PrepareAppendRequest(AppendRequestHeader * req, XLogRecPtr beginLsn, XLogRecPtr endLsn)
+PrepareAppendRequest(WalProposer *wp, AppendRequestHeader *req, XLogRecPtr beginLsn, XLogRecPtr endLsn)
 {
 	Assert(endLsn >= beginLsn);
 	req->tag = 'a';
-	req->term = propTerm;
-	req->epochStartLsn = propEpochStartLsn;
+	req->term = wp->propTerm;
+	req->epochStartLsn = wp->propEpochStartLsn;
 	req->beginLsn = beginLsn;
 	req->endLsn = endLsn;
-	req->commitLsn = GetAcknowledgedByQuorumWALPosition();
-	req->truncateLsn = truncateLsn;
-	req->proposerId = greetRequest.proposerId;
+	req->commitLsn = GetAcknowledgedByQuorumWALPosition(wp);
+	req->truncateLsn = wp->truncateLsn;
+	req->proposerId = wp->greetRequest.proposerId;
 }
 
 /*
@@ -1746,6 +1265,8 @@ PrepareAppendRequest(AppendRequestHeader * req, XLogRecPtr beginLsn, XLogRecPtr
 static void
 HandleActiveState(Safekeeper *sk, uint32 events)
 {
+	WalProposer *wp = sk->wp;
+
 	uint32		newEvents = WL_SOCKET_READABLE;
 
 	if (events & WL_SOCKET_WRITEABLE)
@@ -1765,10 +1286,10 @@ HandleActiveState(Safekeeper *sk, uint32 events)
 	 * after arrival. But it's good to have it here in case we change this
 	 * behavior in the future.
 	 */
-	if (sk->streamingAt != availableLsn || sk->flushWrite)
+	if (sk->streamingAt != wp->availableLsn || sk->flushWrite)
 		newEvents |= WL_SOCKET_WRITEABLE;
 
-	UpdateEventSet(sk, newEvents);
+	wp->api.update_event_set(sk, newEvents);
 }
 
 /*
@@ -1783,10 +1304,10 @@ HandleActiveState(Safekeeper *sk, uint32 events)
 static bool
 SendAppendRequests(Safekeeper *sk)
 {
+	WalProposer *wp = sk->wp;
 	XLogRecPtr	endLsn;
 	AppendRequestHeader *req;
 	PGAsyncWriteResult writeResult;
-	WALReadError errinfo;
 	bool		sentAnything = false;
 
 	if (sk->flushWrite)
@@ -1803,7 +1324,7 @@ SendAppendRequests(Safekeeper *sk)
 		sk->flushWrite = false;
 	}
 
-	while (sk->streamingAt != availableLsn || !sentAnything)
+	while (sk->streamingAt != wp->availableLsn || !sentAnything)
 	{
 		sentAnything = true;
 
@@ -1811,13 +1332,13 @@ SendAppendRequests(Safekeeper *sk)
 		endLsn += MAX_SEND_SIZE;
 
 		/* if we went beyond available WAL, back off */
-		if (endLsn > availableLsn)
+		if (endLsn > wp->availableLsn)
 		{
-			endLsn = availableLsn;
+			endLsn = wp->availableLsn;
 		}
 
 		req = &sk->appendRequest;
-		PrepareAppendRequest(&sk->appendRequest, sk->streamingAt, endLsn);
+		PrepareAppendRequest(sk->wp, &sk->appendRequest, sk->streamingAt, endLsn);
 
 		ereport(DEBUG2,
 				(errmsg("sending message len %ld beginLsn=%X/%X endLsn=%X/%X commitLsn=%X/%X truncateLsn=%X/%X to %s:%s",
@@ -1825,7 +1346,7 @@ SendAppendRequests(Safekeeper *sk)
 						LSN_FORMAT_ARGS(req->beginLsn),
 						LSN_FORMAT_ARGS(req->endLsn),
 						LSN_FORMAT_ARGS(req->commitLsn),
-						LSN_FORMAT_ARGS(truncateLsn), sk->host, sk->port)));
+						LSN_FORMAT_ARGS(wp->truncateLsn), sk->host, sk->port)));
 
 		resetStringInfo(&sk->outbuf);
 
@@ -1834,23 +1355,14 @@ SendAppendRequests(Safekeeper *sk)
 
 		/* write the WAL itself */
 		enlargeStringInfo(&sk->outbuf, req->endLsn - req->beginLsn);
-		if (!WALRead(sk->xlogreader,
-					 &sk->outbuf.data[sk->outbuf.len],
-					 req->beginLsn,
-					 req->endLsn - req->beginLsn,
-#if PG_VERSION_NUM >= 150000
-		/* FIXME don't use hardcoded timeline_id here */
-					 1,
-#else
-					 ThisTimeLineID,
-#endif
-					 &errinfo))
-		{
-			WALReadRaiseError(&errinfo);
-		}
+		/* wal_read will raise error on failure */
+		wp->api.wal_read(sk->xlogreader,
+						 &sk->outbuf.data[sk->outbuf.len],
+						 req->beginLsn,
+						 req->endLsn - req->beginLsn);
 		sk->outbuf.len += req->endLsn - req->beginLsn;
 
-		writeResult = walprop_async_write(sk->conn, sk->outbuf.data, sk->outbuf.len);
+		writeResult = wp->api.conn_async_write(sk->conn, sk->outbuf.data, sk->outbuf.len);
 
 		/* Mark current message as sent, whatever the result is */
 		sk->streamingAt = endLsn;
@@ -1874,7 +1386,7 @@ SendAppendRequests(Safekeeper *sk)
 			case PG_ASYNC_WRITE_FAIL:
 				elog(WARNING, "Failed to send to node %s:%s in %s state: %s",
 					 sk->host, sk->port, FormatSafekeeperState(sk->state),
-					 walprop_error_message(sk->conn));
+					 wp->api.conn_error_message(sk->conn));
 				ShutdownConnection(sk);
 				return false;
 			default:
@@ -1897,6 +1409,7 @@ SendAppendRequests(Safekeeper *sk)
 static bool
 RecvAppendResponses(Safekeeper *sk)
 {
+	WalProposer *wp = sk->wp;
 	XLogRecPtr	minQuorumLsn;
 	bool		readAnything = false;
 
@@ -1908,7 +1421,7 @@ RecvAppendResponses(Safekeeper *sk)
 		 * work until later.
 		 */
 		sk->appendResponse.apm.tag = 'a';
-		if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) & sk->appendResponse))
+		if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->appendResponse))
 			break;
 
 		ereport(DEBUG2,
@@ -1918,12 +1431,12 @@ RecvAppendResponses(Safekeeper *sk)
 						LSN_FORMAT_ARGS(sk->appendResponse.commitLsn),
 						sk->host, sk->port)));
 
-		if (sk->appendResponse.term > propTerm)
+		if (sk->appendResponse.term > wp->propTerm)
 		{
 			/* Another compute with higher term is running. */
 			elog(PANIC, "WAL acceptor %s:%s with term " INT64_FORMAT " rejected our request, our term " INT64_FORMAT "",
 				 sk->host, sk->port,
-				 sk->appendResponse.term, propTerm);
+				 sk->appendResponse.term, wp->propTerm);
 		}
 
 		readAnything = true;
@@ -1932,16 +1445,16 @@ RecvAppendResponses(Safekeeper *sk)
 	if (!readAnything)
 		return sk->state == SS_ACTIVE;
 
-	HandleSafekeeperResponse();
+	HandleSafekeeperResponse(wp);
 
 	/*
 	 * Also send the new commit lsn to all the safekeepers.
 	 */
-	minQuorumLsn = GetAcknowledgedByQuorumWALPosition();
-	if (minQuorumLsn > lastSentCommitLsn)
+	minQuorumLsn = GetAcknowledgedByQuorumWALPosition(wp);
+	if (minQuorumLsn > wp->lastSentCommitLsn)
 	{
-		BroadcastAppendRequest();
-		lastSentCommitLsn = minQuorumLsn;
+		BroadcastAppendRequest(wp);
+		wp->lastSentCommitLsn = minQuorumLsn;
 	}
 
 	return sk->state == SS_ACTIVE;
@@ -1949,7 +1462,7 @@ RecvAppendResponses(Safekeeper *sk)
 
 /* Parse a PageserverFeedback message, or the PageserverFeedback part of an AppendResponse */
 void
-ParsePageserverFeedbackMessage(StringInfo reply_message, PageserverFeedback * rf)
+ParsePageserverFeedbackMessage(StringInfo reply_message, PageserverFeedback *rf)
 {
 	uint8		nkeys;
 	int			i;
@@ -2025,56 +1538,20 @@ ParsePageserverFeedbackMessage(StringInfo reply_message, PageserverFeedback * rf
 	}
 }
 
-/*
- * Combine hot standby feedbacks from all safekeepers.
- */
-static void
-CombineHotStanbyFeedbacks(HotStandbyFeedback * hs)
-{
-	hs->ts = 0;
-	hs->xmin.value = ~0;		/* largest unsigned value */
-	hs->catalog_xmin.value = ~0;	/* largest unsigned value */
-
-	for (int i = 0; i < n_safekeepers; i++)
-	{
-		if (safekeeper[i].appendResponse.hs.ts != 0)
-		{
-			HotStandbyFeedback *skhs = &safekeeper[i].appendResponse.hs;
-			if (FullTransactionIdIsNormal(skhs->xmin)
-				&& FullTransactionIdPrecedes(skhs->xmin, hs->xmin))
-			{
-				hs->xmin = skhs->xmin;
-				hs->ts = skhs->ts;
-			}
-			if (FullTransactionIdIsNormal(skhs->catalog_xmin)
-				&& FullTransactionIdPrecedes(skhs->catalog_xmin, hs->xmin))
-			{
-				hs->catalog_xmin = skhs->catalog_xmin;
-				hs->ts = skhs->ts;
-			}
-		}
-	}
-
-	if (hs->xmin.value == ~0)
-		hs->xmin = InvalidFullTransactionId;
-	if (hs->catalog_xmin.value == ~0)
-		hs->catalog_xmin = InvalidFullTransactionId;
-}
-
 /*
  * Get minimum of flushed LSNs of all safekeepers, which is the LSN of the
  * last WAL record that can be safely discarded.
  */
 static XLogRecPtr
-CalculateMinFlushLsn(void)
+CalculateMinFlushLsn(WalProposer *wp)
 {
-	XLogRecPtr	lsn = n_safekeepers > 0
-	? safekeeper[0].appendResponse.flushLsn
-	: InvalidXLogRecPtr;
+	XLogRecPtr	lsn = wp->n_safekeepers > 0
+		? wp->safekeeper[0].appendResponse.flushLsn
+		: InvalidXLogRecPtr;
 
-	for (int i = 1; i < n_safekeepers; i++)
+	for (int i = 1; i < wp->n_safekeepers; i++)
 	{
-		lsn = Min(lsn, safekeeper[i].appendResponse.flushLsn);
+		lsn = Min(lsn, wp->safekeeper[i].appendResponse.flushLsn);
 	}
 	return lsn;
 }
@@ -2083,163 +1560,37 @@ CalculateMinFlushLsn(void)
  * Calculate WAL position acknowledged by quorum
  */
 static XLogRecPtr
-GetAcknowledgedByQuorumWALPosition(void)
+GetAcknowledgedByQuorumWALPosition(WalProposer *wp)
 {
 	XLogRecPtr	responses[MAX_SAFEKEEPERS];
 
 	/*
 	 * Sort acknowledged LSNs
 	 */
-	for (int i = 0; i < n_safekeepers; i++)
+	for (int i = 0; i < wp->n_safekeepers; i++)
 	{
 		/*
 		 * Like in Raft, we aren't allowed to commit entries from previous
 		 * terms, so ignore reported LSN until it gets to epochStartLsn.
 		 */
-		responses[i] = safekeeper[i].appendResponse.flushLsn >= propEpochStartLsn ? safekeeper[i].appendResponse.flushLsn : 0;
+		responses[i] = wp->safekeeper[i].appendResponse.flushLsn >= wp->propEpochStartLsn ? wp->safekeeper[i].appendResponse.flushLsn : 0;
 	}
-	qsort(responses, n_safekeepers, sizeof(XLogRecPtr), CompareLsn);
+	qsort(responses, wp->n_safekeepers, sizeof(XLogRecPtr), CompareLsn);
 
 	/*
 	 * Get the smallest LSN committed by quorum
 	 */
-	return responses[n_safekeepers - quorum];
-}
-
-/*
- * WalproposerShmemSize --- report amount of shared memory space needed
- */
-Size
-WalproposerShmemSize(void)
-{
-	return sizeof(WalproposerShmemState);
-}
-
-bool
-WalproposerShmemInit(void)
-{
-	bool		found;
-
-	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
-	walprop_shared = ShmemInitStruct("Walproposer shared state",
-									 sizeof(WalproposerShmemState),
-									 &found);
-
-	if (!found)
-	{
-		memset(walprop_shared, 0, WalproposerShmemSize());
-		SpinLockInit(&walprop_shared->mutex);
-		pg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);
-	}
-	LWLockRelease(AddinShmemInitLock);
-
-	return found;
-}
-
-void
-replication_feedback_set(PageserverFeedback * rf)
-{
-	SpinLockAcquire(&walprop_shared->mutex);
-	memcpy(&walprop_shared->feedback, rf, sizeof(PageserverFeedback));
-	SpinLockRelease(&walprop_shared->mutex);
-}
-
-void
-replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn)
-{
-	SpinLockAcquire(&walprop_shared->mutex);
-	*writeLsn = walprop_shared->feedback.last_received_lsn;
-	*flushLsn = walprop_shared->feedback.disk_consistent_lsn;
-	*applyLsn = walprop_shared->feedback.remote_consistent_lsn;
-	SpinLockRelease(&walprop_shared->mutex);
-}
-
-/*
- * Get PageserverFeedback fields from the most advanced safekeeper
- */
-static void
-GetLatestNeonFeedback(PageserverFeedback * rf)
-{
-	int			latest_safekeeper = 0;
-	XLogRecPtr	last_received_lsn = InvalidXLogRecPtr;
-
-	for (int i = 0; i < n_safekeepers; i++)
-	{
-		if (safekeeper[i].appendResponse.rf.last_received_lsn > last_received_lsn)
-		{
-			latest_safekeeper = i;
-			last_received_lsn = safekeeper[i].appendResponse.rf.last_received_lsn;
-		}
-	}
-
-	rf->currentClusterSize = safekeeper[latest_safekeeper].appendResponse.rf.currentClusterSize;
-	rf->last_received_lsn = safekeeper[latest_safekeeper].appendResponse.rf.last_received_lsn;
-	rf->disk_consistent_lsn = safekeeper[latest_safekeeper].appendResponse.rf.disk_consistent_lsn;
-	rf->remote_consistent_lsn = safekeeper[latest_safekeeper].appendResponse.rf.remote_consistent_lsn;
-	rf->replytime = safekeeper[latest_safekeeper].appendResponse.rf.replytime;
-
-	elog(DEBUG2, "GetLatestNeonFeedback: currentClusterSize %lu,"
-		 " last_received_lsn %X/%X, disk_consistent_lsn %X/%X, remote_consistent_lsn %X/%X, replytime %lu",
-		 rf->currentClusterSize,
-		 LSN_FORMAT_ARGS(rf->last_received_lsn),
-		 LSN_FORMAT_ARGS(rf->disk_consistent_lsn),
-		 LSN_FORMAT_ARGS(rf->remote_consistent_lsn),
-		 rf->replytime);
-
-	replication_feedback_set(rf);
+	return responses[wp->n_safekeepers - wp->quorum];
 }
 
 static void
-HandleSafekeeperResponse(void)
+HandleSafekeeperResponse(WalProposer *wp)
 {
-	HotStandbyFeedback hsFeedback;
 	XLogRecPtr	minQuorumLsn;
-	XLogRecPtr	diskConsistentLsn;
 	XLogRecPtr	minFlushLsn;
 
-	minQuorumLsn = GetAcknowledgedByQuorumWALPosition();
-	diskConsistentLsn = quorumFeedback.rf.disk_consistent_lsn;
-
-	if (!syncSafekeepers)
-	{
-		/* Get PageserverFeedback fields from the most advanced safekeeper */
-		GetLatestNeonFeedback(&quorumFeedback.rf);
-		SetZenithCurrentClusterSize(quorumFeedback.rf.currentClusterSize);
-	}
-
-	if (minQuorumLsn > quorumFeedback.flushLsn || diskConsistentLsn != quorumFeedback.rf.disk_consistent_lsn)
-	{
-
-		if (minQuorumLsn > quorumFeedback.flushLsn)
-			quorumFeedback.flushLsn = minQuorumLsn;
-
-		/* advance the replication slot */
-		if (!syncSafekeepers)
-			ProcessStandbyReply(
-			/* write_lsn -  This is what durably stored in WAL service. */
-								quorumFeedback.flushLsn,
-			/* flush_lsn - This is what durably stored in WAL service. */
-								quorumFeedback.flushLsn,
-
-			/*
-			 * apply_lsn - This is what processed and durably saved at*
-			 * pageserver.
-			 */
-								quorumFeedback.rf.disk_consistent_lsn,
-								GetCurrentTimestamp(), false);
-	}
-
-	CombineHotStanbyFeedbacks(&hsFeedback);
-	if (hsFeedback.ts != 0 && memcmp(&hsFeedback, &quorumFeedback.hs, sizeof hsFeedback) != 0)
-	{
-		quorumFeedback.hs = hsFeedback;
-		if (!syncSafekeepers)
-			ProcessStandbyHSFeedback(hsFeedback.ts,
-									 XidFromFullTransactionId(hsFeedback.xmin),
-									 EpochFromFullTransactionId(hsFeedback.xmin),
-									 XidFromFullTransactionId(hsFeedback.catalog_xmin),
-									 EpochFromFullTransactionId(hsFeedback.catalog_xmin));
-	}
+	minQuorumLsn = GetAcknowledgedByQuorumWALPosition(wp);
+	wp->api.process_safekeeper_feedback(wp, minQuorumLsn);
 
 	/*
 	 * Try to advance truncateLsn to minFlushLsn, which is the last record
@@ -2255,17 +1606,16 @@ HandleSafekeeperResponse(void)
 	 * term' in Raft); 2) chunks we read from WAL and send are plain sheets of
 	 * bytes, but safekeepers ack only on record boundaries.
 	 */
-	minFlushLsn = CalculateMinFlushLsn();
-	if (minFlushLsn > truncateLsn)
+	minFlushLsn = CalculateMinFlushLsn(wp);
+	if (minFlushLsn > wp->truncateLsn)
 	{
-		truncateLsn = minFlushLsn;
+		wp->truncateLsn = minFlushLsn;
 
 		/*
 		 * Advance the replication slot to free up old WAL files. Note that
 		 * slot doesn't exist if we are in syncSafekeepers mode.
 		 */
-		if (MyReplicationSlot)
-			PhysicalConfirmReceivedLocation(truncateLsn);
+		wp->api.confirm_wal_streamed(wp->truncateLsn);
 	}
 
 	/*
@@ -2280,15 +1630,15 @@ HandleSafekeeperResponse(void)
 	 * (due to pageserver connecting to not-synced-safekeeper) we currently
 	 * wait for all seemingly alive safekeepers to get synced.
 	 */
-	if (syncSafekeepers)
+	if (wp->config->syncSafekeepers)
 	{
 		int			n_synced;
 
 		n_synced = 0;
-		for (int i = 0; i < n_safekeepers; i++)
+		for (int i = 0; i < wp->n_safekeepers; i++)
 		{
-			Safekeeper *sk = &safekeeper[i];
-			bool		synced = sk->appendResponse.commitLsn >= propEpochStartLsn;
+			Safekeeper *sk = &wp->safekeeper[i];
+			bool		synced = sk->appendResponse.commitLsn >= wp->propEpochStartLsn;
 
 			/* alive safekeeper which is not synced yet; wait for it */
 			if (sk->state != SS_OFFLINE && !synced)
@@ -2297,23 +1647,23 @@ HandleSafekeeperResponse(void)
 				n_synced++;
 		}
 
-		if (n_synced >= quorum)
+		if (n_synced >= wp->quorum)
 		{
 			/* A quorum of safekeepers has been synced! */
-			
-			/*
-			 * Send empty message to broadcast latest truncateLsn to all safekeepers.
-			 * This helps to finish next sync-safekeepers eailier, by skipping recovery
-			 * step.
-			 * 
-			 * We don't need to wait for response because it doesn't affect correctness,
-			 * and TCP should be able to deliver the message to safekeepers in case of
-			 * network working properly.
-			 */
-			BroadcastAppendRequest();
 
-			fprintf(stdout, "%X/%X\n", LSN_FORMAT_ARGS(propEpochStartLsn));
-			exit(0);
+			/*
+			 * Send empty message to broadcast latest truncateLsn to all
+			 * safekeepers. This helps to finish next sync-safekeepers
+			 * earlier, by skipping recovery step.
+			 *
+			 * We don't need to wait for response because it doesn't affect
+			 * correctness, and TCP should be able to deliver the message to
+			 * safekeepers in case of network working properly.
+			 */
+			BroadcastAppendRequest(wp);
+
+			wp->api.finish_sync_safekeepers(wp->propEpochStartLsn);
+			/* unreachable */
 		}
 	}
 }
@@ -2325,7 +1675,9 @@ HandleSafekeeperResponse(void)
 static bool
 AsyncRead(Safekeeper *sk, char **buf, int *buf_size)
 {
-	switch (walprop_async_read(sk->conn, buf, buf_size))
+	WalProposer *wp = sk->wp;
+
+	switch (wp->api.conn_async_read(sk->conn, buf, buf_size))
 	{
 		case PG_ASYNC_READ_SUCCESS:
 			return true;
@@ -2337,7 +1689,7 @@ AsyncRead(Safekeeper *sk, char **buf, int *buf_size)
 		case PG_ASYNC_READ_FAIL:
 			elog(WARNING, "Failed to read from node %s:%s in %s state: %s", sk->host,
 				 sk->port, FormatSafekeeperState(sk->state),
-				 walprop_error_message(sk->conn));
+				 wp->api.conn_error_message(sk->conn));
 			ShutdownConnection(sk);
 			return false;
 	}
@@ -2355,8 +1707,10 @@ AsyncRead(Safekeeper *sk, char **buf, int *buf_size)
  * failed, a warning is emitted and the connection is reset.
  */
 static bool
-AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage * anymsg)
+AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg)
 {
+	WalProposer *wp = sk->wp;
+
 	char	   *buf;
 	int			buf_size;
 	uint64		tag;
@@ -2378,7 +1732,7 @@ AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage * anymsg)
 		ResetConnection(sk);
 		return false;
 	}
-	sk->latestMsgReceivedAt = GetCurrentTimestamp();
+	sk->latestMsgReceivedAt = wp->api.get_current_timestamp();
 	switch (tag)
 	{
 		case 'g':
@@ -2444,13 +1798,14 @@ AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage * anymsg)
 static bool
 BlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState success_state)
 {
+	WalProposer *wp = sk->wp;
 	uint32		events;
 
-	if (!walprop_blocking_write(sk->conn, msg, msg_size))
+	if (!wp->api.conn_blocking_write(sk->conn, msg, msg_size))
 	{
 		elog(WARNING, "Failed to send to node %s:%s in %s state: %s",
 			 sk->host, sk->port, FormatSafekeeperState(sk->state),
-			 walprop_error_message(sk->conn));
+			 wp->api.conn_error_message(sk->conn));
 		ShutdownConnection(sk);
 		return false;
 	}
@@ -2463,7 +1818,7 @@ BlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState succes
 	 */
 	events = SafekeeperStateDesiredEvents(success_state);
 	if (events)
-		UpdateEventSet(sk, events);
+		wp->api.update_event_set(sk, events);
 
 	return true;
 }
@@ -2478,7 +1833,9 @@ BlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState succes
 static bool
 AsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_state)
 {
-	switch (walprop_async_write(sk->conn, msg, msg_size))
+	WalProposer *wp = sk->wp;
+
+	switch (wp->api.conn_async_write(sk->conn, msg, msg_size))
 	{
 		case PG_ASYNC_WRITE_SUCCESS:
 			return true;
@@ -2490,12 +1847,12 @@ AsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_sta
 			 * this function
 			 */
 			sk->state = flush_state;
-			UpdateEventSet(sk, WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
+			wp->api.update_event_set(sk, WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
 			return false;
 		case PG_ASYNC_WRITE_FAIL:
 			elog(WARNING, "Failed to send to node %s:%s in %s state: %s",
 				 sk->host, sk->port, FormatSafekeeperState(sk->state),
-				 walprop_error_message(sk->conn));
+				 wp->api.conn_error_message(sk->conn));
 			ShutdownConnection(sk);
 			return false;
 		default:
@@ -2515,13 +1872,15 @@ AsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_sta
 static bool
 AsyncFlush(Safekeeper *sk)
 {
+	WalProposer *wp = sk->wp;
+
 	/*---
 	 * PQflush returns:
 	 *   0 if successful                    [we're good to move on]
 	 *   1 if unable to send everything yet [call PQflush again]
 	 *  -1 if it failed                     [emit an error]
 	 */
-	switch (walprop_flush(sk->conn))
+	switch (wp->api.conn_flush(sk->conn))
 	{
 		case 0:
 			/* flush is done */
@@ -2532,7 +1891,7 @@ AsyncFlush(Safekeeper *sk)
 		case -1:
 			elog(WARNING, "Failed to flush write to node %s:%s in %s state: %s",
 				 sk->host, sk->port, FormatSafekeeperState(sk->state),
-				 walprop_error_message(sk->conn));
+				 wp->api.conn_error_message(sk->conn));
 			ResetConnection(sk);
 			return false;
 		default:
@@ -2541,88 +1900,210 @@ AsyncFlush(Safekeeper *sk)
 	}
 }
 
-/*  Check if we need to suspend inserts because of lagging replication. */
-static uint64
-backpressure_lag_impl(void)
+static int
+CompareLsn(const void *a, const void *b)
 {
-	if (max_replication_apply_lag > 0 || max_replication_flush_lag > 0 || max_replication_write_lag > 0)
-	{
-		XLogRecPtr	writePtr;
-		XLogRecPtr	flushPtr;
-		XLogRecPtr	applyPtr;
-#if PG_VERSION_NUM >= 150000
-		XLogRecPtr	myFlushLsn = GetFlushRecPtr(NULL);
-#else
-		XLogRecPtr	myFlushLsn = GetFlushRecPtr();
-#endif
-		replication_feedback_get_lsns(&writePtr, &flushPtr, &applyPtr);
-#define MB ((XLogRecPtr)1024 * 1024)
+	XLogRecPtr	lsn1 = *((const XLogRecPtr *) a);
+	XLogRecPtr	lsn2 = *((const XLogRecPtr *) b);
 
-		elog(DEBUG2, "current flushLsn %X/%X PageserverFeedback: write %X/%X flush %X/%X apply %X/%X",
-			 LSN_FORMAT_ARGS(myFlushLsn),
-			 LSN_FORMAT_ARGS(writePtr),
-			 LSN_FORMAT_ARGS(flushPtr),
-			 LSN_FORMAT_ARGS(applyPtr));
-
-		if ((writePtr != InvalidXLogRecPtr && max_replication_write_lag > 0 && myFlushLsn > writePtr + max_replication_write_lag * MB))
-		{
-			return (myFlushLsn - writePtr - max_replication_write_lag * MB);
-		}
-
-		if ((flushPtr != InvalidXLogRecPtr && max_replication_flush_lag > 0 && myFlushLsn > flushPtr + max_replication_flush_lag * MB))
-		{
-			return (myFlushLsn - flushPtr - max_replication_flush_lag * MB);
-		}
-
-		if ((applyPtr != InvalidXLogRecPtr && max_replication_apply_lag > 0 && myFlushLsn > applyPtr + max_replication_apply_lag * MB))
-		{
-			return (myFlushLsn - applyPtr - max_replication_apply_lag * MB);
-		}
-	}
-	return 0;
+	if (lsn1 < lsn2)
+		return -1;
+	else if (lsn1 == lsn2)
+		return 0;
+	else
+		return 1;
 }
 
-#define BACK_PRESSURE_DELAY 10000L // 0.01 sec
-
-static bool
-backpressure_throttling_impl(void)
+/* Returns a human-readable string corresponding to the SafekeeperState
+ *
+ * The string should not be freed.
+ *
+ * The strings are intended to be used as a prefix to "state", e.g.:
+ *
+ *   elog(LOG, "currently in %s state", FormatSafekeeperState(sk->state));
+ *
+ * If this sort of phrasing doesn't fit the message, instead use something like:
+ *
+ *   elog(LOG, "currently in state [%s]", FormatSafekeeperState(sk->state));
+ */
+static char *
+FormatSafekeeperState(SafekeeperState state)
 {
-	int64		lag;
-	TimestampTz start,
-				stop;
-	bool		retry = PrevProcessInterruptsCallback
-	? PrevProcessInterruptsCallback()
-	: false;
+	char	   *return_val = NULL;
+
+	switch (state)
+	{
+		case SS_OFFLINE:
+			return_val = "offline";
+			break;
+		case SS_CONNECTING_READ:
+		case SS_CONNECTING_WRITE:
+			return_val = "connecting";
+			break;
+		case SS_WAIT_EXEC_RESULT:
+			return_val = "receiving query result";
+			break;
+		case SS_HANDSHAKE_RECV:
+			return_val = "handshake (receiving)";
+			break;
+		case SS_VOTING:
+			return_val = "voting";
+			break;
+		case SS_WAIT_VERDICT:
+			return_val = "wait-for-verdict";
+			break;
+		case SS_SEND_ELECTED_FLUSH:
+			return_val = "send-announcement-flush";
+			break;
+		case SS_IDLE:
+			return_val = "idle";
+			break;
+		case SS_ACTIVE:
+			return_val = "active";
+			break;
+	}
+
+	Assert(return_val != NULL);
+
+	return return_val;
+}
+
+/* Asserts that the provided events are expected for given safekeeper's state */
+static void
+AssertEventsOkForState(uint32 events, Safekeeper *sk)
+{
+	uint32		expected = SafekeeperStateDesiredEvents(sk->state);
 
 	/*
-	 * Don't throttle read only transactions or wal sender.
-	 * Do throttle CREATE INDEX CONCURRENTLY, however. It performs some
-	 * stages outside a transaction, even though it writes a lot of WAL. 
-	 * Check PROC_IN_SAFE_IC flag to cover that case.
+	 * The events are in-line with what we're expecting, under two conditions:
+	 * (a) if we aren't expecting anything, `events` has no read- or
+	 * write-ready component. (b) if we are expecting something, there's
+	 * overlap (i.e. `events & expected != 0`)
 	 */
-	if (am_walsender
-		|| (!(MyProc->statusFlags & PROC_IN_SAFE_IC)
-			&& !TransactionIdIsValid(GetCurrentTransactionIdIfAny())))
-		return retry;
+	bool		events_ok_for_state;	/* long name so the `Assert` is more
+										 * clear later */
 
-	/* Calculate replicas lag */
-	lag = backpressure_lag_impl();
-	if (lag == 0)
-		return retry;
+	if (expected == WL_NO_EVENTS)
+		events_ok_for_state = ((events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) == 0);
+	else
+		events_ok_for_state = ((events & expected) != 0);
 
-	/* Suspend writers until replicas catch up */
-	set_ps_display("backpressure throttling");
-
-	elog(DEBUG2, "backpressure throttling: lag %lu", lag);
-	start = GetCurrentTimestamp();
-	pg_usleep(BACK_PRESSURE_DELAY);
-	stop = GetCurrentTimestamp();
-	pg_atomic_add_fetch_u64(&walprop_shared->backpressureThrottlingTime, stop - start);
-	return true;
+	if (!events_ok_for_state)
+	{
+		/*
+		 * To give a descriptive message in the case of failure, we use elog
+		 * and then an assertion that's guaranteed to fail.
+		 */
+		elog(WARNING, "events %s mismatched for safekeeper %s:%s in state [%s]",
+			 FormatEvents(events), sk->host, sk->port, FormatSafekeeperState(sk->state));
+		Assert(events_ok_for_state);
+	}
 }
 
-uint64
-BackpressureThrottlingTime(void)
+/* Returns the set of events a safekeeper in this state should be waiting on
+ *
+ * This will return WL_NO_EVENTS (= 0) for some states. */
+static uint32
+SafekeeperStateDesiredEvents(SafekeeperState state)
 {
-	return pg_atomic_read_u64(&walprop_shared->backpressureThrottlingTime);
+	uint32		result = WL_NO_EVENTS;
+
+	/* If the state doesn't have a modifier, we can check the base state */
+	switch (state)
+	{
+			/* Connecting states say what they want in the name */
+		case SS_CONNECTING_READ:
+			result = WL_SOCKET_READABLE;
+			break;
+		case SS_CONNECTING_WRITE:
+			result = WL_SOCKET_WRITEABLE;
+			break;
+
+			/* Reading states need the socket to be read-ready to continue */
+		case SS_WAIT_EXEC_RESULT:
+		case SS_HANDSHAKE_RECV:
+		case SS_WAIT_VERDICT:
+			result = WL_SOCKET_READABLE;
+			break;
+
+			/*
+			 * Idle states use read-readiness as a sign that the connection
+			 * has been disconnected.
+			 */
+		case SS_VOTING:
+		case SS_IDLE:
+			result = WL_SOCKET_READABLE;
+			break;
+
+			/*
+			 * Flush states require write-ready for flushing. Active state
+			 * does both reading and writing.
+			 *
+			 * TODO: SS_ACTIVE sometimes doesn't need to be write-ready. We
+			 * should check sk->flushWrite here to set WL_SOCKET_WRITEABLE.
+			 */
+		case SS_SEND_ELECTED_FLUSH:
+		case SS_ACTIVE:
+			result = WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE;
+			break;
+
+			/* The offline state expects no events. */
+		case SS_OFFLINE:
+			result = WL_NO_EVENTS;
+			break;
+
+		default:
+			Assert(false);
+			break;
+	}
+
+	return result;
+}
+
+/* Returns a human-readable string corresponding to the event set
+ *
+ * If the events do not correspond to something set as the `events` field of a `WaitEvent`, the
+ * returned string may be meaningless.
+ *
+ * The string should not be freed. It should also not be expected to remain the same between
+ * function calls: it lives in a static buffer that every call overwrites. */
+static char *
+FormatEvents(uint32 events)
+{
+	static char return_str[9];	/* 7 flags + optional '*' + terminator */
+
+	/* Helper variable to check if there's extra bits */
+	uint32		all_flags = WL_LATCH_SET
+		| WL_SOCKET_READABLE
+		| WL_SOCKET_WRITEABLE
+		| WL_TIMEOUT
+		| WL_POSTMASTER_DEATH
+		| WL_EXIT_ON_PM_DEATH
+		| WL_SOCKET_CONNECTED;
+
+	/*
+	 * The formatting here isn't supposed to be *particularly* useful -- it's
+	 * just to give a sense of what events have been triggered without
+	 * needing to remember your powers of two. One slot per known flag.
+	 */
+
+	return_str[0] = (events & WL_LATCH_SET) ? 'L' : '_';
+	return_str[1] = (events & WL_SOCKET_READABLE) ? 'R' : '_';
+	return_str[2] = (events & WL_SOCKET_WRITEABLE) ? 'W' : '_';
+	return_str[3] = (events & WL_TIMEOUT) ? 'T' : '_';
+	return_str[4] = (events & WL_POSTMASTER_DEATH) ? 'D' : '_';
+	return_str[5] = (events & WL_EXIT_ON_PM_DEATH) ? 'E' : '_';
+	return_str[6] = (events & WL_SOCKET_CONNECTED) ? 'C' : '_';
+
+	if (events & (~all_flags))
+	{
+		elog(WARNING, "Event formatting found unexpected component %u",
+			 events & (~all_flags));
+		return_str[7] = '*';	/* marks bits outside all_flags */
+		return_str[8] = '\0';
+	}
+	else
+		return_str[7] = '\0';
+
+	return return_str;
+}
diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h
index fa1ba30a8f..a1a9ccdfdd 100644
--- a/pgxn/neon/walproposer.h
+++ b/pgxn/neon/walproposer.h
@@ -1,8 +1,8 @@
 #ifndef __NEON_WALPROPOSER_H__
 #define __NEON_WALPROPOSER_H__
 
-#include "access/xlogdefs.h"
 #include "postgres.h"
+#include "access/xlogdefs.h"
 #include "port.h"
 #include "access/xlog_internal.h"
 #include "access/transam.h"
@@ -16,29 +16,15 @@
 #define MAX_SAFEKEEPERS 32
 #define MAX_SEND_SIZE (XLOG_BLCKSZ * 16)	/* max size of a single* WAL
 											 * message */
-#define XLOG_HDR_SIZE (1 + 8 * 3)	/* 'w' + startPos + walEnd + timestamp */
-#define XLOG_HDR_START_POS 1	/* offset of start position in wal sender*
-								 * message header */
-#define XLOG_HDR_END_POS (1 + 8)	/* offset of end position in wal sender*
-									 * message header */
-
 /*
  * In the spirit of WL_SOCKET_READABLE and others, this corresponds to no events having occurred,
  * because all WL_* events are given flags equal to some (1 << i), starting from i = 0
  */
 #define WL_NO_EVENTS 0
 
-extern char *wal_acceptors_list;
-extern int	wal_acceptor_reconnect_timeout;
-extern int	wal_acceptor_connection_timeout;
-extern bool am_wal_proposer;
-
-struct WalProposerConn;			/* Defined in libpqwalproposer */
+struct WalProposerConn;			/* Defined in implementation (walproposer_pg.c) */
 typedef struct WalProposerConn WalProposerConn;
 
-struct WalMessage;
-typedef struct WalMessage WalMessage;
-
 /* Possible return values from ReadPGAsync */
 typedef enum
 {
@@ -52,7 +38,7 @@ typedef enum
 	PG_ASYNC_READ_TRY_AGAIN,
 	/* Reading failed. Check PQerrorMessage(conn) */
 	PG_ASYNC_READ_FAIL,
-}			PGAsyncReadResult;
+} PGAsyncReadResult;
 
 /* Possible return values from WritePGAsync */
 typedef enum
@@ -71,7 +57,7 @@ typedef enum
 	PG_ASYNC_WRITE_TRY_FLUSH,
 	/* Writing failed. Check PQerrorMessage(conn) */
 	PG_ASYNC_WRITE_FAIL,
-}			PGAsyncWriteResult;
+} PGAsyncWriteResult;
 
 /*
  * WAL safekeeper state, which is used to wait for some event.
@@ -147,7 +133,7 @@ typedef enum
 	 * to read.
 	 */
 	SS_ACTIVE,
-}			SafekeeperState;
+} SafekeeperState;
 
 /* Consensus logical timestamp. */
 typedef uint64 term_t;
@@ -171,12 +157,12 @@ typedef struct ProposerGreeting
 	uint8		tenant_id[16];
 	TimeLineID	timeline;
 	uint32		walSegSize;
-}			ProposerGreeting;
+} ProposerGreeting;
 
 typedef struct AcceptorProposerMessage
 {
 	uint64		tag;
-}			AcceptorProposerMessage;
+} AcceptorProposerMessage;
 
 /*
  * Acceptor -> Proposer initial response: the highest term acceptor voted for.
@@ -186,7 +172,7 @@ typedef struct AcceptorGreeting
 	AcceptorProposerMessage apm;
 	term_t		term;
 	NNodeId		nodeId;
-}			AcceptorGreeting;
+} AcceptorGreeting;
 
 /*
  * Proposer -> Acceptor vote request.
@@ -196,20 +182,20 @@ typedef struct VoteRequest
 	uint64		tag;
 	term_t		term;
 	pg_uuid_t	proposerId;		/* for monitoring/debugging */
-}			VoteRequest;
+} VoteRequest;
 
 /* Element of term switching chain. */
 typedef struct TermSwitchEntry
 {
 	term_t		term;
 	XLogRecPtr	lsn;
-}			TermSwitchEntry;
+} TermSwitchEntry;
 
 typedef struct TermHistory
 {
 	uint32		n_entries;
 	TermSwitchEntry *entries;
-}			TermHistory;
+} TermHistory;
 
 /* Vote itself, sent from safekeeper to proposer */
 typedef struct VoteResponse
@@ -227,7 +213,7 @@ typedef struct VoteResponse
 								 * recovery of some safekeeper */
 	TermHistory termHistory;
 	XLogRecPtr	timelineStartLsn;	/* timeline globally starts at this LSN */
-}			VoteResponse;
+} VoteResponse;
 
 /*
  * Proposer -> Acceptor message announcing proposer is elected and communicating
@@ -243,7 +229,7 @@ typedef struct ProposerElected
 	TermHistory *termHistory;
 	/* timeline globally starts at this LSN */
 	XLogRecPtr	timelineStartLsn;
-}			ProposerElected;
+} ProposerElected;
 
 /*
  * Header of request with WAL message sent from proposer to safekeeper.
@@ -268,7 +254,7 @@ typedef struct AppendRequestHeader
 	 */
 	XLogRecPtr	truncateLsn;
 	pg_uuid_t	proposerId;		/* for monitoring/debugging */
-}			AppendRequestHeader;
+} AppendRequestHeader;
 
 /*
  * Hot standby feedback received from replica
@@ -278,7 +264,7 @@ typedef struct HotStandbyFeedback
 	TimestampTz ts;
 	FullTransactionId xmin;
 	FullTransactionId catalog_xmin;
-}			HotStandbyFeedback;
+} HotStandbyFeedback;
 
 typedef struct PageserverFeedback
 {
@@ -289,7 +275,7 @@ typedef struct PageserverFeedback
 	XLogRecPtr	disk_consistent_lsn;
 	XLogRecPtr	remote_consistent_lsn;
 	TimestampTz replytime;
-}			PageserverFeedback;
+} PageserverFeedback;
 
 typedef struct WalproposerShmemState
 {
@@ -297,7 +283,7 @@ typedef struct WalproposerShmemState
 	PageserverFeedback feedback;
 	term_t		mineLastElectedTerm;
 	pg_atomic_uint64 backpressureThrottlingTime;
-}			WalproposerShmemState;
+} WalproposerShmemState;
 
 /*
  * Report safekeeper state to proposer
@@ -321,17 +307,22 @@ typedef struct AppendResponse
 	/* and custom neon feedback. */
 	/* This part of the message is extensible. */
 	PageserverFeedback rf;
-}			AppendResponse;
+} AppendResponse;
 
 /*  PageserverFeedback is extensible part of the message that is parsed separately */
 /*  Other fields are fixed part */
 #define APPENDRESPONSE_FIXEDPART_SIZE offsetof(AppendResponse, rf)
 
+struct WalProposer;
+typedef struct WalProposer WalProposer;
+
 /*
  * Descriptor of safekeeper
  */
 typedef struct Safekeeper
 {
+	WalProposer *wp;
+
 	char const *host;
 	char const *port;
 
@@ -340,7 +331,7 @@ typedef struct Safekeeper
 	 *
 	 * May contain private information like password and should not be logged.
 	 */
-	char conninfo[MAXCONNINFO];
+	char		conninfo[MAXCONNINFO];
 
 	/*
 	 * postgres protocol connection to the WAL acceptor
@@ -373,27 +364,12 @@ typedef struct Safekeeper
 	int			eventPos;		/* position in wait event set. Equal to -1 if*
 								 * no event */
 	SafekeeperState state;		/* safekeeper state machine state */
-	TimestampTz latestMsgReceivedAt;        /* when latest msg is received */
+	TimestampTz latestMsgReceivedAt;	/* when latest msg is received */
 	AcceptorGreeting greetResponse; /* acceptor greeting */
 	VoteResponse voteResponse;	/* the vote */
 	AppendResponse appendResponse;	/* feedback for master */
 } Safekeeper;
 
-extern void PGDLLEXPORT WalProposerSync(int argc, char *argv[]);
-extern void PGDLLEXPORT WalProposerMain(Datum main_arg);
-extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
-extern void WalProposerPoll(void);
-extern void ParsePageserverFeedbackMessage(StringInfo reply_message,
-											PageserverFeedback *rf);
-extern void StartProposerReplication(StartReplicationCmd *cmd);
-
-extern Size WalproposerShmemSize(void);
-extern bool WalproposerShmemInit(void);
-extern void replication_feedback_set(PageserverFeedback *rf);
-extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
-
-/* libpqwalproposer hooks & helper type */
-
 /* Re-exported PostgresPollingStatusType */
 typedef enum
 {
@@ -406,7 +382,7 @@ typedef enum
 	 * 'libpq-fe.h' still has PGRES_POLLING_ACTIVE, but says it's unused.
 	 * We've removed it here to avoid clutter.
 	 */
-}			WalProposerConnectPollStatusType;
+} WalProposerConnectPollStatusType;
 
 /* Re-exported and modified ExecStatusType */
 typedef enum
@@ -431,7 +407,7 @@ typedef enum
 	WP_EXEC_NEEDS_INPUT,
 	/* Catch-all failure. Check PQerrorMessage. */
 	WP_EXEC_FAILED,
-}			WalProposerExecStatusType;
+} WalProposerExecStatusType;
 
 /* Re-exported ConnStatusType */
 typedef enum
@@ -445,67 +421,252 @@ typedef enum
 	 * that extra functionality, so we collect them into a single tag here.
 	 */
 	WP_CONNECTION_IN_PROGRESS,
-}			WalProposerConnStatusType;
-
-/* Re-exported PQerrorMessage */
-extern char *walprop_error_message(WalProposerConn *conn);
-
-/* Re-exported PQstatus */
-extern WalProposerConnStatusType walprop_status(WalProposerConn *conn);
-
-/* Re-exported PQconnectStart */
-extern WalProposerConn * walprop_connect_start(char *conninfo, char *password);
-
-/* Re-exported PQconectPoll */
-extern WalProposerConnectPollStatusType walprop_connect_poll(WalProposerConn *conn);
-
-/* Blocking wrapper around PQsendQuery */
-extern bool walprop_send_query(WalProposerConn *conn, char *query);
-
-/* Wrapper around PQconsumeInput + PQisBusy + PQgetResult */
-extern WalProposerExecStatusType walprop_get_query_result(WalProposerConn *conn);
-
-/* Re-exported PQsocket */
-extern pgsocket walprop_socket(WalProposerConn *conn);
-
-/* Wrapper around PQconsumeInput (if socket's read-ready) + PQflush */
-extern int	walprop_flush(WalProposerConn *conn);
-
-/* Re-exported PQfinish */
-extern void walprop_finish(WalProposerConn *conn);
+} WalProposerConnStatusType;
 
 /*
- * Ergonomic wrapper around PGgetCopyData
- *
- * Reads a CopyData block from a safekeeper, setting *amount to the number
- * of bytes returned.
- *
- * This function is allowed to assume certain properties specific to the
- * protocol with the safekeepers, so it should not be used as-is for any
- * other purpose.
- *
- * Note: If possible, using <AsyncRead> is generally preferred, because it
- * performs a bit of extra checking work that's always required and is normally
- * somewhat verbose.
+ * Collection of hooks for walproposer, to call postgres functions,
+ * read WAL and send it over the network.
  */
-extern PGAsyncReadResult walprop_async_read(WalProposerConn *conn, char **buf, int *amount);
+typedef struct walproposer_api
+{
+	/*
+	 * Get WalproposerShmemState. This is used to store information about last
+	 * elected term.
+	 */
+	WalproposerShmemState *(*get_shmem_state) (void);
+
+	/*
+	 * Start receiving notifications about new WAL. This is an infinite loop
+	 * which calls WalProposerBroadcast() and WalProposerPoll() to send the
+	 * WAL.
+	 */
+	void		(*start_streaming) (WalProposer *wp, XLogRecPtr startpos);
+
+	/* Get pointer to the latest available WAL. */
+	XLogRecPtr	(*get_flush_rec_ptr) (void);
+
+	/* Get current time. */
+	TimestampTz (*get_current_timestamp) (void);
+
+	/* Get postgres timeline. */
+	TimeLineID	(*get_timeline_id) (void);
+
+	/* Current error message, aka PQerrorMessage. */
+	char	   *(*conn_error_message) (WalProposerConn *conn);
+
+	/* Connection status, aka PQstatus. */
+	WalProposerConnStatusType (*conn_status) (WalProposerConn *conn);
+
+	/* Start the connection, aka PQconnectStart. */
+	WalProposerConn *(*conn_connect_start) (char *conninfo);
+
+	/* Poll an asynchronous connection, aka PQconnectPoll. */
+	WalProposerConnectPollStatusType (*conn_connect_poll) (WalProposerConn *conn);
+
+	/* Send a blocking SQL query, aka PQsendQuery. */
+	bool		(*conn_send_query) (WalProposerConn *conn, char *query);
+
+	/* Read the query result, aka PQgetResult. */
+	WalProposerExecStatusType (*conn_get_query_result) (WalProposerConn *conn);
+
+	/* Flush buffer to the network, aka PQflush. */
+	int			(*conn_flush) (WalProposerConn *conn);
+
+	/* Close the connection, aka PQfinish. */
+	void		(*conn_finish) (WalProposerConn *conn);
+
+	/* Try to read CopyData message, aka PQgetCopyData. */
+	PGAsyncReadResult (*conn_async_read) (WalProposerConn *conn, char **buf, int *amount);
+
+	/* Try to write CopyData message, aka PQputCopyData. */
+	PGAsyncWriteResult (*conn_async_write) (WalProposerConn *conn, void const *buf, size_t size);
+
+	/* Blocking CopyData write, aka PQputCopyData + PQflush. */
+	bool		(*conn_blocking_write) (WalProposerConn *conn, void const *buf, size_t size);
+
+	/* Download WAL from startpos to endpos and make it available locally. */
+	bool		(*recovery_download) (Safekeeper *sk, TimeLineID timeline, XLogRecPtr startpos, XLogRecPtr endpos);
+
+	/* Read WAL from disk to buf. */
+	void		(*wal_read) (XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count);
+
+	/* Allocate WAL reader. */
+	XLogReaderState *(*wal_reader_allocate) (void);
+
+	/* Deallocate event set. */
+	void		(*free_event_set) (void);
+
+	/* Initialize event set. */
+	void		(*init_event_set) (int n_safekeepers);
+
+	/* Update events for an existing safekeeper connection. */
+	void		(*update_event_set) (Safekeeper *sk, uint32 events);
+
+	/* Add a new safekeeper connection to the event set. */
+	void		(*add_safekeeper_event_set) (Safekeeper *sk, uint32 events);
+
+	/*
+	 * Wait until one of these happens: the timeout is reached, a socket
+	 * event fires for a safekeeper connection, or new WAL is available.
+	 *
+	 * Returns 0 if timeout is reached, 1 if some event happened. Updates
+	 * events mask to indicate events and sets sk to the safekeeper which has
+	 * an event.
+	 */
+	int			(*wait_event_set) (long timeout, Safekeeper **sk, uint32 *events);
+
+	/* Read random bytes. */
+	bool		(*strong_random) (void *buf, size_t len);
+
+	/*
+	 * Get a basebackup LSN. Used to cross-validate with the latest available
+	 * LSN on the safekeepers.
+	 */
+	XLogRecPtr	(*get_redo_start_lsn) (void);
+
+	/*
+	 * Finish sync safekeepers with the given LSN. This function should not
+	 * return and should exit the program.
+	 */
+	void		(*finish_sync_safekeepers) (XLogRecPtr lsn);
+
+	/*
+	 * Called after every new message from the safekeeper. Used to propagate
+	 * backpressure feedback and to confirm WAL persistence (has been committed
+	 * on the quorum of safekeepers).
+	 */
+	void		(*process_safekeeper_feedback) (WalProposer *wp, XLogRecPtr commitLsn);
+
+	/*
+	 * Called on peer_horizon_lsn updates. Used to advance replication slot
+	 * and to free up disk space by deleting unnecessary WAL.
+	 */
+	void		(*confirm_wal_streamed) (XLogRecPtr lsn);
+} walproposer_api;
 
 /*
- * Ergonomic wrapper around PQputCopyData + PQflush
- *
- * Starts to write a CopyData block to a safekeeper.
- *
- * For information on the meaning of return codes, refer to PGAsyncWriteResult.
+ * Configuration of the WAL proposer.
  */
-extern PGAsyncWriteResult walprop_async_write(WalProposerConn *conn, void const *buf, size_t size);
+typedef struct WalProposerConfig
+{
+	/* hex-encoded TenantId cstr */
+	char	   *neon_tenant;
+
+	/* hex-encoded TimelineId cstr */
+	char	   *neon_timeline;
+
+	/*
+	 * Comma-separated list of safekeepers, in the following format:
+	 * host1:port1,host2:port2,host3:port3
+	 *
+	 * This cstr should be editable.
+	 */
+	char	   *safekeepers_list;
+
+	/*
+	 * WalProposer reconnects to offline safekeepers once in this interval.
+	 * Time is in milliseconds.
+	 */
+	int			safekeeper_reconnect_timeout;
+
+	/*
+	 * WalProposer terminates the connection if it doesn't receive any message
+	 * from the safekeeper in this interval. Time is in milliseconds.
+	 */
+	int			safekeeper_connection_timeout;
+
+	/*
+	 * WAL segment size. Will be passed to safekeepers in greet request. Also
+	 * used to detect page headers.
+	 */
+	int			wal_segment_size;
+
+	/*
+	 * If safekeeper was started in sync mode, walproposer will not subscribe
+	 * for new WAL and will exit when quorum of safekeepers will be synced to
+	 * the latest available LSN.
+	 */
+	bool		syncSafekeepers;
+
+	/* Will be passed to safekeepers in greet request. */
+	uint64		systemId;
+} WalProposerConfig;
+
 
 /*
- * Blocking equivalent to walprop_async_write_fn
- *
- * Returns 'true' if successful, 'false' on failure.
+ * WAL proposer state.
  */
-extern bool walprop_blocking_write(WalProposerConn *conn, void const *buf, size_t size);
+typedef struct WalProposer
+{
+	WalProposerConfig *config;
+	int			n_safekeepers;
 
-extern uint64 BackpressureThrottlingTime(void);
+	/* (n_safekeepers / 2) + 1 */
+	int			quorum;
+
+	Safekeeper	safekeeper[MAX_SAFEKEEPERS];
+
+	/* WAL has been generated up to this point */
+	XLogRecPtr	availableLsn;
+
+	/* last commitLsn broadcasted to safekeepers */
+	XLogRecPtr	lastSentCommitLsn;
+
+	ProposerGreeting greetRequest;
+
+	/* Vote request for safekeeper */
+	VoteRequest voteRequest;
+
+	/*
+	 * Minimal LSN which may be needed for recovery of some safekeeper,
+	 * record-aligned (first record which might not yet be received by someone).
+	 */
+	XLogRecPtr	truncateLsn;
+
+	/*
+	 * Term of the proposer. We want our term to be highest and unique, so we
+	 * collect terms from safekeepers quorum, choose max and +1. After that
+	 * our term is fixed and must not change. If we observe that some
+	 * safekeeper has higher term, it means that we have another running
+	 * compute, so we must stop immediately.
+	 */
+	term_t		propTerm;
+
+	/* term history of the proposer */
+	TermHistory propTermHistory;
+
+	/* epoch start lsn of the proposer */
+	XLogRecPtr	propEpochStartLsn;
+
+	/* Most advanced acceptor epoch */
+	term_t		donorEpoch;
+
+	/* Most advanced acceptor */
+	int			donor;
+
+	/* timeline globally starts at this LSN */
+	XLogRecPtr	timelineStartLsn;
+
+	/* number of votes collected from safekeepers */
+	int			n_votes;
+
+	/* number of successful connections over the lifetime of walproposer */
+	int			n_connected;
+
+	/*
+	 * Timestamp of the last reconnection attempt. Related to
+	 * config->safekeeper_reconnect_timeout
+	 */
+	TimestampTz last_reconnect_attempt;
+
+	walproposer_api api;
+} WalProposer;
+
+extern WalProposer *WalProposerCreate(WalProposerConfig *config, walproposer_api api);
+extern void WalProposerStart(WalProposer *wp);
+extern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPtr endpos);
+extern void WalProposerPoll(WalProposer *wp);
+extern void ParsePageserverFeedbackMessage(StringInfo reply_message,
+										   PageserverFeedback *rf);
 
 #endif							/* __NEON_WALPROPOSER_H__ */
diff --git a/pgxn/neon/walproposer_pg.c b/pgxn/neon/walproposer_pg.c
new file mode 100644
index 0000000000..654b411e94
--- /dev/null
+++ b/pgxn/neon/walproposer_pg.c
@@ -0,0 +1,1667 @@
+/*
+ * Implementation of postgres based walproposer disk and IO routines, i.e. the
+ * real ones. The reason this is separate from walproposer.c is ability to
+ * replace them with mocks, allowing to do simulation testing.
+ *
+ * Also contains initialization of postgres based walproposer.
+ */
+
+#include "postgres.h"
+
+#include <signal.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include "access/xact.h"
+#include "access/xlogdefs.h"
+#include "access/xlogutils.h"
+#include "access/xloginsert.h"
+#if PG_VERSION_NUM >= 150000
+#include "access/xlogrecovery.h"
+#endif
+#include "storage/fd.h"
+#include "storage/latch.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "access/xlog.h"
+#include "libpq/pqformat.h"
+#include "replication/slot.h"
+#include "replication/walreceiver.h"
+#include "replication/walsender_private.h"
+#include "postmaster/bgworker.h"
+#include "postmaster/interrupt.h"
+#include "postmaster/postmaster.h"
+#include "storage/pmsignal.h"
+#include "storage/proc.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/shmem.h"
+#include "storage/spin.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/ps_status.h"
+#include "utils/timestamp.h"
+
+#include "neon.h"
+#include "walproposer.h"
+#include "libpq-fe.h"
+
+#define XLOG_HDR_SIZE (1 + 8 * 3)	/* 'w' + startPos + walEnd + timestamp */
+#define XLOG_HDR_START_POS 1	/* offset of start position in wal sender*
+								 * message header */
+
+#define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot"
+
+char	   *wal_acceptors_list = "";
+int			wal_acceptor_reconnect_timeout = 1000;
+int			wal_acceptor_connection_timeout = 10000;
+
+static AppendResponse quorumFeedback;
+static WalproposerShmemState *walprop_shared;
+static WalProposerConfig walprop_config;
+static XLogRecPtr sentPtr = InvalidXLogRecPtr;
+static const walproposer_api walprop_pg;
+
+static void nwp_shmem_startup_hook(void);
+static void nwp_register_gucs(void);
+static void nwp_prepare_shmem(void);
+static uint64 backpressure_lag_impl(void);
+static bool backpressure_throttling_impl(void);
+static void walprop_register_bgworker(void);
+
+static void walprop_pg_init_standalone_sync_safekeepers(void);
+static void walprop_pg_init_walsender(void);
+static void walprop_pg_init_bgworker(void);
+static TimestampTz walprop_pg_get_current_timestamp(void);
+static void walprop_pg_load_libpqwalreceiver(void);
+
+static process_interrupts_callback_t PrevProcessInterruptsCallback;
+static shmem_startup_hook_type prev_shmem_startup_hook_type;
+#if PG_VERSION_NUM >= 150000
+static shmem_request_hook_type prev_shmem_request_hook = NULL;
+static void walproposer_shmem_request(void);
+#endif
+
+static void StartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd);
+static void WalSndLoop(WalProposer *wp);
+static void XLogBroadcastWalProposer(WalProposer *wp);
+
+static void XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr);
+static void XLogWalPropClose(XLogRecPtr recptr);
+
+static void
+init_walprop_config(bool syncSafekeepers)
+{
+	walprop_config.neon_tenant = neon_tenant;
+	walprop_config.neon_timeline = neon_timeline;
+	walprop_config.safekeepers_list = wal_acceptors_list;
+	walprop_config.safekeeper_reconnect_timeout = wal_acceptor_reconnect_timeout;
+	walprop_config.safekeeper_connection_timeout = wal_acceptor_connection_timeout;
+	walprop_config.wal_segment_size = wal_segment_size;
+	walprop_config.syncSafekeepers = syncSafekeepers;
+	if (!syncSafekeepers)
+		walprop_config.systemId = GetSystemIdentifier();
+	else
+		walprop_config.systemId = 0;
+}
+
+/*
+ * Entry point for `postgres --sync-safekeepers`.
+ */
+PGDLLEXPORT void
+WalProposerSync(int argc, char *argv[])
+{
+	WalProposer *wp;
+
+	init_walprop_config(true);
+	walprop_pg_init_standalone_sync_safekeepers();
+	walprop_pg_load_libpqwalreceiver();
+
+	wp = WalProposerCreate(&walprop_config, walprop_pg);
+
+	WalProposerStart(wp);
+}
+
+/*
+ * WAL proposer bgworker entry point.
+ */
+PGDLLEXPORT void
+WalProposerMain(Datum main_arg)
+{
+	WalProposer *wp;
+
+	init_walprop_config(false);
+	walprop_pg_init_bgworker();
+	walprop_pg_load_libpqwalreceiver();
+
+	wp = WalProposerCreate(&walprop_config, walprop_pg);
+	wp->last_reconnect_attempt = walprop_pg_get_current_timestamp();
+
+	walprop_pg_init_walsender();
+	WalProposerStart(wp);
+}
+
+/*
+ * Initialize GUCs, bgworker, shmem and backpressure.
+ */
+void
+pg_init_walproposer(void)
+{
+	if (!process_shared_preload_libraries_in_progress)
+		return;
+
+	nwp_register_gucs();
+
+	nwp_prepare_shmem();
+
+	delay_backend_us = &backpressure_lag_impl;
+	PrevProcessInterruptsCallback = ProcessInterruptsCallback;
+	ProcessInterruptsCallback = backpressure_throttling_impl;
+
+	walprop_register_bgworker();
+}
+
+static void
+nwp_register_gucs(void)
+{
+	DefineCustomStringVariable(
+							   "neon.safekeepers",
+							   "List of Neon WAL acceptors (host:port)",
+							   NULL,	/* long_desc */
+							   &wal_acceptors_list, /* valueAddr */
+							   "",	/* bootValue */
+							   PGC_POSTMASTER,
+							   GUC_LIST_INPUT,	/* extensions can't use*
+												 * GUC_LIST_QUOTE */
+							   NULL, NULL, NULL);
+
+	DefineCustomIntVariable(
+							"neon.safekeeper_reconnect_timeout",
+							"Walproposer reconnects to offline safekeepers once in this interval.",
+							NULL,
+							&wal_acceptor_reconnect_timeout,
+							1000, 0, INT_MAX,	/* default, min, max */
+							PGC_SIGHUP, /* context */
+							GUC_UNIT_MS,	/* flags */
+							NULL, NULL, NULL);
+
+	DefineCustomIntVariable(
+							"neon.safekeeper_connect_timeout",
+							"Connection or connection attempt to safekeeper is terminated if no message is received (or connection attempt doesn't finish) within this period.",
+							NULL,
+							&wal_acceptor_connection_timeout,
+							10000, 0, INT_MAX,
+							PGC_SIGHUP,
+							GUC_UNIT_MS,
+							NULL, NULL, NULL);
+}
+
+/*  Check if we need to suspend inserts because of lagging replication. */
+static uint64
+backpressure_lag_impl(void)
+{
+	if (max_replication_apply_lag > 0 || max_replication_flush_lag > 0 || max_replication_write_lag > 0)
+	{
+		XLogRecPtr	writePtr;
+		XLogRecPtr	flushPtr;
+		XLogRecPtr	applyPtr;
+#if PG_VERSION_NUM >= 150000
+		XLogRecPtr	myFlushLsn = GetFlushRecPtr(NULL);
+#else
+		XLogRecPtr	myFlushLsn = GetFlushRecPtr();
+#endif
+		replication_feedback_get_lsns(&writePtr, &flushPtr, &applyPtr);
+#define MB ((XLogRecPtr)1024 * 1024)
+
+		elog(DEBUG2, "current flushLsn %X/%X PageserverFeedback: write %X/%X flush %X/%X apply %X/%X",
+			 LSN_FORMAT_ARGS(myFlushLsn),
+			 LSN_FORMAT_ARGS(writePtr),
+			 LSN_FORMAT_ARGS(flushPtr),
+			 LSN_FORMAT_ARGS(applyPtr));
+
+		if ((writePtr != InvalidXLogRecPtr && max_replication_write_lag > 0 && myFlushLsn > writePtr + max_replication_write_lag * MB))
+		{
+			return (myFlushLsn - writePtr - max_replication_write_lag * MB);
+		}
+
+		if ((flushPtr != InvalidXLogRecPtr && max_replication_flush_lag > 0 && myFlushLsn > flushPtr + max_replication_flush_lag * MB))
+		{
+			return (myFlushLsn - flushPtr - max_replication_flush_lag * MB);
+		}
+
+		if ((applyPtr != InvalidXLogRecPtr && max_replication_apply_lag > 0 && myFlushLsn > applyPtr + max_replication_apply_lag * MB))
+		{
+			return (myFlushLsn - applyPtr - max_replication_apply_lag * MB);
+		}
+	}
+	return 0;
+}
+
+/*
+ * WalproposerShmemSize --- report amount of shared memory space needed
+ */
+static Size
+WalproposerShmemSize(void)
+{
+	return sizeof(WalproposerShmemState);
+}
+
+static bool
+WalproposerShmemInit(void)
+{
+	bool		found;
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+	walprop_shared = ShmemInitStruct("Walproposer shared state",
+									 sizeof(WalproposerShmemState),
+									 &found);
+
+	if (!found)
+	{
+		memset(walprop_shared, 0, WalproposerShmemSize());
+		SpinLockInit(&walprop_shared->mutex);
+		pg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);
+	}
+	LWLockRelease(AddinShmemInitLock);
+
+	return found;
+}
+
+#define BACK_PRESSURE_DELAY 10000L // 0.01 sec
+
+/*
+ * Installed as ProcessInterruptsCallback. Sleeps BACK_PRESSURE_DELAY usec and
+ * returns true when throttling; else returns the previous callback's result.
+ */
+static bool
+backpressure_throttling_impl(void)
+{
+	uint64		lag;
+	TimestampTz start, stop;
+	bool		retry = PrevProcessInterruptsCallback ? PrevProcessInterruptsCallback() : false;
+
+	/*
+	 * Don't throttle read only transactions or wal sender. Do throttle CREATE
+	 * INDEX CONCURRENTLY, however. It performs some stages outside a
+	 * transaction, even though it writes a lot of WAL. Check PROC_IN_SAFE_IC
+	 * flag to cover that case.
+	 */
+	if (am_walsender
+		|| (!(MyProc->statusFlags & PROC_IN_SAFE_IC)
+			&& !TransactionIdIsValid(GetCurrentTransactionIdIfAny())))
+		return retry;
+
+	/* Calculate replicas lag */
+	lag = backpressure_lag_impl();
+	if (lag == 0)
+		return retry;
+
+	/* Suspend writers until replicas catch up */
+	set_ps_display("backpressure throttling");
+	elog(DEBUG2, "backpressure throttling: lag " UINT64_FORMAT, lag);
+	start = GetCurrentTimestamp();
+	pg_usleep(BACK_PRESSURE_DELAY);
+	stop = GetCurrentTimestamp();
+	pg_atomic_add_fetch_u64(&walprop_shared->backpressureThrottlingTime, stop - start);
+	return true;
+}
+
+uint64
+BackpressureThrottlingTime(void)
+{
+	return pg_atomic_read_u64(&walprop_shared->backpressureThrottlingTime);
+}
+
+/*
+ * Register a background worker proposing WAL to wal acceptors.
+ */
+static void
+walprop_register_bgworker(void)
+{
+	BackgroundWorker bgw;
+
+	/* If no wal acceptors are specified, don't start the background worker. */
+	if (*wal_acceptors_list == '\0')
+		return;
+
+	memset(&bgw, 0, sizeof(bgw));
+	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
+	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "neon");
+	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "WalProposerMain");
+	snprintf(bgw.bgw_name, BGW_MAXLEN, "WAL proposer");
+	snprintf(bgw.bgw_type, BGW_MAXLEN, "WAL proposer");
+	bgw.bgw_restart_time = 5;
+	bgw.bgw_notify_pid = 0;
+	bgw.bgw_main_arg = (Datum) 0;
+
+	RegisterBackgroundWorker(&bgw);
+}
+
+/* shmem handling */
+
+static void
+nwp_prepare_shmem(void)
+{
+#if PG_VERSION_NUM >= 150000
+	prev_shmem_request_hook = shmem_request_hook;
+	shmem_request_hook = walproposer_shmem_request;
+#else
+	RequestAddinShmemSpace(WalproposerShmemSize());
+#endif
+	prev_shmem_startup_hook_type = shmem_startup_hook;
+	shmem_startup_hook = nwp_shmem_startup_hook;
+}
+
+#if PG_VERSION_NUM >= 150000
+/*
+ * shmem_request hook: request additional shared resources.  We'll allocate or
+ * attach to the shared resources in nwp_shmem_startup_hook().
+ */
+static void
+walproposer_shmem_request(void)
+{
+	if (prev_shmem_request_hook)
+		prev_shmem_request_hook();
+
+	RequestAddinShmemSpace(WalproposerShmemSize());
+}
+#endif
+
+static void
+nwp_shmem_startup_hook(void)
+{
+	if (prev_shmem_startup_hook_type)
+		prev_shmem_startup_hook_type();
+
+	WalproposerShmemInit();
+}
+
+static WalproposerShmemState *
+walprop_pg_get_shmem_state(void)
+{
+	Assert(walprop_shared != NULL);
+	return walprop_shared;
+}
+
+void
+replication_feedback_set(PageserverFeedback *rf)
+{
+	SpinLockAcquire(&walprop_shared->mutex);
+	memcpy(&walprop_shared->feedback, rf, sizeof(PageserverFeedback));
+	SpinLockRelease(&walprop_shared->mutex);
+}
+
+void
+replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn)
+{
+	SpinLockAcquire(&walprop_shared->mutex);
+	*writeLsn = walprop_shared->feedback.last_received_lsn;
+	*flushLsn = walprop_shared->feedback.disk_consistent_lsn;
+	*applyLsn = walprop_shared->feedback.remote_consistent_lsn;
+	SpinLockRelease(&walprop_shared->mutex);
+}
+
+/*
+ * Start walsender streaming replication
+ */
+static void
+walprop_pg_start_streaming(WalProposer *wp, XLogRecPtr startpos)
+{
+	StartReplicationCmd cmd;
+
+	elog(LOG, "WAL proposer starts streaming at %X/%X",
+		 LSN_FORMAT_ARGS(startpos));
+	cmd.slotname = WAL_PROPOSER_SLOT_NAME;
+	cmd.timeline = wp->greetRequest.timeline;
+	cmd.startpoint = startpos;
+	StartProposerReplication(wp, &cmd);
+}
+
+static void
+walprop_pg_init_walsender(void)
+{
+	am_walsender = true;
+	InitWalSender();
+	InitProcessPhase2();
+
+	/* Create replication slot for WAL proposer if not exists */
+	if (SearchNamedReplicationSlot(WAL_PROPOSER_SLOT_NAME, false) == NULL)
+	{
+		ReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT, false);
+		ReplicationSlotReserveWal();
+		/* Write this slot to disk */
+		ReplicationSlotMarkDirty();
+		ReplicationSlotSave();
+		ReplicationSlotRelease();
+	}
+}
+
+static void
+walprop_pg_init_standalone_sync_safekeepers(void)
+{
+	struct stat stat_buf;
+
+#if PG_VERSION_NUM < 150000
+	ThisTimeLineID = 1;
+#endif
+
+	/*
+	 * Initialize postmaster_alive_fds as WaitEventSet checks them.
+	 *
+	 * Copied from InitPostmasterDeathWatchHandle()
+	 */
+	if (pipe(postmaster_alive_fds) < 0)
+		ereport(FATAL,
+				(errcode_for_file_access(),
+				 errmsg_internal("could not create pipe to monitor postmaster death: %m")));
+	if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
+		ereport(FATAL,
+				(errcode_for_socket_access(),
+				 errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
+
+	ChangeToDataDir();
+
+	/* Create pg_wal directory, if it doesn't exist */
+	if (stat(XLOGDIR, &stat_buf) != 0)
+	{
+		ereport(LOG, (errmsg("creating missing WAL directory \"%s\"", XLOGDIR)));
+		if (MakePGDirectory(XLOGDIR) < 0)
+		{
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not create directory \"%s\": %m",
+							XLOGDIR)));
+			exit(1);
+		}
+	}
+	BackgroundWorkerUnblockSignals();
+}
+
+static void
+walprop_pg_init_bgworker(void)
+{
+#if PG_VERSION_NUM >= 150000
+	TimeLineID	tli;
+#endif
+
+	/* Establish signal handlers. */
+	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+	pqsignal(SIGHUP, SignalHandlerForConfigReload);
+	pqsignal(SIGTERM, die);
+
+	BackgroundWorkerUnblockSignals();
+
+	application_name = (char *) "walproposer";	/* for
+												 * synchronous_standby_names */
+
+#if PG_VERSION_NUM >= 150000
+	/* FIXME pass proper tli to WalProposerInit ? */
+	GetXLogReplayRecPtr(&tli);
+#else
+	GetXLogReplayRecPtr(&ThisTimeLineID);
+#endif
+}
+
+static XLogRecPtr
+walprop_pg_get_flush_rec_ptr(void)
+{
+#if PG_MAJORVERSION_NUM < 15
+	return GetFlushRecPtr();
+#else
+	return GetFlushRecPtr(NULL);
+#endif
+}
+
+static TimestampTz
+walprop_pg_get_current_timestamp(void)
+{
+	return GetCurrentTimestamp();
+}
+
+static TimeLineID
+walprop_pg_get_timeline_id(void)
+{
+#if PG_VERSION_NUM >= 150000
+	/* FIXME don't use hardcoded timeline id */
+	return 1;
+#else
+	return ThisTimeLineID;
+#endif
+}
+
+static void
+walprop_pg_load_libpqwalreceiver(void)
+{
+	load_file("libpqwalreceiver", false);
+	if (WalReceiverFunctions == NULL)
+		elog(ERROR, "libpqwalreceiver didn't initialize correctly");
+}
+
+/* Header in walproposer.h -- Wrapper struct to abstract away the libpq connection */
+struct WalProposerConn
+{
+	PGconn	   *pg_conn;
+	bool		is_nonblocking; /* whether the connection is non-blocking */
+	char	   *recvbuf;		/* last received data from walprop_async_read */
+};
+
+/* Helper function */
+static bool
+ensure_nonblocking_status(WalProposerConn *conn, bool is_nonblocking)
+{
+	/* If we're already correctly blocking or nonblocking, all good */
+	if (is_nonblocking == conn->is_nonblocking)
+		return true;
+
+	/* Otherwise, set it appropriately */
+	if (PQsetnonblocking(conn->pg_conn, is_nonblocking) == -1)
+		return false;
+
+	conn->is_nonblocking = is_nonblocking;
+	return true;
+}
+
+/* Exported function definitions */
+static char *
+walprop_error_message(WalProposerConn *conn)
+{
+	return PQerrorMessage(conn->pg_conn);
+}
+
+static WalProposerConnStatusType
+walprop_status(WalProposerConn *conn)
+{
+	switch (PQstatus(conn->pg_conn))
+	{
+		case CONNECTION_OK:
+			return WP_CONNECTION_OK;
+		case CONNECTION_BAD:
+			return WP_CONNECTION_BAD;
+		default:
+			return WP_CONNECTION_IN_PROGRESS;
+	}
+}
+
+static WalProposerConn *
+walprop_connect_start(char *conninfo)
+{
+	WalProposerConn *conn;
+	PGconn	   *pg_conn;
+	const char *keywords[3];
+	const char *values[3];
+	int			n;
+	char	   *password = neon_auth_token;
+
+	/*
+	 * Connect using the given connection string. If the NEON_AUTH_TOKEN
+	 * environment variable was set, use that as the password.
+	 *
+	 * The connection options are parsed in the order they're given, so when
+	 * we set the password before the connection string, the connection string
+	 * can override the password from the env variable. Seems useful, although
+	 * we don't currently use that capability anywhere.
+	 */
+	n = 0;
+	if (password)
+	{
+		keywords[n] = "password";
+		values[n] = password;
+		n++;
+	}
+	keywords[n] = "dbname";
+	values[n] = conninfo;
+	n++;
+	keywords[n] = NULL;
+	values[n] = NULL;
+	n++;
+	pg_conn = PQconnectStartParams(keywords, values, 1);
+
+	/*
+	 * Allocation of a PQconn can fail, and will return NULL. We want to fully
+	 * replicate the behavior of PQconnectStart here.
+	 */
+	if (!pg_conn)
+		return NULL;
+
+	/*
+	 * And in theory this allocation can fail as well, but it's incredibly
+	 * unlikely if we just successfully allocated a PGconn.
+	 *
+	 * palloc will exit on failure though, so there's not much we could do if
+	 * it *did* fail.
+	 */
+	conn = palloc(sizeof(WalProposerConn));
+	conn->pg_conn = pg_conn;
+	conn->is_nonblocking = false;	/* connections always start in blocking
+									 * mode */
+	conn->recvbuf = NULL;
+	return conn;
+}
+
+static WalProposerConnectPollStatusType
+walprop_connect_poll(WalProposerConn *conn)
+{
+	WalProposerConnectPollStatusType return_val;
+
+	switch (PQconnectPoll(conn->pg_conn))
+	{
+		case PGRES_POLLING_FAILED:
+			return_val = WP_CONN_POLLING_FAILED;
+			break;
+		case PGRES_POLLING_READING:
+			return_val = WP_CONN_POLLING_READING;
+			break;
+		case PGRES_POLLING_WRITING:
+			return_val = WP_CONN_POLLING_WRITING;
+			break;
+		case PGRES_POLLING_OK:
+			return_val = WP_CONN_POLLING_OK;
+			break;
+
+			/*
+			 * There's a comment at its source about this constant being
+			 * unused. We'll expect it's never returned.
+			 */
+		case PGRES_POLLING_ACTIVE:
+			elog(FATAL, "Unexpected PGRES_POLLING_ACTIVE returned from PQconnectPoll");
+
+			/*
+			 * This return is never actually reached, but it's here to make
+			 * the compiler happy
+			 */
+			return WP_CONN_POLLING_FAILED;
+
+		default:
+			Assert(false);
+			return_val = WP_CONN_POLLING_FAILED;	/* keep the compiler quiet */
+	}
+
+	return return_val;
+}
+
+static bool
+walprop_send_query(WalProposerConn *conn, char *query)
+{
+	/*
+	 * We need to be in blocking mode for sending the query to run without
+	 * requiring a call to PQflush
+	 */
+	if (!ensure_nonblocking_status(conn, false))
+		return false;
+
+	/* PQsendQuery returns 1 on success, 0 on failure */
+	if (!PQsendQuery(conn->pg_conn, query))
+		return false;
+
+	return true;
+}
+
+/* Fetch the pending query result; only WP_EXEC_SUCCESS_COPYBOTH is expected. */
+static WalProposerExecStatusType
+walprop_get_query_result(WalProposerConn *conn)
+{
+	PGresult   *result;
+	WalProposerExecStatusType return_val;
+
+	/* Marker variable if we need to log an unexpected success result */
+	char	   *unexpected_success = NULL;
+
+	/* Consume any input that we might be missing */
+	if (!PQconsumeInput(conn->pg_conn))
+		return WP_EXEC_FAILED;
+
+	if (PQisBusy(conn->pg_conn))
+		return WP_EXEC_NEEDS_INPUT;
+
+	result = PQgetResult(conn->pg_conn);
+
+	/*
+	 * PQgetResult returns NULL only if getting the result was successful &
+	 * there's no more of the result to get.
+	 */
+	if (!result)
+	{
+		elog(WARNING, "[libpqwalproposer] Unexpected successful end of command results");
+		return WP_EXEC_UNEXPECTED_SUCCESS;
+	}
+
+	/* Helper macro to reduce boilerplate */
+#define UNEXPECTED_SUCCESS(msg) \
+		return_val = WP_EXEC_UNEXPECTED_SUCCESS; \
+		unexpected_success = msg; \
+		break;
+
+	switch (PQresultStatus(result))
+	{
+			/* "true" success case */
+		case PGRES_COPY_BOTH:
+			return_val = WP_EXEC_SUCCESS_COPYBOTH;
+			break;
+
+			/* Unexpected success case */
+		case PGRES_EMPTY_QUERY:
+			UNEXPECTED_SUCCESS("empty query return");
+		case PGRES_COMMAND_OK:
+			UNEXPECTED_SUCCESS("data-less command end");
+		case PGRES_TUPLES_OK:
+			UNEXPECTED_SUCCESS("tuples return");
+		case PGRES_COPY_OUT:
+			UNEXPECTED_SUCCESS("'Copy Out' response");
+		case PGRES_COPY_IN:
+			UNEXPECTED_SUCCESS("'Copy In' response");
+		case PGRES_SINGLE_TUPLE:
+			UNEXPECTED_SUCCESS("single tuple return");
+		case PGRES_PIPELINE_SYNC:
+			UNEXPECTED_SUCCESS("pipeline sync point");
+
+			/* Failure cases */
+		case PGRES_BAD_RESPONSE:
+		case PGRES_NONFATAL_ERROR:
+		case PGRES_FATAL_ERROR:
+		case PGRES_PIPELINE_ABORTED:
+			return_val = WP_EXEC_FAILED;
+			break;
+
+		default:
+			Assert(false);
+			return_val = WP_EXEC_FAILED;	/* keep the compiler quiet */
+	}
+	PQclear(result);			/* only the status was needed; don't leak it */
+
+	if (unexpected_success)
+		elog(WARNING, "[libpqwalproposer] Unexpected successful %s", unexpected_success);
+
+	return return_val;
+}
+
+static pgsocket
+walprop_socket(WalProposerConn *conn)
+{
+	return PQsocket(conn->pg_conn);
+}
+
+static int
+walprop_flush(WalProposerConn *conn)
+{
+	return (PQflush(conn->pg_conn));
+}
+
+static void
+walprop_finish(WalProposerConn *conn)
+{
+	if (conn->recvbuf != NULL)
+		PQfreemem(conn->recvbuf);
+	PQfinish(conn->pg_conn);
+	pfree(conn);
+}
+
+/*
+ * Receive a message from the safekeeper.
+ *
+ * On success, the data is placed in *buf. It is valid until the next call
+ * to this function.
+ */
+static PGAsyncReadResult
+walprop_async_read(WalProposerConn *conn, char **buf, int *amount)
+{
+	int			result;
+
+	if (conn->recvbuf != NULL)
+	{
+		PQfreemem(conn->recvbuf);
+		conn->recvbuf = NULL;
+	}
+
+	/* Call PQconsumeInput so that we have the data we need */
+	if (!PQconsumeInput(conn->pg_conn))
+	{
+		*amount = 0;
+		*buf = NULL;
+		return PG_ASYNC_READ_FAIL;
+	}
+
+	/*
+	 * The docs for PQgetCopyData list the return values as: 0 if the copy is
+	 * still in progress, but no "complete row" is available; -1 if the copy
+	 * is done; -2 if an error occurred; (> 0) if it was successful, in which
+	 * case that value is the amount transferred.
+	 *
+	 * The protocol we use between walproposer and safekeeper means that we
+	 * *usually* wouldn't expect to see that the copy is done, but this can
+	 * sometimes be triggered by the server returning an ErrorResponse (which
+	 * also happens to have the effect that the copy is done).
+	 */
+	switch (result = PQgetCopyData(conn->pg_conn, &conn->recvbuf, true))
+	{
+		case 0:
+			*amount = 0;
+			*buf = NULL;
+			return PG_ASYNC_READ_TRY_AGAIN;
+		case -1:
+			{
+				/*
+				 * If we get -1, it's probably because of a server error; the
+				 * safekeeper won't normally send a CopyDone message. We can
+				 * check PQgetResult to make sure that the server failed;
+				 * it'll always result in PGRES_FATAL_ERROR.
+				 */
+				PGresult   *res = PQgetResult(conn->pg_conn);
+				ExecStatusType status = PQresultStatus(res);
+
+				PQclear(res);	/* only the status is needed; don't leak the result */
+				if (status != PGRES_FATAL_ERROR)
+					elog(FATAL, "unexpected result status %d after failed PQgetCopyData", status);
+
+				/*
+				 * Any actual error will be properly reported by calls to
+				 * PQerrorMessage -- we don't have to do anything else
+				 */
+				*amount = 0;
+				*buf = NULL;
+				return PG_ASYNC_READ_FAIL;
+			}
+		case -2:
+			*amount = 0;
+			*buf = NULL;
+			return PG_ASYNC_READ_FAIL;
+		default:
+			/* Positive values indicate the size of the returned result */
+			*amount = result;
+			*buf = conn->recvbuf;
+			return PG_ASYNC_READ_SUCCESS;
+	}
+}
+
+static PGAsyncWriteResult
+walprop_async_write(WalProposerConn *conn, void const *buf, size_t size)
+{
+	int			result;
+
+	/* If we aren't in non-blocking mode, switch to it. */
+	if (!ensure_nonblocking_status(conn, true))
+		return PG_ASYNC_WRITE_FAIL;
+
+	/*
+	 * The docs for PQputcopyData list the return values as: 1 if the data was
+	 * queued, 0 if it was not queued because of full buffers, or -1 if an
+	 * error occurred
+	 */
+	result = PQputCopyData(conn->pg_conn, buf, size);
+
+	/*
+	 * We won't get a result of zero because walproposer always empties the
+	 * connection's buffers before sending more
+	 */
+	Assert(result != 0);
+
+	switch (result)
+	{
+		case 1:
+			/* good -- continue */
+			break;
+		case -1:
+			return PG_ASYNC_WRITE_FAIL;
+		default:
+			elog(FATAL, "invalid return %d from PQputCopyData", result);
+	}
+
+	/*
+	 * After queueing the data, we still need to flush to get it to send. This
+	 * might take multiple tries, but we don't want to wait around until it's
+	 * done.
+	 *
+	 * PQflush has the following returns (directly quoting the docs): 0 if
+	 * successful, 1 if it was unable to send all the data in the send queue
+	 * yet -1 if it failed for some reason
+	 */
+	switch (result = PQflush(conn->pg_conn))
+	{
+		case 0:
+			return PG_ASYNC_WRITE_SUCCESS;
+		case 1:
+			return PG_ASYNC_WRITE_TRY_FLUSH;
+		case -1:
+			return PG_ASYNC_WRITE_FAIL;
+		default:
+			elog(FATAL, "invalid return %d from PQflush", result);
+	}
+}
+
+/*
+ * This function is very similar to walprop_async_write. For more
+ * information, refer to the comments there.
+ */
+static bool
+walprop_blocking_write(WalProposerConn *conn, void const *buf, size_t size)
+{
+	int			result;
+
+	/* If we are in non-blocking mode, switch out of it. */
+	if (!ensure_nonblocking_status(conn, false))
+		return false;
+
+	if ((result = PQputCopyData(conn->pg_conn, buf, size)) == -1)
+		return false;
+
+	Assert(result == 1);
+
+	/* Because the connection is blocking, flushing returns 0 or -1 */
+
+	if ((result = PQflush(conn->pg_conn)) == -1)
+		return false;
+
+	Assert(result == 0);
+	return true;
+}
+
+/*
+ * Subscribe for new WAL and stream it in the loop to safekeepers.
+ *
+ * At the moment, this never returns, but an ereport(ERROR) will take us back
+ * to the main loop.
+ */
+static void
+StartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd)
+{
+	XLogRecPtr	FlushPtr;
+	TimeLineID	currTLI;
+
+#if PG_VERSION_NUM < 150000
+	if (ThisTimeLineID == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("IDENTIFY_SYSTEM has not been run before START_REPLICATION")));
+#endif
+
+	/*
+	 * We assume here that we're logging enough information in the WAL for
+	 * log-shipping, since this is checked in PostmasterMain().
+	 *
+	 * NOTE: wal_level can only change at shutdown, so in most cases it is
+	 * difficult for there to be WAL data that we can still see that was
+	 * written at wal_level='minimal'.
+	 */
+
+	if (cmd->slotname)
+	{
+		ReplicationSlotAcquire(cmd->slotname, true);
+		if (SlotIsLogical(MyReplicationSlot))
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot use a logical replication slot for physical replication")));
+
+		/*
+		 * We don't need to verify the slot's restart_lsn here; instead we
+		 * rely on the caller requesting the starting point to use.  If the
+		 * WAL segment doesn't exist, we'll fail later.
+		 */
+	}
+
+	/*
+	 * Select the timeline. If it was given explicitly by the client, use
+	 * that. Otherwise use the timeline of the last replayed record, which is
+	 * kept in ThisTimeLineID.
+	 *
+	 * Neon doesn't currently use PG Timelines, but it may in the future, so
+	 * we keep this code around to lighten the load for when we need it.
+	 */
+#if PG_VERSION_NUM >= 150000
+	FlushPtr = GetFlushRecPtr(&currTLI);
+#else
+	FlushPtr = GetFlushRecPtr();
+	currTLI = ThisTimeLineID;
+#endif
+
+	/*
+	 * When we first start replication the standby will be behind the primary.
+	 * For some applications, for example synchronous replication, it is
+	 * important to have a clear state for this initial catchup mode, so we
+	 * can trigger actions when we change streaming state later. We may stay
+	 * in this state for a long time, which is exactly why we want to be able
+	 * to monitor whether or not we are still here.
+	 */
+	WalSndSetState(WALSNDSTATE_CATCHUP);
+
+	/*
+	 * Don't allow a request to stream from a future point in WAL that hasn't
+	 * been flushed to disk in this server yet.
+	 */
+	if (FlushPtr < cmd->startpoint)
+	{
+		ereport(ERROR,
+				(errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X",
+						LSN_FORMAT_ARGS(cmd->startpoint),
+						LSN_FORMAT_ARGS(FlushPtr))));
+	}
+
+	/* Start streaming from the requested point */
+	sentPtr = cmd->startpoint;
+
+	/* Initialize shared memory status, too */
+	SpinLockAcquire(&MyWalSnd->mutex);
+	MyWalSnd->sentPtr = sentPtr;
+	SpinLockRelease(&MyWalSnd->mutex);
+
+	SyncRepInitConfig();
+
+	/* Infinite send loop, never returns */
+	WalSndLoop(wp);
+
+	WalSndSetState(WALSNDSTATE_STARTUP);
+
+	if (cmd->slotname)
+		ReplicationSlotRelease();
+}
+
+/*
+ * Main loop that waits for LSN updates and calls the walproposer.
+ * Synchronous replication sets latch in WalSndWakeup at walsender.c
+ */
+static void
+WalSndLoop(WalProposer *wp)
+{
+	/* Clear any already-pending wakeups */
+	ResetLatch(MyLatch);
+
+	for (;;)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		XLogBroadcastWalProposer(wp);
+
+		if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
+			WalSndSetState(WALSNDSTATE_STREAMING);
+		WalProposerPoll(wp);
+	}
+}
+
+/*
+ * Notify walproposer about the new WAL position.
+ */
+static void
+XLogBroadcastWalProposer(WalProposer *wp)
+{
+	XLogRecPtr	startptr;
+	XLogRecPtr	endptr;
+
+	/* Start from the last sent position */
+	startptr = sentPtr;
+
+	/*
+	 * Streaming the current timeline on a primary.
+	 *
+	 * Attempt to send all data that's already been written out and fsync'd to
+	 * disk.  We cannot go further than what's been written out given the
+	 * current implementation of WALRead().  And in any case it's unsafe to
+	 * send WAL that is not securely down to disk on the primary: if the
+	 * primary subsequently crashes and restarts, standbys must not have
+	 * applied any WAL that got lost on the primary.
+	 */
+#if PG_VERSION_NUM >= 150000
+	endptr = GetFlushRecPtr(NULL);
+#else
+	endptr = GetFlushRecPtr();
+#endif
+
+	/*
+	 * Record the current system time as an approximation of the time at which
+	 * this WAL location was written for the purposes of lag tracking.
+	 *
+	 * In theory we could make XLogFlush() record a time in shmem whenever WAL
+	 * is flushed and we could get that time as well as the LSN when we call
+	 * GetFlushRecPtr() above (and likewise for the cascading standby
+	 * equivalent), but rather than putting any new code into the hot WAL path
+	 * it seems good enough to capture the time here.  We should reach this
+	 * after XLogFlush() runs WalSndWakeupProcessRequests(), and although that
+	 * may take some time, we read the WAL flush pointer and take the time
+	 * very close to together here so that we'll get a later position if it is
+	 * still moving.
+	 *
+	 * Because LagTrackerWrite ignores samples when the LSN hasn't advanced,
+	 * this gives us a cheap approximation for the WAL flush time for this
+	 * LSN.
+	 *
+	 * Note that the LSN is not necessarily the LSN for the data contained in
+	 * the present message; it's the end of the WAL, which might be further
+	 * ahead.  All the lag tracking machinery cares about is finding out when
+	 * that arbitrary LSN is eventually reported as written, flushed and
+	 * applied, so that it can measure the elapsed time.
+	 */
+	LagTrackerWrite(endptr, GetCurrentTimestamp());
+
+	/* Do we have any work to do? */
+	Assert(startptr <= endptr);
+	if (endptr <= startptr)
+		return;
+
+	WalProposerBroadcast(wp, startptr, endptr);
+	sentPtr = endptr;
+
+	/* Update shared memory status */
+	{
+		WalSnd	   *walsnd = MyWalSnd;
+
+		SpinLockAcquire(&walsnd->mutex);
+		walsnd->sentPtr = sentPtr;
+		SpinLockRelease(&walsnd->mutex);
+	}
+
+	/* Report progress of XLOG streaming in PS display */
+	if (update_process_title)
+	{
+		char		activitymsg[50];
+
+		snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
+				 LSN_FORMAT_ARGS(sentPtr));
+		set_ps_display(activitymsg);
+	}
+}
+
+/*
+ * Receive WAL from startpos up to endpos on the given timeline from
+ * safekeeper sk (normally the most advanced one) over a replication
+ * connection, writing it into local segments with XLogWalPropWrite().
+ * Returns true only if WAL up to at least endpos was received; false if
+ * connecting or starting the stream failed, or the stream ended early.
+ */
+static bool
+WalProposerRecovery(Safekeeper *sk, TimeLineID timeline, XLogRecPtr startpos, XLogRecPtr endpos)
+{
+	char	   *err;
+	WalReceiverConn *wrconn;
+	WalRcvStreamOptions options;
+	char		conninfo[MAXCONNINFO];
+
+	if (!neon_auth_token)
+		memcpy(conninfo, sk->conninfo, MAXCONNINFO);
+	else
+	{
+		int			written = snprintf(conninfo, MAXCONNINFO, "password=%s %s", neon_auth_token, sk->conninfo);
+
+		/* snprintf() returns the untruncated length, so == MAXCONNINFO is truncation too */
+		if (written < 0 || written >= MAXCONNINFO)
+			elog(FATAL, "could not append password to the safekeeper connection string");
+	}
+
+#if PG_MAJORVERSION_NUM < 16
+	wrconn = walrcv_connect(conninfo, false, "wal_proposer_recovery", &err);
+#else
+	wrconn = walrcv_connect(conninfo, false, false, "wal_proposer_recovery", &err);
+#endif
+
+	if (!wrconn)
+	{
+		ereport(WARNING,
+				(errmsg("could not connect to WAL acceptor %s:%s: %s",
+						sk->host, sk->port, err)));
+		return false;
+	}
+	elog(LOG,
+		 "start recovery from %s:%s starting from %X/%08X till %X/%08X timeline %d",
+		 sk->host, sk->port, (uint32) (startpos >> 32),
+		 (uint32) startpos, (uint32) (endpos >> 32), (uint32) endpos, timeline);
+
+	options.logical = false;
+	options.startpoint = startpos;
+	options.slotname = NULL;
+	options.proto.physical.startpointTLI = timeline;
+
+	if (walrcv_startstreaming(wrconn, &options))
+	{
+		XLogRecPtr	rec_start_lsn;
+		XLogRecPtr	rec_end_lsn = 0;
+		int			len;
+		char	   *buf;
+		pgsocket	wait_fd = PGINVALID_SOCKET;
+
+		while ((len = walrcv_receive(wrconn, &buf, &wait_fd)) >= 0)
+		{
+			if (len == 0)
+			{
+				(void) WaitLatchOrSocket(
+										 MyLatch, WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE, wait_fd,
+										 -1, WAIT_EVENT_WAL_RECEIVER_MAIN);
+			}
+			else
+			{
+				Assert(buf[0] == 'w' || buf[0] == 'k');
+				if (buf[0] == 'k')
+					continue;	/* keepalive */
+				memcpy(&rec_start_lsn, &buf[XLOG_HDR_START_POS], sizeof rec_start_lsn);
+				rec_start_lsn = pg_ntoh64(rec_start_lsn);
+				rec_end_lsn = rec_start_lsn + len - XLOG_HDR_SIZE;
+
+				/* write WAL to disk */
+				XLogWalPropWrite(&buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn);
+
+				ereport(DEBUG1,
+						(errmsg("Recover message %X/%X length %d",
+								LSN_FORMAT_ARGS(rec_start_lsn), len)));
+				if (rec_end_lsn >= endpos)
+					break;
+			}
+		}
+		ereport(LOG,
+				(errmsg("end of replication stream at %X/%X: %m",
+						LSN_FORMAT_ARGS(rec_end_lsn))));
+		walrcv_disconnect(wrconn);
+
+		/* failed to receive all WAL till endpos */
+		if (rec_end_lsn < endpos)
+			return false;
+	}
+	else
+	{
+		ereport(LOG,
+				(errmsg("primary server contains no more WAL on requested timeline %u LSN %X/%08X",
+						timeline, (uint32) (startpos >> 32), (uint32) startpos)));
+		walrcv_disconnect(wrconn);	/* don't leak the connection on this path */
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * These variables are used similarly to openLogFile/SegNo,
+ * but for walproposer to write the XLOG during recovery. walpropFileTLI is the TimeLineID
+ * corresponding to the filename of walpropFile.
+ */
+static int	walpropFile = -1;
+static TimeLineID walpropFileTLI = 0;
+static XLogSegNo walpropSegNo = 0;
+
+/*
+ * Write nbytes of XLOG data from buf into local WAL segment files, starting at WAL position recptr.
+ */
+static void
+XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr)
+{
+	int			startoff;		/* byte offset of recptr inside its segment */
+	int			byteswritten;	/* result of the last pg_pwrite() */
+
+	while (nbytes > 0)
+	{
+		int			segbytes;	/* bytes to write into the current segment */
+
+		/* Close the current segment if it's completed */
+		if (walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size))
+			XLogWalPropClose(recptr);
+
+		if (walpropFile < 0)
+		{
+#if PG_VERSION_NUM >= 150000
+			/* FIXME Is it ok to use hardcoded value here? */
+			TimeLineID	tli = 1;
+#else
+			bool		use_existent = true;
+#endif
+			/* Create/use the log file of the segment that contains recptr */
+			XLByteToSeg(recptr, walpropSegNo, wal_segment_size);
+#if PG_VERSION_NUM >= 150000
+			walpropFile = XLogFileInit(walpropSegNo, tli);
+			walpropFileTLI = tli;
+#else
+			walpropFile = XLogFileInit(walpropSegNo, &use_existent, false);
+			walpropFileTLI = ThisTimeLineID;
+#endif
+		}
+
+		/* Calculate the start offset of the received logs */
+		startoff = XLogSegmentOffset(recptr, wal_segment_size);
+
+		if (startoff + nbytes > wal_segment_size)
+			segbytes = wal_segment_size - startoff;	/* stop at the segment end; the rest goes to the next one */
+		else
+			segbytes = nbytes;
+
+		/* OK to write the logs; errno is cleared so a failure that leaves it unset is detectable */
+		errno = 0;
+
+		byteswritten = pg_pwrite(walpropFile, buf, segbytes, (off_t) startoff);
+		if (byteswritten <= 0)
+		{
+			char		xlogfname[MAXFNAMELEN];
+			int			save_errno;
+
+			/* if write didn't set errno, assume no disk space */
+			if (errno == 0)
+				errno = ENOSPC;
+
+			/* keep errno intact for %m across the XLogFileName() call */
+			save_errno = errno;
+			XLogFileName(xlogfname, walpropFileTLI, walpropSegNo, wal_segment_size);
+			errno = save_errno;
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not write to log segment %s "
+							"at offset %u, length %lu: %m",
+							xlogfname, startoff, (unsigned long) segbytes)));
+		}
+
+		/* Update state for write; a short write simply continues from here on the next iteration */
+		recptr += byteswritten;
+
+		nbytes -= byteswritten;
+		buf += byteswritten;
+	}
+
+	/*
+	 * Close the current segment if it's fully written up in the last cycle of
+	 * the loop.
+	 */
+	if (walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size))
+	{
+		XLogWalPropClose(recptr);
+	}
+}
+
+/*
+ * Close the current segment file and mark it closed; PANICs if close() fails.
+ */
+static void
+XLogWalPropClose(XLogRecPtr recptr)
+{
+	Assert(walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size));
+
+	if (close(walpropFile) != 0)
+	{
+		char		xlogfname[MAXFNAMELEN];
+		int			save_errno = errno;	/* XLogFileName() may clobber errno */
+
+		XLogFileName(xlogfname, walpropFileTLI, walpropSegNo, wal_segment_size);
+		errno = save_errno;		/* restore for errcode_for_file_access() and %m */
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not close log segment %s: %m", xlogfname)));
+	}
+
+	walpropFile = -1;
+}
+
+/* Read count bytes of WAL at startptr into buf, raising an error on failure. */
+static void
+walprop_pg_wal_read(XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count)
+{
+	WALReadError read_err;
+	TimeLineID	tli = walprop_pg_get_timeline_id();
+	bool		ok;
+
+	ok = WALRead(state, buf, startptr, count, tli, &read_err);
+	if (ok)
+		return;
+
+	/* WALRead() filled in read_err; turn it into a reported error. */
+	WALReadRaiseError(&read_err);
+}
+
+static XLogReaderState *
+walprop_pg_wal_reader_allocate(void)
+{
+	return XLogReaderAllocate(wal_segment_size, NULL, XL_ROUTINE(.segment_open = wal_segment_open,.segment_close = wal_segment_close), NULL);	/* only the segment open/close callbacks are supplied */
+}
+
+static WaitEventSet *waitEvents;
+
+/* Free the wait event set if one exists and forget the pointer. */
+static void
+walprop_pg_free_event_set(void)
+{
+	if (waitEvents == NULL)
+		return;
+	FreeWaitEventSet(waitEvents);
+	waitEvents = NULL;
+}
+
+/* Create the event set: our latch, postmaster death, and room for each safekeeper. */
+static void
+walprop_pg_init_event_set(int n_safekeepers)
+{
+	int			capacity = n_safekeepers + 2;	/* + latch, + postmaster death */
+
+	if (waitEvents != NULL)
+		elog(FATAL, "double-initialization of event set");
+	waitEvents = CreateWaitEventSet(TopMemoryContext, capacity);
+	AddWaitEventToSet(waitEvents, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
+	AddWaitEventToSet(waitEvents, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET, NULL, NULL);
+}
+
+static void
+walprop_pg_update_event_set(Safekeeper *sk, uint32 events)
+{
+	int			pos = sk->eventPos;	/* -1 when sk has no registered event */
+
+	Assert(pos != -1);
+	ModifyWaitEvent(waitEvents, pos, events, NULL);	/* replace the awaited events */
+}
+
+static void
+walprop_pg_add_safekeeper_event_set(Safekeeper *sk, uint32 events)
+{
+	sk->eventPos = AddWaitEventToSet(waitEvents, events, walprop_socket(sk->conn), NULL, sk);	/* sk is passed as the event's user data; walprop_pg_wait_event_set() reads it back */
+}
+
+static int
+walprop_pg_wait_event_set(long timeout, Safekeeper **sk, uint32 *events)
+{
+	WaitEvent	event = {0};
+	int			rc = 0;
+	bool		late_cv_trigger = false;
+
+	*sk = NULL;					/* out: safekeeper whose socket fired, if any */
+	*events = 0;				/* out: WL_* flags describing what happened */
+
+#if PG_MAJORVERSION_NUM >= 16
+	if (WalSndCtl != NULL)
+		ConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);
+#endif
+
+	/*
+	 * Wait for an event or timeout: a safekeeper socket becomes available
+	 * for READ or WRITE, or our latch is set.  The latch is set on PG15- when
+	 * a process wakes up the WalSender, and on PG16+ when
+	 * WalSndCtl->wal_flush_cv is triggered.
+	 */
+	rc = WaitEventSetWait(waitEvents, timeout,
+						  &event, 1, WAIT_EVENT_WAL_SENDER_MAIN);
+#if PG_MAJORVERSION_NUM >= 16
+	if (WalSndCtl != NULL)
+		late_cv_trigger = ConditionVariableCancelSleep();
+#endif
+
+	/*
+	 * The wait was ended by our latch (walsenders' latch is set on each wal
+	 * flush).  No pm death check is needed thanks to WL_EXIT_ON_PM_DEATH.
+	 */
+	if ((rc == 1 && event.events & WL_LATCH_SET) || late_cv_trigger)
+	{
+		/* Reset our latch */
+		ResetLatch(MyLatch);
+		*events = WL_LATCH_SET;
+		return 1;
+	}
+
+	/*
+	 * If the event contains something about the socket, it means we got an
+	 * event from a safekeeper socket.
+	 */
+	if (rc == 1 && (event.events & (WL_SOCKET_MASK)))
+	{
+		*sk = (Safekeeper *) event.user_data;
+		*events = event.events;
+		return 1;
+	}
+
+	/* XXX: Can we have non-timeout event here? */
+	*events = event.events;
+	return rc;
+}
+
+static void
+walprop_pg_finish_sync_safekeepers(XLogRecPtr lsn)
+{
+	printf("%X/%X\n", LSN_FORMAT_ARGS(lsn));	/* report the LSN on stdout */
+	exit(0);					/* then end the process successfully */
+}
+
+/*
+ * Copy into rf the feedback of the safekeeper with the highest
+ * last_received_lsn, then publish it via replication_feedback_set().
+ */
+static void
+GetLatestNeonFeedback(PageserverFeedback *rf, WalProposer *wp)
+{
+	int			best = 0;
+	XLogRecPtr	best_lsn = InvalidXLogRecPtr;
+	PageserverFeedback *src;
+
+	for (int i = 0; i < wp->n_safekeepers; i++)
+	{
+		XLogRecPtr	lsn = wp->safekeeper[i].appendResponse.rf.last_received_lsn;
+
+		if (lsn <= best_lsn)
+			continue;
+		best = i;
+		best_lsn = lsn;
+	}
+
+	src = &wp->safekeeper[best].appendResponse.rf;
+	rf->currentClusterSize = src->currentClusterSize;
+	rf->last_received_lsn = src->last_received_lsn;
+	rf->disk_consistent_lsn = src->disk_consistent_lsn;
+	rf->remote_consistent_lsn = src->remote_consistent_lsn;
+	rf->replytime = src->replytime;
+
+	elog(DEBUG2, "GetLatestNeonFeedback: currentClusterSize %lu,"
+		 " last_received_lsn %X/%X, disk_consistent_lsn %X/%X, remote_consistent_lsn %X/%X, replytime %lu",
+		 rf->currentClusterSize, LSN_FORMAT_ARGS(rf->last_received_lsn), LSN_FORMAT_ARGS(rf->disk_consistent_lsn),
+		 LSN_FORMAT_ARGS(rf->remote_consistent_lsn), rf->replytime);
+	replication_feedback_set(rf);
+}
+
+/*
+ * Combine hot standby feedbacks from all safekeepers: the oldest normal xmin
+ * and catalog_xmin among those with ts != 0, InvalidFullTransactionId if none.
+ */
+static void
+CombineHotStanbyFeedbacks(HotStandbyFeedback *hs, WalProposer *wp)
+{
+	hs->ts = 0;
+	hs->xmin.value = ~0;		/* largest unsigned value */
+	hs->catalog_xmin.value = ~0;	/* largest unsigned value */
+
+	for (int i = 0; i < wp->n_safekeepers; i++)
+	{
+		HotStandbyFeedback *skhs = &wp->safekeeper[i].appendResponse.hs;
+
+		if (skhs->ts == 0)
+			continue;			/* no feedback from this safekeeper */
+		if (FullTransactionIdIsNormal(skhs->xmin)
+			&& FullTransactionIdPrecedes(skhs->xmin, hs->xmin))
+		{
+			hs->xmin = skhs->xmin;
+			hs->ts = skhs->ts;
+		}
+		if (FullTransactionIdIsNormal(skhs->catalog_xmin)
+			&& FullTransactionIdPrecedes(skhs->catalog_xmin, hs->catalog_xmin))
+		{
+			hs->catalog_xmin = skhs->catalog_xmin;
+			hs->ts = skhs->ts;
+		}
+	}
+
+	if (hs->xmin.value == ~0)
+		hs->xmin = InvalidFullTransactionId;
+	if (hs->catalog_xmin.value == ~0)
+		hs->catalog_xmin = InvalidFullTransactionId;
+}
+
+static void
+walprop_pg_process_safekeeper_feedback(WalProposer *wp, XLogRecPtr commitLsn)
+{
+	HotStandbyFeedback hsFeedback;
+	XLogRecPtr	diskConsistentLsn;	/* value before this call, to detect a change */
+
+	diskConsistentLsn = quorumFeedback.rf.disk_consistent_lsn;
+
+	if (!wp->config->syncSafekeepers)
+	{
+		/* Get PageserverFeedback fields from the most advanced safekeeper */
+		GetLatestNeonFeedback(&quorumFeedback.rf, wp);
+		SetZenithCurrentClusterSize(quorumFeedback.rf.currentClusterSize);
+	}
+
+	if (commitLsn > quorumFeedback.flushLsn || diskConsistentLsn != quorumFeedback.rf.disk_consistent_lsn)
+	{
+		/* flushLsn only moves forward */
+		if (commitLsn > quorumFeedback.flushLsn)
+			quorumFeedback.flushLsn = commitLsn;
+
+		/* advance the replication slot */
+		if (!wp->config->syncSafekeepers)
+			ProcessStandbyReply(
+			/* write_lsn - This is what is durably stored in WAL service. */
+								quorumFeedback.flushLsn,
+			/* flush_lsn - This is what is durably stored in WAL service. */
+								quorumFeedback.flushLsn,
+
+			/*
+			 * apply_lsn - This is what is processed and durably saved at the
+			 * pageserver.
+			 */
+								quorumFeedback.rf.disk_consistent_lsn,
+								walprop_pg_get_current_timestamp(), false);
+	}
+
+	CombineHotStanbyFeedbacks(&hsFeedback, wp);
+	if (hsFeedback.ts != 0 && memcmp(&hsFeedback, &quorumFeedback.hs, sizeof hsFeedback) != 0)
+	{
+		quorumFeedback.hs = hsFeedback;
+		if (!wp->config->syncSafekeepers)
+			ProcessStandbyHSFeedback(hsFeedback.ts,
+									 XidFromFullTransactionId(hsFeedback.xmin),
+									 EpochFromFullTransactionId(hsFeedback.xmin),
+									 XidFromFullTransactionId(hsFeedback.catalog_xmin),
+									 EpochFromFullTransactionId(hsFeedback.catalog_xmin));
+	}
+}
+
+static void
+walprop_pg_confirm_wal_streamed(XLogRecPtr lsn)
+{
+	if (MyReplicationSlot != NULL)	/* nothing to confirm without a slot */
+		PhysicalConfirmReceivedLocation(lsn);
+}
+
+static const walproposer_api walprop_pg = {	/* postgres-dependent callbacks handed to walproposer */
+	.get_shmem_state = walprop_pg_get_shmem_state,
+	.start_streaming = walprop_pg_start_streaming,
+	.get_flush_rec_ptr = walprop_pg_get_flush_rec_ptr,
+	.get_current_timestamp = walprop_pg_get_current_timestamp,
+	.get_timeline_id = walprop_pg_get_timeline_id,
+	.conn_error_message = walprop_error_message,
+	.conn_status = walprop_status,
+	.conn_connect_start = walprop_connect_start,
+	.conn_connect_poll = walprop_connect_poll,
+	.conn_send_query = walprop_send_query,
+	.conn_get_query_result = walprop_get_query_result,
+	.conn_flush = walprop_flush,
+	.conn_finish = walprop_finish,
+	.conn_async_read = walprop_async_read,
+	.conn_async_write = walprop_async_write,
+	.conn_blocking_write = walprop_blocking_write,
+	.recovery_download = WalProposerRecovery,	/* streams WAL from a safekeeper into local segments */
+	.wal_read = walprop_pg_wal_read,	/* WALRead() wrapper */
+	.wal_reader_allocate = walprop_pg_wal_reader_allocate,
+	.free_event_set = walprop_pg_free_event_set,
+	.init_event_set = walprop_pg_init_event_set,
+	.update_event_set = walprop_pg_update_event_set,
+	.add_safekeeper_event_set = walprop_pg_add_safekeeper_event_set,
+	.wait_event_set = walprop_pg_wait_event_set,
+	.strong_random = pg_strong_random,
+	.get_redo_start_lsn = GetRedoStartLsn,
+	.finish_sync_safekeepers = walprop_pg_finish_sync_safekeepers,	/* prints the LSN and exits */
+	.process_safekeeper_feedback = walprop_pg_process_safekeeper_feedback,
+	.confirm_wal_streamed = walprop_pg_confirm_wal_streamed,	/* calls PhysicalConfirmReceivedLocation() when a slot is held */
+};
diff --git a/pgxn/neon/walproposer_utils.c b/pgxn/neon/walproposer_utils.c
deleted file mode 100644
index 05030360f6..0000000000
--- a/pgxn/neon/walproposer_utils.c
+++ /dev/null
@@ -1,659 +0,0 @@
-#include "postgres.h"
-
-#include "access/timeline.h"
-#include "access/xlogutils.h"
-#include "common/logging.h"
-#include "common/ip.h"
-#include "funcapi.h"
-#include "libpq/libpq.h"
-#include "libpq/pqformat.h"
-#include "miscadmin.h"
-#include "postmaster/interrupt.h"
-#include "replication/slot.h"
-#include "walproposer_utils.h"
-#include "replication/walsender_private.h"
-
-#include "storage/ipc.h"
-#include "utils/builtins.h"
-#include "utils/ps_status.h"
-
-#include "libpq-fe.h"
-#include <netinet/tcp.h>
-#include <unistd.h>
-
-#if PG_VERSION_NUM >= 150000
-#include "access/xlogutils.h"
-#include "access/xlogrecovery.h"
-#endif
-#if PG_MAJORVERSION_NUM >= 16
-#include "utils/guc.h"
-#endif
-
-/*
- * These variables are used similarly to openLogFile/SegNo,
- * but for walproposer to write the XLOG during recovery. walpropFileTLI is the TimeLineID
- * corresponding the filename of walpropFile.
- */
-static int	walpropFile = -1;
-static TimeLineID walpropFileTLI = 0;
-static XLogSegNo walpropSegNo = 0;
-
-/* START cloned file-local variables and functions from walsender.c */
-
-/*
- * How far have we sent WAL already? This is also advertised in
- * MyWalSnd->sentPtr.  (Actually, this is the next WAL location to send.)
- */
-static XLogRecPtr sentPtr = InvalidXLogRecPtr;
-
-static void WalSndLoop(void);
-static void XLogBroadcastWalProposer(void);
-/* END cloned file-level variables and functions from walsender.c */
-
-int
-CompareLsn(const void *a, const void *b)
-{
-	XLogRecPtr	lsn1 = *((const XLogRecPtr *) a);
-	XLogRecPtr	lsn2 = *((const XLogRecPtr *) b);
-
-	if (lsn1 < lsn2)
-		return -1;
-	else if (lsn1 == lsn2)
-		return 0;
-	else
-		return 1;
-}
-
-/* Returns a human-readable string corresonding to the SafekeeperState
- *
- * The string should not be freed.
- *
- * The strings are intended to be used as a prefix to "state", e.g.:
- *
- *   elog(LOG, "currently in %s state", FormatSafekeeperState(sk->state));
- *
- * If this sort of phrasing doesn't fit the message, instead use something like:
- *
- *   elog(LOG, "currently in state [%s]", FormatSafekeeperState(sk->state));
- */
-char *
-FormatSafekeeperState(SafekeeperState state)
-{
-	char	   *return_val = NULL;
-
-	switch (state)
-	{
-		case SS_OFFLINE:
-			return_val = "offline";
-			break;
-		case SS_CONNECTING_READ:
-		case SS_CONNECTING_WRITE:
-			return_val = "connecting";
-			break;
-		case SS_WAIT_EXEC_RESULT:
-			return_val = "receiving query result";
-			break;
-		case SS_HANDSHAKE_RECV:
-			return_val = "handshake (receiving)";
-			break;
-		case SS_VOTING:
-			return_val = "voting";
-			break;
-		case SS_WAIT_VERDICT:
-			return_val = "wait-for-verdict";
-			break;
-		case SS_SEND_ELECTED_FLUSH:
-			return_val = "send-announcement-flush";
-			break;
-		case SS_IDLE:
-			return_val = "idle";
-			break;
-		case SS_ACTIVE:
-			return_val = "active";
-			break;
-	}
-
-	Assert(return_val != NULL);
-
-	return return_val;
-}
-
-/* Asserts that the provided events are expected for given safekeeper's state */
-void
-AssertEventsOkForState(uint32 events, Safekeeper *sk)
-{
-	uint32		expected = SafekeeperStateDesiredEvents(sk->state);
-
-	/*
-	 * The events are in-line with what we're expecting, under two conditions:
-	 * (a) if we aren't expecting anything, `events` has no read- or
-	 * write-ready component. (b) if we are expecting something, there's
-	 * overlap (i.e. `events & expected != 0`)
-	 */
-	bool		events_ok_for_state;	/* long name so the `Assert` is more
-										 * clear later */
-
-	if (expected == WL_NO_EVENTS)
-		events_ok_for_state = ((events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) == 0);
-	else
-		events_ok_for_state = ((events & expected) != 0);
-
-	if (!events_ok_for_state)
-	{
-		/*
-		 * To give a descriptive message in the case of failure, we use elog
-		 * and then an assertion that's guaranteed to fail.
-		 */
-		elog(WARNING, "events %s mismatched for safekeeper %s:%s in state [%s]",
-			 FormatEvents(events), sk->host, sk->port, FormatSafekeeperState(sk->state));
-		Assert(events_ok_for_state);
-	}
-}
-
-/* Returns the set of events a safekeeper in this state should be waiting on
- *
- * This will return WL_NO_EVENTS (= 0) for some events. */
-uint32
-SafekeeperStateDesiredEvents(SafekeeperState state)
-{
-	uint32		result = WL_NO_EVENTS;
-
-	/* If the state doesn't have a modifier, we can check the base state */
-	switch (state)
-	{
-			/* Connecting states say what they want in the name */
-		case SS_CONNECTING_READ:
-			result = WL_SOCKET_READABLE;
-			break;
-		case SS_CONNECTING_WRITE:
-			result = WL_SOCKET_WRITEABLE;
-			break;
-
-			/* Reading states need the socket to be read-ready to continue */
-		case SS_WAIT_EXEC_RESULT:
-		case SS_HANDSHAKE_RECV:
-		case SS_WAIT_VERDICT:
-			result = WL_SOCKET_READABLE;
-			break;
-
-			/*
-			 * Idle states use read-readiness as a sign that the connection
-			 * has been disconnected.
-			 */
-		case SS_VOTING:
-		case SS_IDLE:
-			result = WL_SOCKET_READABLE;
-			break;
-
-			/*
-			 * Flush states require write-ready for flushing. Active state
-			 * does both reading and writing.
-			 *
-			 * TODO: SS_ACTIVE sometimes doesn't need to be write-ready. We
-			 * should check sk->flushWrite here to set WL_SOCKET_WRITEABLE.
-			 */
-		case SS_SEND_ELECTED_FLUSH:
-		case SS_ACTIVE:
-			result = WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE;
-			break;
-
-			/* The offline state expects no events. */
-		case SS_OFFLINE:
-			result = WL_NO_EVENTS;
-			break;
-
-		default:
-			Assert(false);
-			break;
-	}
-
-	return result;
-}
-
-/* Returns a human-readable string corresponding to the event set
- *
- * If the events do not correspond to something set as the `events` field of a `WaitEvent`, the
- * returned string may be meaingless.
- *
- * The string should not be freed. It should also not be expected to remain the same between
- * function calls. */
-char *
-FormatEvents(uint32 events)
-{
-	static char return_str[8];
-
-	/* Helper variable to check if there's extra bits */
-	uint32		all_flags = WL_LATCH_SET
-	| WL_SOCKET_READABLE
-	| WL_SOCKET_WRITEABLE
-	| WL_TIMEOUT
-	| WL_POSTMASTER_DEATH
-	| WL_EXIT_ON_PM_DEATH
-	| WL_SOCKET_CONNECTED;
-
-	/*
-	 * The formatting here isn't supposed to be *particularly* useful -- it's
-	 * just to give an sense of what events have been triggered without
-	 * needing to remember your powers of two.
-	 */
-
-	return_str[0] = (events & WL_LATCH_SET) ? 'L' : '_';
-	return_str[1] = (events & WL_SOCKET_READABLE) ? 'R' : '_';
-	return_str[2] = (events & WL_SOCKET_WRITEABLE) ? 'W' : '_';
-	return_str[3] = (events & WL_TIMEOUT) ? 'T' : '_';
-	return_str[4] = (events & WL_POSTMASTER_DEATH) ? 'D' : '_';
-	return_str[5] = (events & WL_EXIT_ON_PM_DEATH) ? 'E' : '_';
-	return_str[5] = (events & WL_SOCKET_CONNECTED) ? 'C' : '_';
-
-	if (events & (~all_flags))
-	{
-		elog(WARNING, "Event formatting found unexpected component %d",
-			 events & (~all_flags));
-		return_str[6] = '*';
-		return_str[7] = '\0';
-	}
-	else
-		return_str[6] = '\0';
-
-	return (char *) &return_str;
-}
-
-/*
- * Convert a character which represents a hexadecimal digit to an integer.
- *
- * Returns -1 if the character is not a hexadecimal digit.
- */
-static int
-HexDecodeChar(char c)
-{
-	if (c >= '0' && c <= '9')
-		return c - '0';
-	if (c >= 'a' && c <= 'f')
-		return c - 'a' + 10;
-	if (c >= 'A' && c <= 'F')
-		return c - 'A' + 10;
-
-	return -1;
-}
-
-/*
- * Decode a hex string into a byte string, 2 hex chars per byte.
- *
- * Returns false if invalid characters are encountered; otherwise true.
- */
-bool
-HexDecodeString(uint8 *result, char *input, int nbytes)
-{
-	int			i;
-
-	for (i = 0; i < nbytes; ++i)
-	{
-		int			n1 = HexDecodeChar(input[i * 2]);
-		int			n2 = HexDecodeChar(input[i * 2 + 1]);
-
-		if (n1 < 0 || n2 < 0)
-			return false;
-		result[i] = n1 * 16 + n2;
-	}
-
-	return true;
-}
-
-/* --------------------------------
- *		pq_getmsgint32_le	- get a binary 4-byte int from a message buffer in native (LE) order
- * --------------------------------
- */
-uint32
-pq_getmsgint32_le(StringInfo msg)
-{
-	uint32		n32;
-
-	pq_copymsgbytes(msg, (char *) &n32, sizeof(n32));
-
-	return n32;
-}
-
-/* --------------------------------
- *		pq_getmsgint64	- get a binary 8-byte int from a message buffer in native (LE) order
- * --------------------------------
- */
-uint64
-pq_getmsgint64_le(StringInfo msg)
-{
-	uint64		n64;
-
-	pq_copymsgbytes(msg, (char *) &n64, sizeof(n64));
-
-	return n64;
-}
-
-/* append a binary [u]int32 to a StringInfo buffer in native (LE) order */
-void
-pq_sendint32_le(StringInfo buf, uint32 i)
-{
-	enlargeStringInfo(buf, sizeof(uint32));
-	memcpy(buf->data + buf->len, &i, sizeof(uint32));
-	buf->len += sizeof(uint32);
-}
-
-/* append a binary [u]int64 to a StringInfo buffer in native (LE) order */
-void
-pq_sendint64_le(StringInfo buf, uint64 i)
-{
-	enlargeStringInfo(buf, sizeof(uint64));
-	memcpy(buf->data + buf->len, &i, sizeof(uint64));
-	buf->len += sizeof(uint64);
-}
-
-/*
- * Write XLOG data to disk.
- */
-void
-XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr)
-{
-	int			startoff;
-	int			byteswritten;
-
-	while (nbytes > 0)
-	{
-		int			segbytes;
-
-		/* Close the current segment if it's completed */
-		if (walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size))
-			XLogWalPropClose(recptr);
-
-		if (walpropFile < 0)
-		{
-#if PG_VERSION_NUM >= 150000
-			/* FIXME Is it ok to use hardcoded value here? */
-			TimeLineID	tli = 1;
-#else
-			bool		use_existent = true;
-#endif
-			/* Create/use new log file */
-			XLByteToSeg(recptr, walpropSegNo, wal_segment_size);
-#if PG_VERSION_NUM >= 150000
-			walpropFile = XLogFileInit(walpropSegNo, tli);
-			walpropFileTLI = tli;
-#else
-			walpropFile = XLogFileInit(walpropSegNo, &use_existent, false);
-			walpropFileTLI = ThisTimeLineID;
-#endif
-		}
-
-		/* Calculate the start offset of the received logs */
-		startoff = XLogSegmentOffset(recptr, wal_segment_size);
-
-		if (startoff + nbytes > wal_segment_size)
-			segbytes = wal_segment_size - startoff;
-		else
-			segbytes = nbytes;
-
-		/* OK to write the logs */
-		errno = 0;
-
-		byteswritten = pg_pwrite(walpropFile, buf, segbytes, (off_t) startoff);
-		if (byteswritten <= 0)
-		{
-			char		xlogfname[MAXFNAMELEN];
-			int			save_errno;
-
-			/* if write didn't set errno, assume no disk space */
-			if (errno == 0)
-				errno = ENOSPC;
-
-			save_errno = errno;
-			XLogFileName(xlogfname, walpropFileTLI, walpropSegNo, wal_segment_size);
-			errno = save_errno;
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not write to log segment %s "
-							"at offset %u, length %lu: %m",
-							xlogfname, startoff, (unsigned long) segbytes)));
-		}
-
-		/* Update state for write */
-		recptr += byteswritten;
-
-		nbytes -= byteswritten;
-		buf += byteswritten;
-	}
-
-	/*
-	 * Close the current segment if it's fully written up in the last cycle of
-	 * the loop.
-	 */
-	if (walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size))
-	{
-		XLogWalPropClose(recptr);
-	}
-}
-
-/*
- * Close the current segment.
- */
-void
-XLogWalPropClose(XLogRecPtr recptr)
-{
-	Assert(walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size));
-
-	if (close(walpropFile) != 0)
-	{
-		char		xlogfname[MAXFNAMELEN];
-
-		XLogFileName(xlogfname, walpropFileTLI, walpropSegNo, wal_segment_size);
-
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not close log segment %s: %m",
-						xlogfname)));
-	}
-
-	walpropFile = -1;
-}
-
-/* START of cloned functions from walsender.c */
-
-/*
- * Subscribe for new WAL and stream it in the loop to safekeepers.
- *
- * At the moment, this never returns, but an ereport(ERROR) will take us back
- * to the main loop.
- */
-void
-StartProposerReplication(StartReplicationCmd *cmd)
-{
-	XLogRecPtr	FlushPtr;
-	TimeLineID	currTLI;
-
-#if PG_VERSION_NUM < 150000
-	if (ThisTimeLineID == 0)
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("IDENTIFY_SYSTEM has not been run before START_REPLICATION")));
-#endif
-
-	/*
-	 * We assume here that we're logging enough information in the WAL for
-	 * log-shipping, since this is checked in PostmasterMain().
-	 *
-	 * NOTE: wal_level can only change at shutdown, so in most cases it is
-	 * difficult for there to be WAL data that we can still see that was
-	 * written at wal_level='minimal'.
-	 */
-
-	if (cmd->slotname)
-	{
-		ReplicationSlotAcquire(cmd->slotname, true);
-		if (SlotIsLogical(MyReplicationSlot))
-			ereport(ERROR,
-					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-					 errmsg("cannot use a logical replication slot for physical replication")));
-
-		/*
-		 * We don't need to verify the slot's restart_lsn here; instead we
-		 * rely on the caller requesting the starting point to use.  If the
-		 * WAL segment doesn't exist, we'll fail later.
-		 */
-	}
-
-	/*
-	 * Select the timeline. If it was given explicitly by the client, use
-	 * that. Otherwise use the timeline of the last replayed record, which is
-	 * kept in ThisTimeLineID.
-	 *
-	 * Neon doesn't currently use PG Timelines, but it may in the future, so
-	 * we keep this code around to lighten the load for when we need it.
-	 */
-#if PG_VERSION_NUM >= 150000
-	FlushPtr = GetFlushRecPtr(&currTLI);
-#else
-	FlushPtr = GetFlushRecPtr();
-	currTLI = ThisTimeLineID;
-#endif
-
-	/*
-	 * When we first start replication the standby will be behind the
-	 * primary. For some applications, for example synchronous
-	 * replication, it is important to have a clear state for this initial
-	 * catchup mode, so we can trigger actions when we change streaming
-	 * state later. We may stay in this state for a long time, which is
-	 * exactly why we want to be able to monitor whether or not we are
-	 * still here.
-	 */
-	WalSndSetState(WALSNDSTATE_CATCHUP);
-
-	/*
-	 * Don't allow a request to stream from a future point in WAL that
-	 * hasn't been flushed to disk in this server yet.
-	 */
-	if (FlushPtr < cmd->startpoint)
-	{
-		ereport(ERROR,
-				(errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X",
-						LSN_FORMAT_ARGS(cmd->startpoint),
-						LSN_FORMAT_ARGS(FlushPtr))));
-	}
-
-	/* Start streaming from the requested point */
-	sentPtr = cmd->startpoint;
-
-	/* Initialize shared memory status, too */
-	SpinLockAcquire(&MyWalSnd->mutex);
-	MyWalSnd->sentPtr = sentPtr;
-	SpinLockRelease(&MyWalSnd->mutex);
-
-	SyncRepInitConfig();
-
-	/* Infinite send loop, never returns */
-	WalSndLoop();
-
-	WalSndSetState(WALSNDSTATE_STARTUP);
-
-	if (cmd->slotname)
-		ReplicationSlotRelease();
-}
-
-/*
- * Main loop that waits for LSN updates and calls the walproposer.
- * Synchronous replication sets latch in WalSndWakeup at walsender.c
- */
-static void
-WalSndLoop(void)
-{
-	/* Clear any already-pending wakeups */
-	ResetLatch(MyLatch);
-
-	for (;;)
-	{
-		CHECK_FOR_INTERRUPTS();
-
-		XLogBroadcastWalProposer();
-
-		if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
-			WalSndSetState(WALSNDSTATE_STREAMING);
-		WalProposerPoll();
-	}
-}
-
-/*
- * Notify walproposer about the new WAL position.
- */
-static void
-XLogBroadcastWalProposer(void)
-{
-	XLogRecPtr	startptr;
-	XLogRecPtr	endptr;
-
-	/* Start from the last sent position */
-	startptr = sentPtr;
-
-	/*
-	 * Streaming the current timeline on a primary.
-	 *
-	 * Attempt to send all data that's already been written out and
-	 * fsync'd to disk.  We cannot go further than what's been written out
-	 * given the current implementation of WALRead().  And in any case
-	 * it's unsafe to send WAL that is not securely down to disk on the
-	 * primary: if the primary subsequently crashes and restarts, standbys
-	 * must not have applied any WAL that got lost on the primary.
-	 */
-#if PG_VERSION_NUM >= 150000
-	endptr = GetFlushRecPtr(NULL);
-#else
-	endptr = GetFlushRecPtr();
-#endif
-
-	/*
-	 * Record the current system time as an approximation of the time at which
-	 * this WAL location was written for the purposes of lag tracking.
-	 *
-	 * In theory we could make XLogFlush() record a time in shmem whenever WAL
-	 * is flushed and we could get that time as well as the LSN when we call
-	 * GetFlushRecPtr() above (and likewise for the cascading standby
-	 * equivalent), but rather than putting any new code into the hot WAL path
-	 * it seems good enough to capture the time here.  We should reach this
-	 * after XLogFlush() runs WalSndWakeupProcessRequests(), and although that
-	 * may take some time, we read the WAL flush pointer and take the time
-	 * very close to together here so that we'll get a later position if it is
-	 * still moving.
-	 *
-	 * Because LagTrackerWrite ignores samples when the LSN hasn't advanced,
-	 * this gives us a cheap approximation for the WAL flush time for this
-	 * LSN.
-	 *
-	 * Note that the LSN is not necessarily the LSN for the data contained in
-	 * the present message; it's the end of the WAL, which might be further
-	 * ahead.  All the lag tracking machinery cares about is finding out when
-	 * that arbitrary LSN is eventually reported as written, flushed and
-	 * applied, so that it can measure the elapsed time.
-	 */
-	LagTrackerWrite(endptr, GetCurrentTimestamp());
-
-	/* Do we have any work to do? */
-	Assert(startptr <= endptr);
-	if (endptr <= startptr)
-		return;
-
-	WalProposerBroadcast(startptr, endptr);
-	sentPtr = endptr;
-
-	/* Update shared memory status */
-	{
-		WalSnd	   *walsnd = MyWalSnd;
-
-		SpinLockAcquire(&walsnd->mutex);
-		walsnd->sentPtr = sentPtr;
-		SpinLockRelease(&walsnd->mutex);
-	}
-
-	/* Report progress of XLOG streaming in PS display */
-	if (update_process_title)
-	{
-		char		activitymsg[50];
-
-		snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
-				 LSN_FORMAT_ARGS(sentPtr));
-		set_ps_display(activitymsg);
-	}
-}
diff --git a/pgxn/neon/walproposer_utils.h b/pgxn/neon/walproposer_utils.h
deleted file mode 100644
index aa5df5fa43..0000000000
--- a/pgxn/neon/walproposer_utils.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __NEON_WALPROPOSER_UTILS_H__
-#define __NEON_WALPROPOSER_UTILS_H__
-
-#include "walproposer.h"
-
-int			CompareLsn(const void *a, const void *b);
-char	   *FormatSafekeeperState(SafekeeperState state);
-void		AssertEventsOkForState(uint32 events, Safekeeper *sk);
-uint32		SafekeeperStateDesiredEvents(SafekeeperState state);
-char	   *FormatEvents(uint32 events);
-bool		HexDecodeString(uint8 *result, char *input, int nbytes);
-uint32		pq_getmsgint32_le(StringInfo msg);
-uint64		pq_getmsgint64_le(StringInfo msg);
-void		pq_sendint32_le(StringInfo buf, uint32 i);
-void		pq_sendint64_le(StringInfo buf, uint64 i);
-void		XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr);
-void		XLogWalPropClose(XLogRecPtr recptr);
-
-#endif							/* __NEON_WALPROPOSER_UTILS_H__ */