diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 769befb4e5..4460e3b40c 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -911,7 +911,74 @@ pageserver_receive(shardno_t shard_no) } PG_CATCH(); { - neon_shard_log(shard_no, LOG, "pageserver_receive: disconnect due malformatted response"); + neon_shard_log(shard_no, LOG, "pageserver_receive: disconnect due to failure while parsing response"); + pageserver_disconnect(shard_no); + PG_RE_THROW(); + } + PG_END_TRY(); + + if (message_level_is_interesting(PageStoreTrace)) + { + char *msg = nm_to_string((NeonMessage *) resp); + + neon_shard_log(shard_no, PageStoreTrace, "got response: %s", msg); + pfree(msg); + } + } + else if (rc == -1) + { + neon_shard_log(shard_no, LOG, "pageserver_receive disconnect: psql end of copy data: %s", pchomp(PQerrorMessage(pageserver_conn))); + pageserver_disconnect(shard_no); + resp = NULL; + } + else if (rc == -2) + { + char *msg = pchomp(PQerrorMessage(pageserver_conn)); + + pageserver_disconnect(shard_no); + neon_shard_log(shard_no, ERROR, "pageserver_receive disconnect: could not read COPY data: %s", msg); + } + else + { + pageserver_disconnect(shard_no); + neon_shard_log(shard_no, ERROR, "pageserver_receive disconnect: unexpected PQgetCopyData return value: %d", rc); + } + + shard->nresponses_received++; + return (NeonResponse *) resp; +} + +static NeonResponse * +pageserver_try_receive(shardno_t shard_no) +{ + StringInfoData resp_buff; + NeonResponse *resp; + PageServer *shard = &page_servers[shard_no]; + PGconn *pageserver_conn = shard->conn; + /* read response */ + int rc; + + if (shard->state != PS_Connected) + return NULL; + + Assert(pageserver_conn); + + rc = PQgetCopyData(shard->conn, &resp_buff.data, 1 /* async = true */); + + if (rc == 0) + return NULL; + else if (rc > 0) + { + PG_TRY(); + { + resp_buff.len = rc; + resp_buff.cursor = 0; + resp = nm_unpack_response(&resp_buff); + PQfreemem(resp_buff.data); + } + PG_CATCH(); + { + neon_shard_log(shard_no, LOG, "pageserver_receive: disconnect due to failure while parsing response"); pageserver_disconnect(shard_no); PG_RE_THROW(); } @@ -980,6 +1047,7 @@ page_server_api api = .send = pageserver_send, .flush = pageserver_flush, .receive = pageserver_receive, + .try_receive = pageserver_try_receive, .disconnect = pageserver_disconnect_shard }; diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h index 37bc4f7886..b751235595 100644 --- a/pgxn/neon/pagestore_client.h +++ b/pgxn/neon/pagestore_client.h @@ -192,9 +192,29 @@ typedef uint16 shardno_t; typedef struct { + /* + * Send this request to the PageServer associated with this shard. + */ bool (*send) (shardno_t shard_no, NeonRequest * request); + /* + * Blocking read for the next response of this shard. + * + * When a CANCEL signal is handled, the connection state will be + * unmodified. + */ NeonResponse *(*receive) (shardno_t shard_no); + /* + * Try get the next response from the TCP buffers, if any. + * Returns NULL when the data is not yet available. + */ + NeonResponse *(*try_receive) (shardno_t shard_no); + /* + * Make sure all requests are sent to PageServer. + */ bool (*flush) (shardno_t shard_no); + /* + * Disconnect from this pageserver shard. + */ void (*disconnect) (shardno_t shard_no); } page_server_api; diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 7a4c0ef487..54cacea984 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -405,6 +405,56 @@ compact_prefetch_buffers(void) return false; } +/* + * If there might be responses still in the TCP buffer, then + * we should try to use those, so as to reduce any TCP backpressure + * on the OS/PS side. + * + * This procedure handles that. + * + * Note that this is only valid as long as the only pipelined + * operations in the TCP buffer are getPage@Lsn requests. + */ +static void +prefetch_pump_state(void) +{ + while (MyPState->ring_receive != MyPState->ring_flush) + { + NeonResponse *response; + PrefetchRequest *slot; + MemoryContext old; + + slot = GetPrfSlot(MyPState->ring_receive); + + old = MemoryContextSwitchTo(MyPState->errctx); + response = page_server->try_receive(slot->shard_no); + MemoryContextSwitchTo(old); + + if (response == NULL) + break; + + /* The slot should still be valid */ + if (slot->status != PRFS_REQUESTED || + slot->response != NULL || + slot->my_ring_index != MyPState->ring_receive) + neon_shard_log(slot->shard_no, ERROR, + "Incorrect prefetch slot state after receive: status=%d response=%p my=%lu receive=%lu", + slot->status, slot->response, + (long) slot->my_ring_index, (long) MyPState->ring_receive); + + /* update prefetch state */ + MyPState->n_responses_buffered += 1; + MyPState->n_requests_inflight -= 1; + MyPState->ring_receive += 1; + MyNeonCounters->getpage_prefetches_buffered = + MyPState->n_responses_buffered; + + /* update slot state */ + slot->status = PRFS_RECEIVED; + slot->response = response; + } +} + void readahead_buffer_resize(int newsize, void *extra) { @@ -2808,6 +2858,8 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MyPState->ring_last <= ring_index); } + prefetch_pump_state(); + return false; } @@ -2849,6 +2901,8 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) Assert(ring_index < MyPState->ring_unused && MyPState->ring_last <= ring_index); + prefetch_pump_state(); + return false; } #endif /* PG_MAJORVERSION_NUM < 17 */ @@ -2891,6 +2945,8 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum, */ neon_log(SmgrTrace, "writeback noop"); + prefetch_pump_state(); + #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) mdwriteback(reln, forknum, blocknum, nblocks); @@ -3145,6 +3201,8 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1, NULL); neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer); + prefetch_pump_state(); + #ifdef DEBUG_COMPARE_LOCAL if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln)) { @@ -3282,6 +3340,8 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, neon_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns, buffers, nblocks, read); + prefetch_pump_state(); + #ifdef DEBUG_COMPARE_LOCAL if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln)) { @@ -3450,6 +3510,8 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer); + prefetch_pump_state(); + #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) #if PG_MAJORVERSION_NUM >= 17 @@ -3503,6 +3565,8 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks); + prefetch_pump_state(); + #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) mdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync); @@ -3792,6 +3856,8 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum) neon_log(SmgrTrace, "[NEON_SMGR] immedsync noop"); + prefetch_pump_state(); + #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) mdimmedsync(reln, forknum);