Files
neon/libs
Heikki Linnakangas 989d78aac8 Buffer the TCP incoming stream on libpq connections.
Reduces the number of syscalls needed to read the commands from the
compute.

Here's a snippet of strace output from the pageserver, when performing
a sequential scan on a table, with prefetch:

    3084934 recvfrom(47, "d", 1, 0, NULL, NULL) = 1
    3084934 recvfrom(47, "\0\0\0\37", 4, 0, NULL, NULL) = 4
    3084934 recvfrom(47, "\2\1\0\0\0\0\362\302\360\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\0\3", 27, 0, NULL, NULL) = 27
    3084934 pread64(28, "\0\0\0\1\0\0\0\0\0\0\0\253                    "..., 8192, 25190400) = 8192
    3084934 write(45, "B\0\0\0\25\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\3A\0\0\32\355\0\0\0\0\1"..., 7010) = 7010
    3084934 poll([{fd=46, events=POLLIN}, {fd=48, events=POLLIN}], 2, 60000) = 1 ([{fd=46, revents=POLLIN}])
    3084934 read(46, "\0\0\0\0p\311q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237\362\0\0\237\362\0"..., 8192) = 8192
    3084934 sendto(47, "d\0\0 \5f\0\0\0\0p\311q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237"..., 8198, MSG_NOSIGNAL, NULL, 0) = 8198
    3084934 recvfrom(47, "d", 1, 0, NULL, NULL) = 1
    3084934 recvfrom(47, "\0\0\0\37", 4, 0, NULL, NULL) = 4
    3084934 recvfrom(47, "\2\1\0\0\0\0\362\302\360\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\0\4", 27, 0, NULL, NULL) = 27
    3084934 pread64(28, "    \0=\0L\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0;;\0\0\0\4\4\0"..., 8192, 25198592) = 8192
    3084934 write(45, "B\0\0\0\25\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\4A\0\0\32\355\0\0\0\0\1"..., 7010) = 7010
    3084934 poll([{fd=46, events=POLLIN}, {fd=48, events=POLLIN}], 2, 60000) = 1 ([{fd=46, revents=POLLIN}])
    3084934 read(46, "\0\0\0\0\260\344q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237\362\0\0\237\362\0"..., 8192) = 8192
    3084934 sendto(47, "d\0\0 \5f\0\0\0\0\260\344q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237"..., 8198, MSG_NOSIGNAL, NULL, 0) = 8198
    3084934 recvfrom(47, "d", 1, 0, NULL, NULL) = 1
    3084934 recvfrom(47, "\0\0\0\37", 4, 0, NULL, NULL) = 4
    3084934 recvfrom(47, "\2\1\0\0\0\0\362\302\360\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\0\5", 27, 0, NULL, NULL) = 27
    3084934 write(45, "B\0\0\0\25\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\5A\0\0\32\355\0\0\0\0\1"..., 7010) = 7010
    3084934 poll([{fd=46, events=POLLIN}, {fd=48, events=POLLIN}], 2, 60000) = 1 ([{fd=46, revents=POLLIN}])
    3084934 read(46, "\0\0\0\0\330\377q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237\362\0\0\237\362\0"..., 8192) = 8192
    3084934 sendto(47, "d\0\0 \5f\0\0\0\0\330\377q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237"..., 8198, MSG_NOSIGNAL, NULL, 0) = 8198

This shows the interaction for three get_page_at_lsn requests. For
each request, the pageserver performs three recvfrom syscalls to read
the incoming request from the socket: one for the 1-byte message type,
one for the 4-byte length, and one for the 27-byte payload. After this
patch, those recvfrom calls are gone:

    3086123 read(47, "\0\0\0\0\360\222q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237\362\0\0\237\362\0"..., 8192) = 8192
    3086123 sendto(45, "d\0\0 \5f\0\0\0\0\360\222q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237"..., 8198, MSG_NOSIGNAL, NULL, 0) = 8198
    3086123 pread64(29, "                                "..., 8192, 25182208) = 8192
    3086123 write(46, "B\0\0\0\25\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\2A\0\0\32\355\0\0\0\0\1"..., 7010) = 7010
    3086123 poll([{fd=47, events=POLLIN}, {fd=49, events=POLLIN}], 2, 60000) = 1 ([{fd=47, revents=POLLIN}])
    3086123 read(47, "\0\0\0\0000\256q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237\362\0\0\237\362\0"..., 8192) = 8192
    3086123 sendto(45, "d\0\0 \5f\0\0\0\0000\256q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237"..., 8198, MSG_NOSIGNAL, NULL, 0) = 8198
    3086123 pread64(29, "\0\0\0\1\0\0\0\0\0\0\0\253                    "..., 8192, 25190400) = 8192
    3086123 write(46, "B\0\0\0\25\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\3A\0\0\32\355\0\0\0\0\1"..., 7010) = 7010
    3086123 poll([{fd=47, events=POLLIN}, {fd=49, events=POLLIN}], 2, 60000) = 1 ([{fd=47, revents=POLLIN}])
    3086123 read(47, "\0\0\0\0p\311q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237\362\0\0\237\362\0"..., 8192) = 8192
    3086123 sendto(45, "d\0\0 \5f\0\0\0\0p\311q\1\0\0\4\0\f\1\200\1\0 \4 \0\0\0\0\200\237"..., 8198, MSG_NOSIGNAL, NULL, 0) = 8198
    3086123 pread64(29, "    \0=\0L\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0;;\0\0\0\4\4\0"..., 8192, 25198592) = 8192
    3086123 write(46, "B\0\0\0\25\0\0\0\6\177\0\0002\276\0\0@\f\0\0\0\4A\0\0\32\355\0\0\0\0\1"..., 7010) = 7010
    3086123 poll([{fd=47, events=POLLIN}, {fd=49, events=POLLIN}], 2, 60000) = 1 ([{fd=47, revents=POLLIN}])

In this test, the compute sends a batch of prefetch requests, and they
are read from the socket in one syscall. That syscall was not captured
by the strace snippet above, but there are far fewer of them than
before.
2022-10-18 18:46:07 +03:00
..