From 839f41f5bb5ef072972eefded5e5ccc32429b6e3 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Thu, 6 Feb 2025 15:39:45 +0100 Subject: [PATCH] fix pgcopydb seg fault and -c idle_in_transaction_session_timeout=0 (#10692) ## Problem During ingest_benchmark, which uses `pgcopydb` ([see](https://github.com/dimitri/pgcopydb)), we sometimes had outages. - when the PostgreSQL COPY step failed, we got a segfault (reported [here](https://github.com/dimitri/pgcopydb/issues/899)) - the root cause was that Neon's idle_in_transaction_session_timeout is set to 5 minutes, which is suboptimal for long-running tasks like project import (reported [here](https://github.com/dimitri/pgcopydb/issues/900)) ## Summary of changes Patch pgcopydb to avoid the segfault. Override idle_in_transaction_session_timeout and set it to 0 ("unlimited"). --- .dockerignore | 1 + build-tools.Dockerfile | 3 ++ build_tools/patches/pgcopydbv017.patch | 37 +++++++++++++++++++ .../test_perf_ingest_using_pgcopydb.py | 2 +- 4 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 build_tools/patches/pgcopydbv017.patch diff --git a/.dockerignore b/.dockerignore index 9e2d2e7108..7ead48db7c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -24,3 +24,4 @@ !storage_controller/ !vendor/postgres-*/ !workspace_hack/ +!build_tools/patches diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile index 3ade57b175..52874d2ef6 100644 --- a/build-tools.Dockerfile +++ b/build-tools.Dockerfile @@ -12,6 +12,8 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \ echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc +COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch + RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \ set -e && \ apt update && \ @@ -44,6 +46,7 @@ RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \ mkdir /tmp/pgcopydb && \ tar -xzf /tmp/pgcopydb.tar.gz -C /tmp/pgcopydb 
--strip-components=1 && \ cd /tmp/pgcopydb && \ + patch -p1 < /pgcopydbv017.patch && \ make -s clean && \ make -s -j12 install && \ libpq_path=$(find /lib /usr/lib -name "libpq.so.5" | head -n 1) && \ diff --git a/build_tools/patches/pgcopydbv017.patch b/build_tools/patches/pgcopydbv017.patch new file mode 100644 index 0000000000..c309d8fe59 --- /dev/null +++ b/build_tools/patches/pgcopydbv017.patch @@ -0,0 +1,37 @@ +diff --git a/src/bin/pgcopydb/copydb.c b/src/bin/pgcopydb/copydb.c +index d730b03..69a9be9 100644 +--- a/src/bin/pgcopydb/copydb.c ++++ b/src/bin/pgcopydb/copydb.c +@@ -44,6 +44,7 @@ GUC dstSettings[] = { + { "synchronous_commit", "'off'" }, + { "statement_timeout", "0" }, + { "lock_timeout", "0" }, ++ { "idle_in_transaction_session_timeout", "0" }, + { NULL, NULL }, + }; + +diff --git a/src/bin/pgcopydb/pgsql.c b/src/bin/pgcopydb/pgsql.c +index 94f2f46..86b9448 100644 +--- a/src/bin/pgcopydb/pgsql.c ++++ b/src/bin/pgcopydb/pgsql.c +@@ -3174,11 +3174,18 @@ pgcopy_log_error(PGSQL *pgsql, PGresult *res, const char *context) + /* errors have already been logged */ + return; + } +- + if (res != NULL) + { + char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); +- strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate)); ++ if (sqlstate == NULL) ++ { ++ // PQresultErrorField returned NULL! 
++ pgsql->sqlstate[0] = '\0'; // Set to an empty string to avoid segfault ++ } ++ else ++ { ++ strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate)); ++ } + } + + char *endpoint = diff --git a/test_runner/performance/test_perf_ingest_using_pgcopydb.py b/test_runner/performance/test_perf_ingest_using_pgcopydb.py index f0a0c1f5a2..da62422fca 100644 --- a/test_runner/performance/test_perf_ingest_using_pgcopydb.py +++ b/test_runner/performance/test_perf_ingest_using_pgcopydb.py @@ -136,7 +136,7 @@ def run_command_and_log_output(command, log_file_path: Path): "LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}", "PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")), "PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")), - "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", + "PGOPTIONS": "-c idle_in_transaction_session_timeout=0 -c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", } # Combine the current environment with custom variables env = os.environ.copy()