From de1c35fab32717a114b89e007c490fcc01faa5d9 Mon Sep 17 00:00:00 2001
From: Fedor Dikarev
Date: Wed, 29 Jan 2025 22:02:54 +0100
Subject: [PATCH] add retries for apt, wget and curl (#10553)

Ref: https://github.com/neondatabase/cloud/issues/23461

## Problem

> recent CI failure due to apt-get:
```
4.266 E: Failed to fetch http://deb.debian.org/debian/pool/main/g/gcc-10/libgfortran5_10.2.1-6_arm64.deb Error reading from server - read (104: Connection reset by peer) [IP: 146.75.122.132 80]
```

https://github.com/neondatabase/neon/actions/runs/11144974698/job/30973537767?pr=9186

thinking about if there should be a mirror-selector at the beginning of
the dockerfile so that it uses a debian mirror closer to the build
server?

## Summary of changes

We could consider adding local mirror or proxy and keep it close to our
self-hosted runners.
For now lets just add retries for `apt`, `wget` and `curl`

thanks to @skyzh for reporting that in October 2024, I just finally
found time to take a look here :)
---
Review notes (this section is ignored by `git am`):

* Each RUN that writes both /root/.wgetrc and /root/.curlrc now joins the
  two `echo` commands with `&&`. Previously the .wgetrc line ended in a bare
  line continuation, so the following `echo -e ... > /root/.curlrc` was
  parsed as extra arguments of the first `echo`: /root/.wgetrc was created
  empty and /root/.curlrc received the wget settings followed by the literal
  words `echo -e` and the curl options.
* NOTE(review): `echo -e` is a bash-ism. compute/compute-node.Dockerfile sets
  SHELL to bash just above the new RUN; confirm the affected stages of
  build-tools.Dockerfile do the same, otherwise the first line of each
  generated rc file may start with a literal `-e`.
* The apt retry setting is written to /etc/apt/apt.conf.d/80-retries; only
  the top-level Dockerfile removes it again after installing packages.

 Dockerfile                                |  2 ++
 build-tools.Dockerfile                    | 10 ++++++++++
 compute/compute-node.Dockerfile           | 13 ++++++++++++-
 docker-compose/compute_wrapper/Dockerfile |  7 ++++---
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index a8f7ae0a62..7ba54c8ca5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -64,6 +64,7 @@ ARG DEFAULT_PG_VERSION
 WORKDIR /data
 
 RUN set -e \
+    && echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries \
     && apt update \
     && apt install -y \
     libreadline-dev \
@@ -72,6 +73,7 @@ RUN set -e \
     # System postgres for use with client libraries (e.g. in storage controller)
     postgresql-15 \
     openssl \
+    && rm -f /etc/apt/apt.conf.d/80-retries \
     && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
     && useradd -d /data neon \
     && chown -R neon:neon /data
diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile
index 7a2ec9c43e..9c13e480c1 100644
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -3,6 +3,10 @@ ARG DEBIAN_VERSION=bookworm
 FROM debian:bookworm-slim AS pgcopydb_builder
 ARG DEBIAN_VERSION
 
+RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
+    echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
+    echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
+
 RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
     set -e && \
     apt update && \
@@ -61,6 +65,10 @@ RUN mkdir -p /pgcopydb/bin && \
 COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/pgcopydb
 COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5
 
+RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
+    echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
+    echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
+
 # System deps
 #
 # 'gdb' is included so that we get backtraces of core dumps produced in
@@ -218,6 +226,8 @@ RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/re
 USER nonroot:nonroot
 WORKDIR /home/nonroot
 
+RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc
+
 # Python
 ENV PYTHON_VERSION=3.11.10 \
     PYENV_ROOT=/home/nonroot/.pyenv \
diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile
index 7ac6e9bc58..a428c61f34 100644
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -18,6 +18,10 @@ ARG DEBIAN_VERSION
 # Use strict mode for bash to catch errors early
 SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
 
+RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
+    echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
+    echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
+
 RUN case $DEBIAN_VERSION in \
     # Version-specific installs for Bullseye (PG14-PG16):
     # The h3_pg extension needs a cmake 3.20+, but Debian bullseye has 3.18.
@@ -838,6 +842,8 @@ ENV PATH="/home/nonroot/.cargo/bin:$PATH"
 USER nonroot
 WORKDIR /home/nonroot
 
+RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc
+
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
     chmod +x rustup-init && \
     ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
@@ -874,6 +880,8 @@ ENV PATH="/home/nonroot/.cargo/bin:$PATH"
 USER nonroot
 WORKDIR /home/nonroot
 
+RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc
+
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
     chmod +x rustup-init && \
     ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
@@ -1243,6 +1251,7 @@ RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin c
 
 FROM debian:$DEBIAN_FLAVOR AS pgbouncer
 RUN set -e \
+    && echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries \
     && apt update \
     && apt install --no-install-suggests --no-install-recommends -y \
         build-essential \
@@ -1444,6 +1453,8 @@ RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/loca
 # libboost* for rdkit
 # ca-certificates for communicating with s3 by compute_ctl
 
+RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
+    echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc
 
 RUN apt update && \
     case $DEBIAN_VERSION in \
@@ -1500,7 +1511,7 @@ RUN set -ex; \
     else \
         echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
     fi; \
-    curl -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
+    curl --retry 5 -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
     echo "${CHECKSUM} /tmp/awscliv2.zip" | sha256sum -c -; \
     unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \
     /tmp/awscliv2/aws/install; \
diff --git a/docker-compose/compute_wrapper/Dockerfile b/docker-compose/compute_wrapper/Dockerfile
index e2e5bc7248..61f44681da 100644
--- a/docker-compose/compute_wrapper/Dockerfile
+++ b/docker-compose/compute_wrapper/Dockerfile
@@ -7,11 +7,12 @@ FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG
 ARG COMPUTE_IMAGE
 
 USER root
-RUN apt-get update && \
+RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
+    apt-get update && \
     apt-get install -y curl \
                        jq \
                        netcat-openbsd
 #This is required for the pg_hintplan test
-RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
+RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
 
-USER postgres
\ No newline at end of file
+USER postgres