From a463749f59c3fe020065c4cacc91df8fa11ffb99 Mon Sep 17 00:00:00 2001 From: MMeent Date: Fri, 2 Sep 2022 14:34:40 +0200 Subject: [PATCH] Slim down compute-node images (#2346) Slim down compute-node images: - Optimize compute_ctl build for size, not performance & debug-ability - Don't run unused stages. Saves time in not building the PLV8 extension. - Do not include static libraries in clean postgres - Do the installation and finishing touches in the final layer in one job This allows docker (and kaniko) to only register one change to the files, removing potentially duplicate changed files. - The runtime library for libreadline-dev is libreadline8, changing the dependency saves 45 MB - libprotobuf-c-dev -> libprotobuf-c1, saving 100 kB - libossp-uuid-dev -> libossp-uuid16, saving 150 kB - gdal-bin + libgdal-dev -> libgeos-c1v5 + libgdal28 + libproj19, saving 747MB - binutils @ testing -> libc6 @ testing, saving 32 MB --- .github/workflows/build_and_test.yml | 2 +- Cargo.toml | 53 ++++++++++++++++++ Dockerfile.compute-node | 84 ++++++++++++++++++++++------ 3 files changed, 122 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a3314738fa..6fae36c6e4 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -459,7 +459,7 @@ jobs: run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json - name: Kaniko build compute node with extensions - run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID + run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID promote-images: runs-on: dev diff --git a/Cargo.toml b/Cargo.toml index f0934853f0..a19f65a14f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,59 @@ members = [ # Besides, debug info should not affect the performance. debug = true +[profile.release-line-debug] +inherits = "release" +debug = 1 # true = 2 = all symbols, 1 = line only +[profile.release-line-debug-lto] +inherits = "release" +debug = 1 # true = 2 = all symbols, 1 = line only +lto = true + +[profile.release-line-debug-size] +inherits = "release" +debug = 1 # true = 2 = all symbols, 1 = line only +opt-level = "s" +[profile.release-line-debug-zize] +inherits = "release" +debug = 1 # true = 2 = all symbols, 1 = line only +opt-level = "z" +[profile.release-line-debug-size-lto] +inherits = "release" +debug = 1 # true = 2 = all symbols, 1 = line only +opt-level = "s" +lto = true +[profile.release-line-debug-zize-lto] +inherits = "release" +debug = 1 # true = 2 = all symbols, 1 = line only +opt-level = "z" +lto = true + +[profile.release-no-debug] +inherits = "release" +debug = false # true = 2 = all symbols, 1 = line only + +[profile.release-no-debug-size] +inherits = "release" +debug = false # true = 2 = all symbols, 1 = line only +opt-level = "s" +[profile.release-no-debug-zize] +inherits = "release" +debug = false # true = 2 = all symbols, 1 = line only +opt-level = "z" + +[profile.release-no-debug-size-lto] +inherits = "release" +debug = false # true = 2 = all symbols, 1 = line only +opt-level = "s" +lto = true + +[profile.release-no-debug-zize-lto] +inherits = "release" +debug = false # true = 2 = all symbols, 1 = line only +opt-level = "z" +lto = true + + # This is only needed for proxy's tests. # TODO: we should probably fork `tokio-postgres-rustls` instead. [patch.crates-io] diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 2e031b17da..3298032030 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -3,12 +3,18 @@ ARG TAG=pinned # ARG POSTGIS_VERSION=3.3.0 # ARG PLV8_VERSION=3.1.4 +# +# Layer "build-deps" +# FROM debian:bullseye-slim AS build-deps RUN apt update && \ apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \ libcurl4-openssl-dev libossp-uuid-dev +# +# Layer "pg-build" # Build Postgres from the neon postgres repository. +# FROM build-deps AS pg-build COPY vendor/postgres postgres RUN cd postgres && \ @@ -19,9 +25,14 @@ RUN cd postgres && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install -# Build PostGIS from the upstream PostGIS mirror. PostGIS compiles against neon postgres sources without changes. -# Perhaps we could even use the upstream binaries, compiled against vanilla Postgres, but it would require some -# investigation to check that it works, and also keeps working in the future. So for now, we compile our own binaries. +# +# Layer "postgis-build" +# Build PostGIS from the upstream PostGIS mirror. +# +# PostGIS compiles against neon postgres sources without changes. Perhaps we +# could even use the upstream binaries, compiled against vanilla Postgres, but +# it would require some investigation to check that it works, and also keeps +# working in the future. So for now, we compile our own binaries. FROM build-deps AS postgis-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN apt update && \ @@ -42,7 +53,10 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control +# +# Layer "plv8-build" # Build plv8 +# FROM build-deps AS plv8-build COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/ RUN apt update && \ @@ -64,7 +78,10 @@ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \ rm -rf /plv8-* && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control +# +# Layer "neon-pg-ext-build" # compile neon extensions +# FROM build-deps AS neon-pg-ext-build COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ @@ -79,9 +96,32 @@ FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools USER nonroot # Copy entire project to get Cargo.* files with proper dependencies for the whole project COPY --chown=nonroot . . -RUN cd compute_tools && cargo build --locked --release +RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto +# +# Clean up postgres folder before inclusion +# +FROM neon-pg-ext-build AS postgres-cleanup-layer +COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql + +# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise) +RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp + +# Remove headers that we won't need anymore - we've completed installation of all extensions +RUN rm -r /usr/local/pgsql/include + +# Remove now-useless PGXS src infrastructure +RUN rm -r /usr/local/pgsql/lib/pgxs/src + +# Remove static postgresql libraries - all compilation is finished, so we +# can now remove these files - they must be included in other binaries by now +# if they were to be used by other libraries. +RUN rm /usr/local/pgsql/lib/lib*.a + +# +# Final layer # Put it all together into the final image +# FROM debian:bullseye-slim # Add user postgres RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ @@ -93,22 +133,34 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ # TODO: Check if we can make the extension setup more modular versus a linear build # currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc# -COPY --from=neon-pg-ext-build --chown=postgres /usr/local/pgsql /usr/local -COPY --from=compute-tools --chown=postgres /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl +COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local +COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl +# Install: +# libreadline8 for psql +# libossp-uuid16 for extension ossp-uuid +# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS +# GLIBC 2.34 for plv8. +# Debian bullseye provides GLIBC 2.31, so we install the library from testing +# +# Lastly, link compute_ctl into zenith_ctl while we're at it, +# so that we don't need to put this in another layer. RUN apt update && \ - apt install -y libreadline-dev libossp-uuid-dev gdal-bin libgdal-dev libprotobuf-c-dev && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -# Debian bullseye provides GLIBC 2.31 when 2.34 is necessary as we compiled plv8 with that version -RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \ + apt install --no-install-recommends -y \ + libreadline8 \ + libossp-uuid16 \ + libgeos-c1v5 \ + libgdal28 \ + libproj19 \ + libprotobuf-c1 && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ + echo "Installing GLIBC 2.34" && \ + echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \ echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \ apt update && \ - apt install -y --no-install-recommends -t testing binutils && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -# "temporary" symlink for old control-plane -RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl + apt install -y --no-install-recommends -t testing libc6 && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ + ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl USER postgres ENTRYPOINT ["/usr/local/bin/compute_ctl"]