From 5f8fd640bfa8e5d4d23dbc3df1b0a521ec666e56 Mon Sep 17 00:00:00 2001 From: Alek Westover Date: Wed, 26 Jul 2023 08:24:03 -0400 Subject: [PATCH] Upload Test Remote Extensions (#4792) We need some real extensions in S3 to accurately test the code for handling remote extensions. In this PR we just upload three extensions (anon, kq_imcx and postgis), which is enough for testing purposes for now. In addition to creating and uploading the extension archives, we must generate a file `ext_index.json` which specifies important metadata about the extensions. --------- Co-authored-by: Anastasia Lubennikova Co-authored-by: Alexander Bayandin --- .dockerignore | 1 + .github/workflows/build_and_test.yml | 46 ++++----------------- Dockerfile.compute-node | 62 +++++++++++++++++++--------- scripts/combine_control_files.py | 33 +++++++++++++++ 4 files changed, 84 insertions(+), 58 deletions(-) create mode 100644 scripts/combine_control_files.py diff --git a/.dockerignore b/.dockerignore index a6e11805e9..960588b6f2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -21,4 +21,5 @@ !workspace_hack/ !neon_local/ !scripts/ninstall.sh +!scripts/combine_control_files.py !vm-cgconfig.conf diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 18dfc458b5..27bad61639 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -955,22 +955,15 @@ jobs: version: [ v14, v15 ] env: - # While on transition period we extract public extensions from compute-node image and custom extensions from extensions image. - # Later all the extensions will be moved to extensions image. 
- EXTENSIONS_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:latest - COMPUTE_NODE_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:latest + EXTENSIONS_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} AWS_ACCESS_KEY_ID: ${{ github.ref_name == 'release' && secrets.AWS_ACCESS_KEY_PROD || secrets.AWS_ACCESS_KEY_DEV }} AWS_SECRET_ACCESS_KEY: ${{ github.ref_name == 'release' && secrets.AWS_SECRET_KEY_PROD || secrets.AWS_SECRET_KEY_DEV }} - S3_BUCKETS: | - ${{ github.ref_name == 'release' && - 'neon-prod-extensions-ap-southeast-1 neon-prod-extensions-eu-central-1 neon-prod-extensions-us-east-1 neon-prod-extensions-us-east-2 neon-prod-extensions-us-west-2' || - 'neon-dev-extensions-eu-central-1 neon-dev-extensions-eu-west-1 neon-dev-extensions-us-east-2' }} + S3_BUCKETS: ${{ github.ref_name == 'release' && vars.S3_EXTENSIONS_BUCKETS_PROD || vars.S3_EXTENSIONS_BUCKETS_DEV }} steps: - name: Pull postgres-extensions image run: | docker pull ${EXTENSIONS_IMAGE} - docker pull ${COMPUTE_NODE_IMAGE} - name: Create postgres-extensions container id: create-container @@ -978,46 +971,23 @@ jobs: EID=$(docker create ${EXTENSIONS_IMAGE} true) echo "EID=${EID}" >> $GITHUB_OUTPUT - CID=$(docker create ${COMPUTE_NODE_IMAGE} true) - echo "CID=${CID}" >> $GITHUB_OUTPUT - - name: Extract postgres-extensions from container run: | - rm -rf ./extensions-to-upload ./custom-extensions # Just in case + rm -rf ./extensions-to-upload # Just in case + mkdir -p extensions-to-upload - # In compute image we have a bit different directory layout - mkdir -p extensions-to-upload/share - docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/share/extension 
./extensions-to-upload/share/extension - docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/lib ./extensions-to-upload/lib - - # Delete Neon extensitons (they always present on compute-node image) - rm -rf ./extensions-to-upload/share/extension/neon* - rm -rf ./extensions-to-upload/lib/neon* - - # Delete leftovers from the extension build step - rm -rf ./extensions-to-upload/lib/pgxs - rm -rf ./extensions-to-upload/lib/pkgconfig - - docker cp ${{ steps.create-container.outputs.EID }}:/extensions ./custom-extensions - for EXT_NAME in $(ls ./custom-extensions); do - mkdir -p ./extensions-to-upload/${EXT_NAME}/share - - mv ./custom-extensions/${EXT_NAME}/share/extension ./extensions-to-upload/${EXT_NAME}/share/extension - mv ./custom-extensions/${EXT_NAME}/lib ./extensions-to-upload/${EXT_NAME}/lib - done + docker cp ${{ steps.create-container.outputs.EID }}:/extensions/ ./extensions-to-upload/ + docker cp ${{ steps.create-container.outputs.EID }}:/ext_index.json ./extensions-to-upload/ - name: Upload postgres-extensions to S3 - # TODO: Reenable step after switching to the new extensions format (tar-gzipped + index.json) - if: false run: | - for BUCKET in $(echo ${S3_BUCKETS}); do + for BUCKET in $(echo ${S3_BUCKETS:-[]} | jq --raw-output '.[]'); do aws s3 cp --recursive --only-show-errors ./extensions-to-upload s3://${BUCKET}/${{ needs.tag.outputs.build-tag }}/${{ matrix.version }} done - name: Cleanup - if: ${{ always() && (steps.create-container.outputs.CID || steps.create-container.outputs.EID) }} + if: ${{ always() && steps.create-container.outputs.EID }} run: | - docker rm ${{ steps.create-container.outputs.CID }} || true docker rm ${{ steps.create-container.outputs.EID }} || true deploy: diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 495ef25526..7d60458a3e 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -13,7 +13,7 @@ FROM debian:bullseye-slim AS build-deps RUN apt update && \ apt install -y git autoconf 
automake libtool build-essential bison flex libreadline-dev \ zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \ - libicu-dev libxslt1-dev liblz4-dev libzstd-dev + libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd ######################################################################################### # @@ -77,6 +77,7 @@ ENV PATH "/usr/local/pgsql/bin:$PATH" RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \ echo "9a2a219da005a1730a39d1959a1c7cec619b1efb009b65be80ffc25bad299068 postgis.tar.gz" | sha256sum --check && \ mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \ + find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\ ./autogen.sh && \ ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ @@ -89,17 +90,28 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postg echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \ - echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control + echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control && \ + mkdir -p /extensions/postgis && \ + cp /usr/local/pgsql/share/extension/postgis.control /extensions/postgis && \ + cp /usr/local/pgsql/share/extension/postgis_raster.control /extensions/postgis && \ + cp /usr/local/pgsql/share/extension/postgis_sfcgal.control /extensions/postgis && \ + cp /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control /extensions/postgis && \ + cp /usr/local/pgsql/share/extension/postgis_topology.control /extensions/postgis && \ + cp 
/usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \ + cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \ echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \ mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \ - mkdir build && \ - cd build && \ + mkdir build && cd build && \ cmake -DCMAKE_BUILD_TYPE=Release .. && \ make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ - echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control + echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control && \ + find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\ + cp /usr/local/pgsql/share/extension/pgrouting.control /extensions/postgis && \ + sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \ + comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/postgis.tar.zst -T - ######################################################################################### # @@ -419,12 +431,16 @@ RUN apt-get update && \ wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \ echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \ mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \ - mkdir build && \ - cd build && \ + find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\ + mkdir build && cd build && \ cmake -DCMAKE_BUILD_TYPE=Release .. 
&& \ make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ - echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control + echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control && \ + find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\ + mkdir -p /extensions/kq_imcx && cp /usr/local/pgsql/share/extension/kq_imcx.control /extensions/kq_imcx && \ + sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \ + comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/kq_imcx.tar.zst -T - ######################################################################################### # @@ -553,16 +569,17 @@ RUN wget https://github.com/neondatabase/pg_embedding/archive/eeb3ba7c3a60c95b26 FROM build-deps AS pg-anon-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -# Kaniko doesn't allow to do `${from#/usr/local/pgsql/}`, so we use `${from:17}` instead ENV PATH "/usr/local/pgsql/bin/:$PATH" RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/1.1.0/postgresql_anonymizer-1.1.0.tar.gz -O pg_anon.tar.gz && \ echo "08b09d2ff9b962f96c60db7e6f8e79cf7253eb8772516998fc35ece08633d3ad pg_anon.tar.gz" | sha256sum --check && \ mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . 
&& \ - find /usr/local/pgsql -type f | sort > /before.txt && \ + find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \ - find /usr/local/pgsql -type f | sort > /after.txt && \ - /bin/bash -c 'for from in $(comm -13 /before.txt /after.txt); do to=/extensions/anon/${from:17} && mkdir -p $(dirname ${to}) && cp -a ${from} ${to}; done' + find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\ + mkdir -p /extensions/anon && cp /usr/local/pgsql/share/extension/anon.control /extensions/anon && \ + sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \ + comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/anon.tar.zst -T - ######################################################################################### # @@ -754,16 +771,21 @@ RUN rm /usr/local/pgsql/lib/lib*.a # Extenstion only # ######################################################################################### +FROM python:3.9-slim-bullseye AS generate-ext-index +ARG PG_VERSION +ARG BUILD_TAG +# copy the control files here +COPY --from=kq-imcx-pg-build /extensions/ /extensions/ +COPY --from=pg-anon-pg-build /extensions/ /extensions/ +COPY --from=postgis-build /extensions/ /extensions/ +COPY scripts/combine_control_files.py ./combine_control_files.py +RUN python3 ./combine_control_files.py ${PG_VERSION} ${BUILD_TAG} + FROM scratch AS postgres-extensions # After the transition this layer will include all extensitons. 
-# As for now, it's only for new custom ones -# -# # Default extensions -# COPY --from=postgres-cleanup-layer /usr/local/pgsql/share/extension /usr/local/pgsql/share/extension -# COPY --from=postgres-cleanup-layer /usr/local/pgsql/lib /usr/local/pgsql/lib -# Custom extensions -COPY --from=pg-anon-pg-build /extensions/anon/lib/ /extensions/anon/lib -COPY --from=pg-anon-pg-build /extensions/anon/share/extension /extensions/anon/share/extension +# As for now, it's only a couple for testing purposes +COPY --from=generate-ext-index /extensions/*.tar.zst /extensions/ +COPY --from=generate-ext-index /ext_index.json /ext_index.json ######################################################################################### # diff --git a/scripts/combine_control_files.py b/scripts/combine_control_files.py new file mode 100644 index 0000000000..37db522ddd --- /dev/null +++ b/scripts/combine_control_files.py @@ -0,0 +1,33 @@ +#! /usr/bin/env python3 +# Script to generate ext_index.json metadata file +# that stores content of the control files and location of extension archives +# for all extensions in extensions subdir. 
+import argparse
+import json
+from pathlib import Path
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="generate ext_index.json")
+    parser.add_argument("pg_version", type=str, choices=["v14", "v15"], help="pg_version")
+    parser.add_argument("BUILD_TAG", type=str, help="BUILD_TAG for this compute image")
+    args = parser.parse_args()
+    pg_version = args.pg_version
+    BUILD_TAG = args.BUILD_TAG
+
+    ext_index = {}  # extension name -> {"control_data": ..., "archive_path": ...}
+    EXT_PATH = Path("extensions")  # relative: resolved against the current working directory
+    for extension in sorted(EXT_PATH.iterdir()):  # sorted: iterdir() order is arbitrary
+        if extension.is_dir():  # only sub-directories describe an extension
+            control_data = {}  # control file name -> full text of that file
+            for control_file in sorted(extension.glob("*.control")):  # stable key order
+                if control_file.suffix != ".control":  # defensive: exact suffix only
+                    continue
+                with open(control_file, "r", encoding="utf-8") as f:
+                    control_data[control_file.name] = f.read()
+            ext_index[extension.name] = {
+                "control_data": control_data,
+                "archive_path": f"{BUILD_TAG}/{pg_version}/extensions/{extension.name}.tar.zst",
+            }
+
+    with open("ext_index.json", "w", encoding="utf-8") as f:  # written to the working directory
+        json.dump(ext_index, f)