From 03695261fccc0311591c7027bfa8062327b557e8 Mon Sep 17 00:00:00 2001
From: Kirill Bulatov
Date: Fri, 11 Nov 2022 19:42:26 +0200
Subject: [PATCH] Test storage Docker images (#2767)

Closes https://github.com/neondatabase/neon/issues/2697

Example:
https://github.com/neondatabase/neon/actions/runs/3416774593/jobs/5688394855

Adds a set of tests on the storage Docker images before they are pushed
to the public registries:

* tests that pageserver binary has the correct version string (other
binaries are built with the same library, so it should be enough to test
one)
* tests that the compose file set-up works and all components are able
to start and perform a single SQL query (CREATE TABLE)
---
 .github/workflows/build_and_test.yml          | 46 +++++++++++++-
 docker-compose/compute_wrapper/Dockerfile     | 13 ++++
 .../shell/compute.sh                          |  0
 .../var/db/postgres/specs/spec.json           |  0
 docker-compose/docker-compose.yml             | 30 +++++----
 docker-compose/docker_compose_test.sh         | 60 +++++++++++++++++++
 docker-compose/image/compute/Dockerfile       | 10 ----
 scripts/docker-compose_test.sh                | 51 ----------------
 8 files changed, 137 insertions(+), 73 deletions(-)
 create mode 100644 docker-compose/compute_wrapper/Dockerfile
 rename docker-compose/{compute => compute_wrapper}/shell/compute.sh (100%)
 rename docker-compose/{compute => compute_wrapper}/var/db/postgres/specs/spec.json (100%)
 create mode 100755 docker-compose/docker_compose_test.sh
 delete mode 100644 docker-compose/image/compute/Dockerfile
 delete mode 100755 scripts/docker-compose_test.sh

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index b598949f2b..e6014ecb84 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -529,7 +529,6 @@ jobs:
       - name: Kaniko build compute node with extensions v14
         run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
 
-
   compute-node-image-v15:
     runs-on: dev
     container: gcr.io/kaniko-project/executor:v1.9.0-debug
@@ -547,9 +546,52 @@ jobs:
       - name: Kaniko build compute node with extensions v15
         run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}
 
+  test-images:
+    needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
+    runs-on: dev
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      # `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
+      # Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
+      # Regular pageserver version string looks like
+      #   Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c failpoints: true, features: []
+      # Bad versions might look like:
+      #   Neon page server git-env:local failpoints: true, features: ["testing"]
+      # Ensure that we don't have bad versions.
+      - name: Verify image versions
+        shell: bash # explicit bash runs with `-eo pipefail`: a failing `docker run` aborts the step, the checks below print their own errors
+        run: |
+          pageserver_version=$(docker run --rm 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
+
+          echo "Pageserver version string: $pageserver_version"
+
+          if ! echo "$pageserver_version" | grep -qv 'git-env:local' ; then
+            echo "Pageserver version should not be the default Dockerfile one"
+            exit 1
+          fi
+
+          if ! echo "$pageserver_version" | grep -qv '"testing"' ; then
+            echo "Pageserver version should have no testing feature enabled"
+            exit 1
+          fi
+
+      - name: Verify docker-compose example
+        run: env REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
+
+      - name: Print logs and clean up
+        if: always()
+        run: |
+          docker compose -f ./docker-compose/docker-compose.yml logs || true # best-effort: a log failure must not skip the teardown below
+          docker compose -f ./docker-compose/docker-compose.yml down
+
   promote-images:
     runs-on: dev
-    needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
+    needs: [ tag, test-images ]
     if: github.event_name != 'workflow_dispatch'
     container: amazon/aws-cli
     strategy:
diff --git a/docker-compose/compute_wrapper/Dockerfile b/docker-compose/compute_wrapper/Dockerfile
new file mode 100644
index 0000000000..f1b1986072
--- /dev/null
+++ b/docker-compose/compute_wrapper/Dockerfile
@@ -0,0 +1,13 @@
+ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+ARG COMPUTE_IMAGE=compute-node-v14
+ARG TAG=latest
+
+FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG
+
+USER root
+RUN apt-get update &&       \
+    apt-get install -y curl \
+                       jq   \
+                       netcat
+
+USER postgres
diff --git a/docker-compose/compute/shell/compute.sh b/docker-compose/compute_wrapper/shell/compute.sh
similarity index 100%
rename from docker-compose/compute/shell/compute.sh
rename to docker-compose/compute_wrapper/shell/compute.sh
diff --git a/docker-compose/compute/var/db/postgres/specs/spec.json b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
similarity index 100%
rename from docker-compose/compute/var/db/postgres/specs/spec.json
rename to docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
diff --git a/docker-compose/docker-compose.yml b/docker-compose/docker-compose.yml
index 9ab775c3f9..61b53dba41 100644
--- a/docker-compose/docker-compose.yml
+++ b/docker-compose/docker-compose.yml
@@ -2,6 +2,7 @@ version: '3'
 
 services:
   etcd:
+    restart: always
     image: quay.io/coreos/etcd:v3.5.4
     ports:
       - 2379:2379
@@ -9,7 +10,7 @@ services:
     environment:
       # This signifficantly speeds up etcd and we anyway don't data persistency there.
       ETCD_UNSAFE_NO_FSYNC: "1"
-    command: 
+    command:
       - "etcd"
       - "--auto-compaction-mode=revision"
       - "--auto-compaction-retention=1"
@@ -24,6 +25,7 @@ services:
       - "--quota-backend-bytes=134217728" # 128 MB
 
   minio:
+    restart: always
     image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z
     ports:
       - 9000:9000
@@ -41,7 +43,7 @@ services:
     entrypoint:
       - "/bin/sh"
       - "-c"
-    command: 
+    command:
       - "until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do
              echo 'Waiting to start minio...' && sleep 1;
          done;
@@ -51,7 +53,8 @@ services:
       - minio
 
   pageserver:
-    image: neondatabase/neon:${TAG:-latest}
+    restart: always
+    image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
     environment:
       - BROKER_ENDPOINT='http://etcd:2379'
       - AWS_ACCESS_KEY_ID=minio
@@ -77,7 +80,8 @@ services:
       - minio_create_buckets
 
   safekeeper1:
-    image: neondatabase/neon:${TAG:-latest}
+    restart: always
+    image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
     environment:
       - SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
       - SAFEKEEPER_ID=1
@@ -106,7 +110,8 @@ services:
       - minio_create_buckets
 
   safekeeper2:
-    image: neondatabase/neon:${TAG:-latest}
+    restart: always
+    image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
     environment:
       - SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
       - SAFEKEEPER_ID=2
@@ -135,7 +140,8 @@ services:
       - minio_create_buckets
 
   safekeeper3:
-    image: neondatabase/neon:${TAG:-latest}
+    restart: always
+    image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
     environment:
       - SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
       - SAFEKEEPER_ID=3
@@ -164,18 +170,22 @@ services:
       - minio_create_buckets
 
   compute:
+    restart: always
     build:
-      context: ./image/compute
+      context: ./compute_wrapper/
       args:
-        - COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}:${TAG:-latest}
+        - REPOSITORY=${REPOSITORY:-neondatabase}
+        - COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}
+        - TAG=${TAG:-latest}
         - http_proxy=$http_proxy
         - https_proxy=$https_proxy
     environment:
       - PG_VERSION=${PG_VERSION:-14}
       #- RUST_BACKTRACE=1
+    # Mount the test files directly, for faster editing cycle.
     volumes:
-      - ./compute/var/db/postgres/specs/:/var/db/postgres/specs/
-      - ./compute/shell/:/shell/
+      - ./compute_wrapper/var/db/postgres/specs/:/var/db/postgres/specs/
+      - ./compute_wrapper/shell/:/shell/
     ports:
       - 55433:55433 # pg protocol handler
       - 3080:3080 # http endpoints
diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh
new file mode 100755
index 0000000000..9de5277bf1
--- /dev/null
+++ b/docker-compose/docker_compose_test.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# A basic test to ensure Docker images are built correctly.
+# Builds a wrapper around the compute, starts all services and runs a simple SQL query.
+# Repeats the process for all currently supported Postgres versions.
+
+# Implicitly accepts `REPOSITORY` and `TAG` env vars that are passed into the compose file.
+# Their defaults point at the DockerHub `neondatabase/neon:latest` image;
+# override them to verify custom image builds (e.g. pre-published ones).
+
+# XXX: Currently does not work on M1 macs: only x86_64 Docker images are built, and the M1 Docker emulation layer has no seccomp support.
+
+set -eux -o pipefail
+
+SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+COMPOSE_FILE=$SCRIPT_DIR/docker-compose.yml
+
+COMPUTE_CONTAINER_NAME=docker-compose-compute-1
+SQL="CREATE TABLE t(key int primary key, value text); insert into t values(1,1); select * from t;"
+PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -c '$SQL' postgres"
+
+cleanup() {
+    echo "show container information"
+    docker ps
+    docker compose -f "$COMPOSE_FILE" logs
+    echo "stop containers..."
+    docker compose -f "$COMPOSE_FILE" down
+}
+
+echo "clean up containers if exists"
+cleanup
+
+for pg_version in 14 15; do
+    echo "start containers (pg_version=$pg_version)."
+    PG_VERSION=$pg_version docker compose -f "$COMPOSE_FILE" up --build -d
+
+    echo "wait until the compute is ready. timeout after 60s. "
+    cnt=0
+    while sleep 1; do
+        # check timeout (one iteration per second, see `sleep 1` above)
+        cnt=$((cnt + 1))
+        if [ "$cnt" -gt 60 ]; then
+            echo "timeout before the compute is ready."
+            cleanup
+            exit 1
+        fi
+
+        # check if the compute is ready; pipefail is off because grep exits 1 while there is no match yet
+        set +o pipefail
+        result=$(docker compose -f "$COMPOSE_FILE" logs "compute_is_ready" | grep "accepting connections" | wc -l)
+        set -o pipefail
+        if [ "$result" -eq 1 ]; then
+            echo "OK. The compute is ready to connect."
+            echo "execute simple queries."
+            docker exec "$COMPUTE_CONTAINER_NAME" /bin/bash -c "psql $PSQL_OPTION"
+            cleanup
+            break
+        fi
+    done
+done
diff --git a/docker-compose/image/compute/Dockerfile b/docker-compose/image/compute/Dockerfile
deleted file mode 100644
index 1b9d8c4900..0000000000
--- a/docker-compose/image/compute/Dockerfile
+++ /dev/null
@@ -1,10 +0,0 @@
-ARG COMPUTE_IMAGE=compute-node-v14:latest
-FROM neondatabase/${COMPUTE_IMAGE}
-
-USER root
-RUN apt-get update &&       \
-    apt-get install -y curl \
-                       jq   \
-                       netcat
-
-USER postgres
diff --git a/scripts/docker-compose_test.sh b/scripts/docker-compose_test.sh
deleted file mode 100755
index b4551365f8..0000000000
--- a/scripts/docker-compose_test.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-# this is a shortcut script to avoid duplication in CI
-set -eux -o pipefail
-
-SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
-COMPOSE_FILE=$SCRIPT_DIR/../docker-compose/docker-compose.yml
-
-COMPUTE_CONTAINER_NAME=dockercompose_compute_1
-SQL="CREATE TABLE t(key int primary key, value text); insert into t values(1,1); select * from t;"
-PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -c '$SQL' postgres"
-
-cleanup() {
-    echo "show container information"
-    docker ps
-    docker-compose -f $COMPOSE_FILE logs
-    echo "stop containers..."
-    docker-compose -f $COMPOSE_FILE down
-}
-
-echo "clean up containers if exists"
-cleanup
-
-for pg_version in 14 15; do
-    echo "start containers (pg_version=$pg_version)."
-    PG_VERSION=$pg_version TAG=latest docker-compose -f $COMPOSE_FILE up --build -d
-
-    echo "wait until the compute is ready. timeout after 60s. "
-    cnt=0
-    while sleep 1; do
-        # check timeout
-        cnt=`expr $cnt + 1`
-        if [ $cnt -gt 60 ]; then
-            echo "timeout before the compute is ready."
-            cleanup
-            exit 1
-        fi
-
-        # check if the compute is ready
-        set +o pipefail
-        result=`docker-compose -f $COMPOSE_FILE logs "compute_is_ready" | grep "accepting connections" | wc -l`
-        set -o pipefail
-        if [ $result -eq 1 ]; then
-            echo "OK. The compute is ready to connect."
-            echo "execute simple queries."
-            docker exec -it $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
-            cleanup
-            break
-        fi
-    done
-done