diff --git a/.circleci/ansible/ansible.cfg b/.circleci/ansible/ansible.cfg
new file mode 100644
index 0000000000..e3daf3abe3
--- /dev/null
+++ b/.circleci/ansible/ansible.cfg
@@ -0,0 +1,10 @@
+[defaults]
+
+localhost_warning = False
+host_key_checking = False
+timeout = 30
+
+[ssh_connection]
+ssh_args = -F ./ansible.ssh.cfg
+scp_if_ssh = True
+pipelining = True
diff --git a/.circleci/ansible/ansible.ssh.cfg b/.circleci/ansible/ansible.ssh.cfg
new file mode 100644
index 0000000000..91f673718e
--- /dev/null
+++ b/.circleci/ansible/ansible.ssh.cfg
@@ -0,0 +1,11 @@
+Host tele.zenith.tech
+    User admin
+    Port 3023
+    StrictHostKeyChecking no
+    UserKnownHostsFile /dev/null
+
+Host * !tele.zenith.tech
+    User admin
+    StrictHostKeyChecking no
+    UserKnownHostsFile /dev/null
+    ProxyJump tele.zenith.tech
diff --git a/.circleci/ansible/deploy.yaml b/.circleci/ansible/deploy.yaml
new file mode 100644
index 0000000000..06385aa0d3
--- /dev/null
+++ b/.circleci/ansible/deploy.yaml
@@ -0,0 +1,174 @@
+- name: Upload Zenith binaries
+  hosts: pageservers:safekeepers
+  gather_facts: False
+  remote_user: admin
+  vars:
+    force_deploy: false
+
+  tasks:
+
+    - name: get latest version of Zenith binaries
+      ignore_errors: true
+      register: current_version_file
+      set_fact:
+        current_version: "{{ lookup('file', '.zenith_current_version') | trim }}"
+      tags:
+        - pageserver
+        - safekeeper
+
+    - name: set zero value for current_version
+      when: current_version_file is failed
+      set_fact:
+        current_version: "0"
+      tags:
+        - pageserver
+        - safekeeper
+
+    - name: get deployed version from content of remote file
+      ignore_errors: true
+      ansible.builtin.slurp:
+        src: /usr/local/.zenith_current_version
+      register: remote_version_file
+      tags:
+        - pageserver
+        - safekeeper
+
+    - name: decode remote file content
+      when: remote_version_file is succeeded
+      set_fact:
+        remote_version: "{{ remote_version_file['content'] | b64decode | trim }}"
+      tags:
+        - pageserver
+        - safekeeper
+
+    - name: set zero value for remote_version
+      when: remote_version_file is failed
+      set_fact:
+        remote_version: "0"
+      tags:
+        - pageserver
+        - safekeeper
+
+    - name: inform about versions
+      debug: msg="Version to deploy - {{ current_version }}, version on storage node - {{ remote_version }}"
+      tags:
+        - pageserver
+        - safekeeper
+
+
+    - name: upload and extract Zenith binaries to /usr/local
+      when: current_version > remote_version or force_deploy
+      ansible.builtin.unarchive:
+        owner: root
+        group: root
+        src: zenith_install.tar.gz
+        dest: /usr/local
+      become: true
+      tags:
+        - pageserver
+        - safekeeper
+        - binaries
+        - putbinaries
+
+- name: Deploy pageserver
+  hosts: pageservers
+  gather_facts: False
+  remote_user: admin
+  vars:
+    force_deploy: false
+
+  tasks:
+    - name: init pageserver
+      when: current_version > remote_version or force_deploy
+      shell:
+        cmd: sudo -u pageserver /usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
+      args:
+        creates: "/storage/pageserver/data/tenants"
+      environment:
+        ZENITH_REPO_DIR: "/storage/pageserver/data"
+        LD_LIBRARY_PATH: "/usr/local/lib"
+      become: true
+      tags:
+        - pageserver
+
+    - name: upload systemd service definition
+      when: current_version > remote_version or force_deploy
+      ansible.builtin.template:
+        src: systemd/pageserver.service
+        dest: /etc/systemd/system/pageserver.service
+        owner: root
+        group: root
+        mode: '0644'
+      become: true
+      tags:
+        - pageserver
+
+    - name: start systemd service
+      when: current_version > remote_version or force_deploy
+      ansible.builtin.systemd:
+        daemon_reload: yes
+        name: pageserver
+        enabled: yes
+        state: restarted
+      become: true
+      tags:
+        - pageserver
+
+    - name: post version to console
+      when: (current_version > remote_version or force_deploy) and console_mgmt_base_url is defined
+      shell:
+        cmd: |
+          INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
+          curl -sfS -d '{"version": {{ current_version }} }' -X POST {{ console_mgmt_base_url }}/api/v1/pageservers/$INSTANCE_ID
+      tags:
+        - pageserver
+
+- name: Deploy safekeeper
+  hosts: safekeepers
+  gather_facts: False
+  remote_user: admin
+  vars:
+    force_deploy: false
+
+  tasks:
+
+    # in the future safekeepers should discover pageservers by themselves,
+    # but for now use the first pageserver that was discovered
+    - name: set first pageserver var for safekeepers
+      when: current_version > remote_version or force_deploy
+      set_fact:
+        first_pageserver: "{{ hostvars[groups['pageservers'][0]]['inventory_hostname'] }}"
+      tags:
+        - safekeeper
+
+    - name: upload systemd service definition
+      when: current_version > remote_version or force_deploy
+      ansible.builtin.template:
+        src: systemd/safekeeper.service
+        dest: /etc/systemd/system/safekeeper.service
+        owner: root
+        group: root
+        mode: '0644'
+      become: true
+      tags:
+        - safekeeper
+
+    - name: start systemd service
+      when: current_version > remote_version or force_deploy
+      ansible.builtin.systemd:
+        daemon_reload: yes
+        name: safekeeper
+        enabled: yes
+        state: restarted
+      become: true
+      tags:
+        - safekeeper
+
+    - name: post version to console
+      when: (current_version > remote_version or force_deploy) and console_mgmt_base_url is defined
+      shell:
+        cmd: |
+          INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
+          curl -sfS -d '{"version": {{ current_version }} }' -X POST {{ console_mgmt_base_url }}/api/v1/safekeepers/$INSTANCE_ID
+      tags:
+        - safekeeper
diff --git a/.circleci/ansible/get_binaries.sh b/.circleci/ansible/get_binaries.sh
new file mode 100755
index 0000000000..242a9e87e2
--- /dev/null
+++ b/.circleci/ansible/get_binaries.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+set -e
+
+RELEASE=${RELEASE:-false}
+
+# look at docker hub for the latest tag of the zenith docker image
+if [ "${RELEASE}" = "true" ]; then
+    echo "search latest release tag"
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
+    if [ -z "${VERSION}" ]; then
+        echo "no docker tags found, exiting..."
+        exit 1
+    else
+        TAG="release-${VERSION}"
+    fi
+else
+    echo "search latest dev tag"
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep -v release | tail -1)
+    if [ -z "${VERSION}" ]; then
+        echo "no docker tags found, exiting..."
+        exit 1
+    else
+        TAG="${VERSION}"
+    fi
+fi
+
+echo "found ${VERSION}"
+
+# do initial cleanup
+rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz .zenith_current_version
+mkdir zenith_install
+
+# retrieve binaries from docker image
+echo "getting binaries from docker image"
+docker pull --quiet zenithdb/zenith:${TAG}
+ID=$(docker create zenithdb/zenith:${TAG})
+docker cp ${ID}:/data/postgres_install.tar.gz .
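+# unpack the postgres distribution and copy the zenith service binaries out of the image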
+tar -xzf postgres_install.tar.gz -C zenith_install
+docker cp ${ID}:/usr/local/bin/pageserver zenith_install/bin/
+docker cp ${ID}:/usr/local/bin/safekeeper zenith_install/bin/
+docker cp ${ID}:/usr/local/bin/proxy zenith_install/bin/
+docker cp ${ID}:/usr/local/bin/postgres zenith_install/bin/
+docker rm -vf ${ID}
+
+# store version to file (for ansible playbooks) and create binaries tarball
+echo ${VERSION} > zenith_install/.zenith_current_version
+echo ${VERSION} > .zenith_current_version
+tar -czf zenith_install.tar.gz -C zenith_install .
+
+# do final cleanup
+rm -rf zenith_install postgres_install.tar.gz
diff --git a/.circleci/ansible/production.hosts b/.circleci/ansible/production.hosts
new file mode 100644
index 0000000000..c5b4f664a6
--- /dev/null
+++ b/.circleci/ansible/production.hosts
@@ -0,0 +1,7 @@
+[pageservers]
+zenith-1-ps-1
+
+[safekeepers]
+zenith-1-sk-1
+zenith-1-sk-2
+zenith-1-sk-3
diff --git a/.circleci/ansible/staging.hosts b/.circleci/ansible/staging.hosts
new file mode 100644
index 0000000000..e625120bf3
--- /dev/null
+++ b/.circleci/ansible/staging.hosts
@@ -0,0 +1,7 @@
+[pageservers]
+zenith-us-stage-ps-1
+
+[safekeepers]
+zenith-us-stage-sk-1
+zenith-us-stage-sk-2
+zenith-us-stage-sk-3
diff --git a/.circleci/ansible/systemd/pageserver.service b/.circleci/ansible/systemd/pageserver.service
new file mode 100644
index 0000000000..d346643e58
--- /dev/null
+++ b/.circleci/ansible/systemd/pageserver.service
@@ -0,0 +1,18 @@
+[Unit]
+Description=Zenith pageserver
+After=network.target auditd.service
+
+[Service]
+Type=simple
+User=pageserver
+Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
+ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /storage/pageserver/data
+ExecReload=/bin/kill -HUP $MAINPID
+KillMode=mixed
+KillSignal=SIGINT
+Restart=on-failure
+TimeoutSec=10
+LimitNOFILE=30000000
+
+[Install]
+WantedBy=multi-user.target
diff --git a/.circleci/ansible/systemd/safekeeper.service b/.circleci/ansible/systemd/safekeeper.service
new file mode 100644
index 0000000000..e75602b609
--- /dev/null
+++ b/.circleci/ansible/systemd/safekeeper.service
@@ -0,0 +1,18 @@
+[Unit]
+Description=Zenith safekeeper
+After=network.target auditd.service
+
+[Service]
+Type=simple
+User=safekeeper
+Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
+ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data
+ExecReload=/bin/kill -HUP $MAINPID
+KillMode=mixed
+KillSignal=SIGINT
+Restart=on-failure
+TimeoutSec=10
+LimitNOFILE=30000000
+
+[Install]
+WantedBy=multi-user.target
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 73c487c301..db9fc31334 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -471,46 +471,78 @@ jobs:
             docker build -t zenithdb/compute-node:latest vendor/postgres && docker push zenithdb/compute-node:latest
             docker tag zenithdb/compute-node:latest zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}
 
+  # Build production zenithdb/zenith:release image and push it to Docker hub
+  docker-image-release:
+    docker:
+      - image: cimg/base:2021.04
+    steps:
+      - checkout
+      - setup_remote_docker:
+          docker_layer_caching: true
+      - run:
+          name: Init postgres submodule
+          command: git submodule update --init --depth 1
+      - run:
+          name: Build and push Docker image
+          command: |
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            DOCKER_TAG="release-$(git log --oneline|wc -l)"
+            docker build --build-arg GIT_VERSION=$CIRCLE_SHA1 -t zenithdb/zenith:release . && docker push zenithdb/zenith:release
+            docker tag zenithdb/zenith:release zenithdb/zenith:${DOCKER_TAG} && docker push zenithdb/zenith:${DOCKER_TAG}
+
+  # Build production zenithdb/compute-node:release image and push it to Docker hub
+  docker-image-compute-release:
+    docker:
+      - image: cimg/base:2021.04
+    steps:
+      - checkout
+      - setup_remote_docker:
+          docker_layer_caching: true
+      # Build zenithdb/compute-tools:release image and push it to Docker hub
+      # TODO: this should probably also use a versioned tag, not just :latest.
+      # XXX: but should it? We build and use it only locally now.
+      - run:
+          name: Build and push compute-tools Docker image
+          command: |
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            docker build -t zenithdb/compute-tools:release -f Dockerfile.compute-tools .
+            docker push zenithdb/compute-tools:release
+      - run:
+          name: Init postgres submodule
+          command: git submodule update --init --depth 1
+      - run:
+          name: Build and push compute-node Docker image
+          command: |
+            echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
+            DOCKER_TAG="release-$(git log --oneline|wc -l)"
+            docker build -t zenithdb/compute-node:release vendor/postgres && docker push zenithdb/compute-node:release
+            docker tag zenithdb/compute-node:release zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}
+
   deploy-staging:
     docker:
       - image: cimg/python:3.10
     steps:
       - checkout
       - setup_remote_docker
-      - run:
-          name: Get Zenith binaries
-          command: |
-            rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz
-            mkdir zenith_install
-            DOCKER_TAG=$(git log --oneline|wc -l)
-            docker pull --quiet zenithdb/zenith:${DOCKER_TAG}
-            ID=$(docker create zenithdb/zenith:${DOCKER_TAG})
-            docker cp $ID:/data/postgres_install.tar.gz .
-            tar -xzf postgres_install.tar.gz -C zenith_install && rm postgres_install.tar.gz
-            docker cp $ID:/usr/local/bin/pageserver zenith_install/bin/
-            docker cp $ID:/usr/local/bin/safekeeper zenith_install/bin/
-            docker cp $ID:/usr/local/bin/proxy zenith_install/bin/
-            docker cp $ID:/usr/local/bin/postgres zenith_install/bin/
-            docker rm -v $ID
-            echo ${DOCKER_TAG} | tee zenith_install/.zenith_current_version
-            tar -czf zenith_install.tar.gz -C zenith_install .
-            ls -la zenith_install.tar.gz
       - run:
           name: Setup ansible
           command: |
             pip install --progress-bar off --user ansible boto3
-            ansible-galaxy collection install amazon.aws
       - run:
-          name: Apply re-deploy playbook
-          environment:
-            ANSIBLE_HOST_KEY_CHECKING: false
+          name: Redeploy
           command: |
-            echo "${STAGING_SSH_KEY}" | base64 --decode | ssh-add -
-            export AWS_REGION=${STAGING_AWS_REGION}
-            export AWS_ACCESS_KEY_ID=${STAGING_AWS_ACCESS_KEY_ID}
-            export AWS_SECRET_ACCESS_KEY=${STAGING_AWS_SECRET_ACCESS_KEY}
-            ansible-playbook .circleci/storage-redeploy.playbook.yml
-            rm -f zenith_install.tar.gz
+            cd "$(pwd)/.circleci/ansible"
+
+            ./get_binaries.sh
+
+            echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
+            echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
+            chmod 0600 ssh-key
+            ssh-add ssh-key
+            rm -f ssh-key ssh-key-cert.pub
+
+            ansible-playbook deploy.yaml -i staging.hosts
+            rm -f zenith_install.tar.gz .zenith_current_version
 
   deploy-staging-proxy:
     docker:
@@ -533,7 +565,57 @@ jobs:
           name: Re-deploy proxy
           command: |
             DOCKER_TAG=$(git log --oneline|wc -l)
-            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/proxy.staging.yaml --set image.tag=${DOCKER_TAG} --wait
+            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
+
+
+  deploy-release:
+    docker:
+      - image: cimg/python:3.10
+    steps:
+      - checkout
+      - setup_remote_docker
+      - run:
+          name: Setup ansible
+          command: |
+            pip install --progress-bar off --user ansible boto3
+      - run:
+          name: Redeploy
+          command: |
+            cd "$(pwd)/.circleci/ansible"
+
+            RELEASE=true ./get_binaries.sh
+
+            echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
+            echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
+            chmod 0600 ssh-key
+            ssh-add ssh-key
+            rm -f ssh-key ssh-key-cert.pub
+
+            ansible-playbook deploy.yaml -i production.hosts -e console_mgmt_base_url=http://console-release.local
+            rm -f zenith_install.tar.gz .zenith_current_version
+
+  deploy-release-proxy:
+    docker:
+      - image: cimg/base:2021.04
+    environment:
+      KUBECONFIG: .kubeconfig
+    steps:
+      - checkout
+      - run:
+          name: Store kubeconfig file
+          command: |
+            echo "${PRODUCTION_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
+            chmod 0600 ${KUBECONFIG}
+      - run:
+          name: Setup helm v3
+          command: |
+            curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+            helm repo add zenithdb https://zenithdb.github.io/helm-charts
+      - run:
+          name: Re-deploy proxy
+          command: |
+            DOCKER_TAG="release-$(git log --oneline|wc -l)"
+            helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
   # Trigger a new remote CI job
   remote-ci-trigger:
     docker:
@@ -669,6 +751,47 @@ workflows:
                 - main
           requires:
             - docker-image
+
+      - docker-image-release:
+          # Context gives an ability to login
+          context: Docker Hub
+          # Build image only for commits to the release branch
+          filters:
+            branches:
+              only:
+                - release
+          requires:
+            - pg_regress-tests-release
+            - other-tests-release
+      - docker-image-compute-release:
+          # Context gives an ability to login
+          context: Docker Hub
+          # Build image only for commits to the release branch
+          filters:
+            branches:
+              only:
+                - release
+          requires:
+            - pg_regress-tests-release
+            - other-tests-release
+      - deploy-release:
+          # Context gives an ability to login
+          context: Docker Hub
+          # deploy only for commits to the release branch
+          filters:
+            branches:
+              only:
+                - release
+          requires:
+            - docker-image-release
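+      # deploy-release-proxy re-deploys the proxy helm chart once the release image is built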
+      - deploy-release-proxy:
+          # deploy only for commits to the release branch
+          filters:
+            branches:
+              only:
+                - release
+          requires:
+            - docker-image-release
       - remote-ci-trigger:
           # Context passes credentials for gh api
           context: CI_ACCESS_TOKEN
diff --git a/.circleci/helm-values/production.proxy.yaml b/.circleci/helm-values/production.proxy.yaml
new file mode 100644
index 0000000000..27aa169c79
--- /dev/null
+++ b/.circleci/helm-values/production.proxy.yaml
@@ -0,0 +1,35 @@
+# Helm chart values for zenith-proxy.
+# This is a YAML-formatted file.
+
+settings:
+  authEndpoint: "https://console.zenith.tech/authenticate_proxy_request/"
+  uri: "https://console.zenith.tech/psql_session/"
+
+# -- Additional labels for zenith-proxy pods
+podLabels:
+  zenith_service: proxy
+  zenith_env: production
+  zenith_region: us-west-2
+  zenith_region_slug: oregon
+
+service:
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internal
+    external-dns.alpha.kubernetes.io/hostname: proxy-release.local
+  type: LoadBalancer
+
+exposedService:
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
+    external-dns.alpha.kubernetes.io/hostname: start.zenith.tech
+
+metrics:
+  enabled: true
+  serviceMonitor:
+    enabled: true
+    selector:
+      release: kube-prometheus-stack
diff --git a/.circleci/proxy.staging.yaml b/.circleci/helm-values/staging.proxy.yaml
similarity index 100%
rename from .circleci/proxy.staging.yaml
rename to .circleci/helm-values/staging.proxy.yaml
diff --git a/.circleci/storage-redeploy.playbook.yml b/.circleci/storage-redeploy.playbook.yml
deleted file mode 100644
index 8173d81521..0000000000
--- a/.circleci/storage-redeploy.playbook.yml
+++ /dev/null
@@ -1,138 +0,0 @@
-- name: discover storage nodes
-  hosts: localhost
-  connection: local
-  gather_facts: False
-
-  tasks:
-
-    - name: discover safekeepers
-      no_log: true
-      ec2_instance_info:
-        filters:
-          "tag:zenith_env": "staging"
-          "tag:zenith_service": "safekeeper"
-      register: ec2_safekeepers
-
-    - name: discover pageservers
-      no_log: true
-      ec2_instance_info:
-        filters:
-          "tag:zenith_env": "staging"
-          "tag:zenith_service": "pageserver"
-      register: ec2_pageservers
-
-    - name: add safekeepers to host group
-      no_log: true
-      add_host:
-        name: safekeeper-{{ ansible_loop.index }}
-        ansible_host: "{{ item.public_ip_address }}"
-        groups:
-          - storage
-          - safekeepers
-      with_items: "{{ ec2_safekeepers.instances }}"
-      loop_control:
-        extended: yes
-
-    - name: add pageservers to host group
-      no_log: true
-      add_host:
-        name: pageserver-{{ ansible_loop.index }}
-        ansible_host: "{{ item.public_ip_address }}"
-        groups:
-          - storage
-          - pageservers
-      with_items: "{{ ec2_pageservers.instances }}"
-      loop_control:
-        extended: yes
-
-- name: Retrive versions
-  hosts: storage
-  gather_facts: False
-  remote_user: admin
-
-  tasks:
-
-    - name: Get current version of binaries
-      set_fact:
-        current_version: "{{lookup('file', '../zenith_install/.zenith_current_version') }}"
-
-    - name: Check that file with version exists on host
-      stat:
-        path: /usr/local/.zenith_current_version
-      register: version_file
-
-    - name: Try to get current version from the host
-      when: version_file.stat.exists
-      ansible.builtin.fetch:
-        src: /usr/local/.zenith_current_version
-        dest: .remote_version.{{ inventory_hostname }}
-        fail_on_missing: no
-        flat: yes
-
-    - name: Store remote version to variable
-      when: version_file.stat.exists
-      set_fact:
-        remote_version: "{{ lookup('file', '.remote_version.{{ inventory_hostname }}') }}"
-
-    - name: Store default value of remote version to variable in case when remote version file not found
-      when: not version_file.stat.exists
-      set_fact:
-        remote_version: "000"
-
-- name: Extract Zenith binaries
-  hosts: storage
-  gather_facts: False
-  remote_user: admin
-
-  tasks:
-
-    - name: Inform about version conflict
-      when: current_version <= remote_version
-      debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"
-
-    - name: Extract Zenith binaries to /usr/local
-      when: current_version > remote_version
-      ansible.builtin.unarchive:
-        src: ../zenith_install.tar.gz
-        dest: /usr/local
-      become: true
-
-- name: Restart safekeepers
-  hosts: safekeepers
-  gather_facts: False
-  remote_user: admin
-
-  tasks:
-
-    - name: Inform about version conflict
-      when: current_version <= remote_version
-      debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"
-
-    - name: Restart systemd service
-      when: current_version > remote_version
-      ansible.builtin.systemd:
-        daemon_reload: yes
-        name: safekeeper
-        enabled: yes
-        state: restarted
-      become: true
-
-- name: Restart pageservers
-  hosts: pageservers
-  gather_facts: False
-  remote_user: admin
-
-  tasks:
-
-    - name: Inform about version conflict
-      when: current_version <= remote_version
-      debug: msg="Current version {{ current_version }} LE than remote {{ remote_version }}"
-
-    - name: Restart systemd service
-      when: current_version > remote_version
-      ansible.builtin.systemd:
-        daemon_reload: yes
-        name: pageserver
-        enabled: yes
-        state: restarted
-      become: true