From 5d6553d41d383459ef5fb9ebfc1199faed978ebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lassi=20P=C3=B6l=C3=B6nen?= Date: Sun, 16 Oct 2022 14:37:10 +0300 Subject: [PATCH] Fix pageserver configuration generation bug (#2584) * We had an issue with `lineinfile` usage for the pageserver configuration file: if the S3 bucket related values were changed, it would have produced duplicate keys, resulting in invalid TOML. So to fix the issue, we should keep the configuration in a structured format (YAML in this case) so we can always generate syntactically correct TOML. Inventories are converted to YAML just so that it's easier to maintain the configuration there. Another alternative would have been separate variable files. * Keep the Ansible collections dir, but locally installed collections should not be tracked. --- .github/ansible/.gitignore | 3 ++ .github/ansible/ansible.cfg | 1 + .github/ansible/collections/.keep | 0 .github/ansible/deploy.yaml | 35 ++++++++++++----- .github/ansible/neon-stress.hosts | 20 ---------- .github/ansible/neon-stress.hosts.yaml | 30 +++++++++++++++ .github/ansible/production.hosts | 20 ---------- .github/ansible/production.hosts.yaml | 31 +++++++++++++++ .github/ansible/staging.hosts | 25 ------------ .github/ansible/staging.hosts.yaml | 40 ++++++++++++++++++++ .github/ansible/templates/pageserver.toml.j2 | 1 + .github/workflows/build_and_test.yml | 6 +-- 12 files changed, 134 insertions(+), 78 deletions(-) create mode 100644 .github/ansible/collections/.keep delete mode 100644 .github/ansible/neon-stress.hosts create mode 100644 .github/ansible/neon-stress.hosts.yaml delete mode 100644 .github/ansible/production.hosts create mode 100644 .github/ansible/production.hosts.yaml delete mode 100644 .github/ansible/staging.hosts create mode 100644 .github/ansible/staging.hosts.yaml create mode 100644 .github/ansible/templates/pageserver.toml.j2 diff --git a/.github/ansible/.gitignore b/.github/ansible/.gitignore index 441d9a8b82..e3454fd43c 100644 
--- a/.github/ansible/.gitignore +++ b/.github/ansible/.gitignore @@ -2,3 +2,6 @@ zenith_install.tar.gz .zenith_current_version neon_install.tar.gz .neon_current_version + +collections/* +!collections/.keep diff --git a/.github/ansible/ansible.cfg b/.github/ansible/ansible.cfg index 5818a64455..0497ee401d 100644 --- a/.github/ansible/ansible.cfg +++ b/.github/ansible/ansible.cfg @@ -3,6 +3,7 @@ localhost_warning = False host_key_checking = False timeout = 30 +collections_paths = ./collections [ssh_connection] ssh_args = -F ./ansible.ssh.cfg diff --git a/.github/ansible/collections/.keep b/.github/ansible/collections/.keep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.github/ansible/deploy.yaml b/.github/ansible/deploy.yaml index e206f9d5ba..bfd3fd123d 100644 --- a/.github/ansible/deploy.yaml +++ b/.github/ansible/deploy.yaml @@ -14,7 +14,8 @@ - safekeeper - name: inform about versions - debug: msg="Version to deploy - {{ current_version }}" + debug: + msg: "Version to deploy - {{ current_version }}" tags: - pageserver - safekeeper @@ -63,15 +64,29 @@ tags: - pageserver - - name: update remote storage (s3) config - lineinfile: - path: /storage/pageserver/data/pageserver.toml - line: "{{ item }}" - loop: - - "[remote_storage]" - - "bucket_name = '{{ bucket_name }}'" - - "bucket_region = '{{ bucket_region }}'" - - "prefix_in_bucket = '{{ inventory_hostname }}'" + - name: read the existing remote pageserver config + ansible.builtin.slurp: + src: /storage/pageserver/data/pageserver.toml + register: _remote_ps_config + tags: + - pageserver + + - name: parse the existing pageserver configuration + ansible.builtin.set_fact: + _existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}" + tags: + - pageserver + + - name: construct the final pageserver configuration dict + ansible.builtin.set_fact: + pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}" + tags: + - pageserver + + 
- name: template the pageserver config + template: + src: templates/pageserver.toml.j2 + dest: /storage/pageserver/data/pageserver.toml become: true tags: - pageserver diff --git a/.github/ansible/neon-stress.hosts b/.github/ansible/neon-stress.hosts deleted file mode 100644 index c1bc8243f8..0000000000 --- a/.github/ansible/neon-stress.hosts +++ /dev/null @@ -1,20 +0,0 @@ -[pageservers] -neon-stress-ps-1 console_region_id=1 -neon-stress-ps-2 console_region_id=1 - -[safekeepers] -neon-stress-sk-1 console_region_id=1 -neon-stress-sk-2 console_region_id=1 -neon-stress-sk-3 console_region_id=1 - -[storage:children] -pageservers -safekeepers - -[storage:vars] -env_name = neon-stress -console_mgmt_base_url = http://neon-stress-console.local -bucket_name = neon-storage-ireland -bucket_region = eu-west-1 -etcd_endpoints = neon-stress-etcd.local:2379 -safekeeper_enable_s3_offload = false diff --git a/.github/ansible/neon-stress.hosts.yaml b/.github/ansible/neon-stress.hosts.yaml new file mode 100644 index 0000000000..d4c77e7ada --- /dev/null +++ b/.github/ansible/neon-stress.hosts.yaml @@ -0,0 +1,30 @@ +storage: + vars: + bucket_name: neon-storage-ireland + bucket_region: eu-west-1 + console_mgmt_base_url: http://neon-stress-console.local + env_name: neon-stress + etcd_endpoints: neon-stress-etcd.local:2379 + safekeeper_enable_s3_offload: 'false' + pageserver_config_stub: + pg_distrib_dir: /usr/local + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "{{ inventory_hostname }}" + + children: + pageservers: + hosts: + neon-stress-ps-1: + console_region_id: 1 + neon-stress-ps-2: + console_region_id: 1 + safekeepers: + hosts: + neon-stress-sk-1: + console_region_id: 1 + neon-stress-sk-2: + console_region_id: 1 + neon-stress-sk-3: + console_region_id: 1 diff --git a/.github/ansible/production.hosts b/.github/ansible/production.hosts deleted file mode 100644 index 364e8ed50e..0000000000 --- 
a/.github/ansible/production.hosts +++ /dev/null @@ -1,20 +0,0 @@ -[pageservers] -#zenith-1-ps-1 console_region_id=1 -zenith-1-ps-2 console_region_id=1 -zenith-1-ps-3 console_region_id=1 - -[safekeepers] -zenith-1-sk-1 console_region_id=1 -zenith-1-sk-2 console_region_id=1 -zenith-1-sk-3 console_region_id=1 - -[storage:children] -pageservers -safekeepers - -[storage:vars] -env_name = prod-1 -console_mgmt_base_url = http://console-release.local -bucket_name = zenith-storage-oregon -bucket_region = us-west-2 -etcd_endpoints = zenith-1-etcd.local:2379 diff --git a/.github/ansible/production.hosts.yaml b/.github/ansible/production.hosts.yaml new file mode 100644 index 0000000000..c276ca3805 --- /dev/null +++ b/.github/ansible/production.hosts.yaml @@ -0,0 +1,31 @@ +--- +storage: + vars: + env_name: prod-1 + console_mgmt_base_url: http://console-release.local + bucket_name: zenith-storage-oregon + bucket_region: us-west-2 + etcd_endpoints: zenith-1-etcd.local:2379 + pageserver_config_stub: + pg_distrib_dir: /usr/local + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "{{ inventory_hostname }}" + + children: + pageservers: + hosts: + zenith-1-ps-2: + console_region_id: 1 + zenith-1-ps-3: + console_region_id: 1 + + safekeepers: + hosts: + zenith-1-sk-1: + console_region_id: 1 + zenith-1-sk-2: + console_region_id: 1 + zenith-1-sk-3: + console_region_id: 1 diff --git a/.github/ansible/staging.hosts b/.github/ansible/staging.hosts deleted file mode 100644 index f5accc188a..0000000000 --- a/.github/ansible/staging.hosts +++ /dev/null @@ -1,25 +0,0 @@ -[pageservers] -#zenith-us-stage-ps-1 console_region_id=27 -zenith-us-stage-ps-2 console_region_id=27 -zenith-us-stage-ps-3 console_region_id=27 -zenith-us-stage-ps-4 console_region_id=27 -zenith-us-stage-test-ps-1 console_region_id=28 - -[safekeepers] -zenith-us-stage-sk-4 console_region_id=27 -zenith-us-stage-sk-5 console_region_id=27 -zenith-us-stage-sk-6 
console_region_id=27 -zenith-us-stage-test-sk-1 console_region_id=28 -zenith-us-stage-test-sk-2 console_region_id=28 -zenith-us-stage-test-sk-3 console_region_id=28 - -[storage:children] -pageservers -safekeepers - -[storage:vars] -env_name = us-stage -console_mgmt_base_url = http://console-staging.local -bucket_name = zenith-staging-storage-us-east-1 -bucket_region = us-east-1 -etcd_endpoints = zenith-us-stage-etcd.local:2379 diff --git a/.github/ansible/staging.hosts.yaml b/.github/ansible/staging.hosts.yaml new file mode 100644 index 0000000000..a3534ed5ce --- /dev/null +++ b/.github/ansible/staging.hosts.yaml @@ -0,0 +1,40 @@ +storage: + vars: + bucket_name: zenith-staging-storage-us-east-1 + bucket_region: us-east-1 + console_mgmt_base_url: http://console-staging.local + env_name: us-stage + etcd_endpoints: zenith-us-stage-etcd.local:2379 + pageserver_config_stub: + pg_distrib_dir: /usr/local + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "{{ inventory_hostname }}" + + children: + pageservers: + hosts: + zenith-us-stage-ps-2: + console_region_id: 27 + zenith-us-stage-ps-3: + console_region_id: 27 + zenith-us-stage-ps-4: + console_region_id: 27 + zenith-us-stage-test-ps-1: + console_region_id: 28 + + safekeepers: + hosts: + zenith-us-stage-sk-4: + console_region_id: 27 + zenith-us-stage-sk-5: + console_region_id: 27 + zenith-us-stage-sk-6: + console_region_id: 27 + zenith-us-stage-test-sk-1: + console_region_id: 28 + zenith-us-stage-test-sk-2: + console_region_id: 28 + zenith-us-stage-test-sk-3: + console_region_id: 28 diff --git a/.github/ansible/templates/pageserver.toml.j2 b/.github/ansible/templates/pageserver.toml.j2 new file mode 100644 index 0000000000..7b0857d5e0 --- /dev/null +++ b/.github/ansible/templates/pageserver.toml.j2 @@ -0,0 +1 @@ +{{ pageserver_config | sivel.toiletwater.to_toml }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 
69b17113ed..e8d724581d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -712,7 +712,7 @@ jobs: - name: Setup ansible run: | export PATH="/root/.local/bin:$PATH" - pip install --progress-bar off --user ansible boto3 + pip install --progress-bar off --user ansible boto3 toml - name: Redeploy run: | @@ -734,8 +734,8 @@ jobs: chmod 0600 ssh-key ssh-add ssh-key rm -f ssh-key ssh-key-cert.pub - - ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts + ansible-galaxy collection install sivel.toiletwater + ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml rm -f neon_install.tar.gz .neon_current_version deploy-proxy: