mirror of https://github.com/neondatabase/neon.git

Make sk_collect_dumps also possible with teleport (#4739)

Co-authored-by: Arseny Sher <sher-ars@yandex.ru>

commit 0f56104a61 (parent f260f1565e)
scripts/sk_collect_dumps/.gitignore (vendored, 2 additions)

@@ -1,2 +1,4 @@
 result
 *.json
+hosts
+poetry.lock
scripts/sk_collect_dumps/ansible.cfg (new file, 11 lines)

@@ -0,0 +1,11 @@
+[defaults]
+host_key_checking = False
+inventory=./hosts
+remote_tmp=/tmp
+remote_user=developer
+callbacks_enabled = profile_tasks
+
+[ssh_connection]
+scp_if_ssh = True
+ssh_args = -F ./ssh.cfg
+pipelining = True
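With this config in place, connectivity can be sanity-checked before collecting anything. The `ansible all -m ping -v` command comes from the README below; the rest is a minimal sketch assuming the `./hosts` inventory has already been generated:

```
# run from scripts/sk_collect_dumps/ so ansible.cfg, ./hosts and ./ssh.cfg are picked up
cd scripts/sk_collect_dumps
ansible all -m ping -v
```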
scripts/sk_collect_dumps/pyproject.toml (new file, 16 lines)

@@ -0,0 +1,16 @@
+[tool.poetry]
+name = "sk-collect-dumps"
+version = "0.1.0"
+description = ""
+authors = ["Arseny Sher <sher-ars@yandex.ru>"]
+readme = "README.md"
+packages = [{include = "sk_collect_dumps"}]
+
+[tool.poetry.dependencies]
+python = "^3.11"
+ansible = "^9.1.0"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
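Ansible is now pinned through Poetry rather than assumed as a system install. A minimal sketch of running the playbook through the Poetry environment (the invocation itself is an assumption; the commit only adds the packaging):

```
cd scripts/sk_collect_dumps
poetry install
poetry run ansible-playbook -e "auth_token=${AUTH_TOKEN}" remote.yaml
```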
scripts/sk_collect_dumps/README.md

@@ -1,25 +1,43 @@
 # Collect /v1/debug_dump from all safekeeper nodes
 
-1. Run ansible playbooks to collect .json dumps from all safekeepers and store them in `./result` directory.
-2. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload dumps to `prod_feb30` table in specified postgres database.
-
-## How to use ansible (staging)
-
-AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.eu-west-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-## How to use ansible (prod)
-
+1. Issue an admin token (add/remove .stage from the url for staging/prod, and set the proper API key):
+```
+# staging:
+AUTH_TOKEN=$(curl https://console.stage.neon.tech/regions/console/api/v1/admin/issue_token -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer $NEON_STAGING_KEY" -X POST -d '{"ttl_seconds": 43200, "scope": "safekeeperdata"}' 2>/dev/null | jq --raw-output '.jwt')
+# prod:
+AUTH_TOKEN=$(curl https://console.neon.tech/regions/console/api/v1/admin/issue_token -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer $NEON_PROD_KEY" -X POST -d '{"ttl_seconds": 43200, "scope": "safekeeperdata"}' 2>/dev/null | jq --raw-output '.jwt')
+# check
+echo $AUTH_TOKEN
+```
+2. Run the ansible playbook to collect .json dumps from all safekeepers and store them in the `./result` directory.
+
+There are two ways to do that: with ssm or with tsh. ssm:
+```
+# in the aws repo, cd .github/ansible and run e.g. (adjusting the profile, the region in vars, and the limit):
+AWS_DEFAULT_PROFILE=dev ansible-playbook -i inventory_aws_ec2.yaml -i staging.us-east-2.vars.yaml -e @ssm_config -l 'safekeeper:&us_east_2' -e "auth_token=${AUTH_TOKEN}" ~/neon/neon/scripts/sk_collect_dumps/remote.yaml
+```
+It will put the results into the .results directory *near the playbook*.
+
+tsh:
+
+Update the inventory if needed, selecting .build/.tech and optionally a region:
+```
+rm -f hosts && echo '[safekeeper]' >> hosts
+# staging:
+tsh ls | awk '{print $1}' | grep safekeeper | grep "neon.build" | grep us-east-2 >> hosts
+# prod:
+tsh ls | awk '{print $1}' | grep safekeeper | grep "neon.tech" | grep us-east-2 >> hosts
+```
+
 Test ansible connection:
 ```
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-west-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.eu-central-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.ap-southeast-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
 ansible all -m ping -v
 ```
 
+Download the dumps:
+```
+mkdir -p result && rm -f result/*
+ansible-playbook -e "auth_token=${AUTH_TOKEN}" remote.yaml
+```
+
+3. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload dumps to a `prod_feb30` table in the specified postgres database.
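Step 3 pushes the collected dumps into postgres. A sketch of the round trip, assuming `upload.sh` materializes a table under the given name as the README implies (the connection string is a placeholder):

```
DB_CONNSTR="postgres://user:pass@localhost:5432/dumps" ./upload.sh prod_feb30
psql "$DB_CONNSTR" -c 'SELECT count(*) FROM prod_feb30;'
```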
scripts/sk_collect_dumps/remote.yaml

@@ -1,18 +1,37 @@
 - name: Fetch state dumps from safekeepers
-  hosts: safekeepers
+  hosts: safekeeper
   gather_facts: False
   remote_user: "{{ remote_user }}"
 
   tasks:
-    - name: Download file
+    - name: Dump file
       get_url:
         url: "http://{{ inventory_hostname }}:7676/v1/debug_dump?dump_all=true&dump_disk_content=false"
-        dest: "/tmp/{{ inventory_hostname }}.json"
+        dest: "/tmp/{{ inventory_hostname }}-dump.json"
         headers:
           Authorization: "Bearer {{ auth_token }}"
 
-    - name: Fetch file from remote hosts
+    - name: install rsync
+      ansible.builtin.apt:
+        name: rsync
+        update_cache: yes
+      become: yes
+      ignore_errors: true # it can be already installed, and we don't always have sudo
+
+    - name: Fetch file from remote hosts (works only with ssm)
       fetch:
-        src: "/tmp/{{ inventory_hostname }}.json"
-        dest: "./result/{{ inventory_hostname }}.json"
+        src: "/tmp/{{ inventory_hostname }}-dump.json"
+        dest: "./result/{{ inventory_hostname }}-dump.json"
         flat: yes
         fail_on_missing: no
+      when: ansible_connection == "aws_ssm"
+
+    # xxx not sure how to make ansible 'synchronize' work with tsh
+    - name: Fetch file from remote hosts
+      shell: rsync -e 'tsh ssh' -azvP "developer@{{ inventory_hostname }}:/tmp/{{ inventory_hostname }}-dump.json" "./result/{{ inventory_hostname }}-dump.json"
+      delegate_to: localhost
+      when: ansible_connection != "aws_ssm"
+
+    - name: remove remote dumps
+      ansible.builtin.file:
+        path: "/tmp/{{ inventory_hostname }}-dump.json"
+        state: absent
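The playbook keys off `ansible_connection`: `fetch` only works over ssm, while tsh runs fall through to the local `rsync -e 'tsh ssh'` task. Before a full run, the dump endpoint can be probed by hand over teleport; a sketch with a hypothetical host name, assuming the safekeeper API answers on localhost:7676 from the host itself:

```
HOST=safekeeper-0.us-east-2.aws.neon.build   # hypothetical; take a real name from `tsh ls`
tsh ssh developer@$HOST \
  "curl -s -H 'Authorization: Bearer $AUTH_TOKEN' 'http://localhost:7676/v1/debug_dump?dump_all=true&dump_disk_content=false'" | jq 'keys'
```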
scripts/sk_collect_dumps/ssh.cfg (new file, 13 lines)

@@ -0,0 +1,13 @@
+# Begin generated Teleport configuration for teleport.aws.neon.tech by tsh
+
+# Common flags for all teleport.aws.neon.tech hosts
+Host *
+    HostKeyAlgorithms rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-rsa-cert-v01@openssh.com
+
+# Flags for all teleport.aws.neon.tech hosts except the proxy
+Host * !teleport.aws.neon.tech
+    Port 3022
+    ProxyCommand "/usr/local/bin/tsh" proxy ssh --cluster=teleport.aws.neon.tech --proxy=teleport.aws.neon.tech:443 %r@%h:%p
+    User developer
+
+# End generated Teleport configuration
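ansible.cfg points at this file via `ssh_args = -F ./ssh.cfg`, so the teleport path can be verified with plain ssh first. A minimal sketch, assuming an active tsh session; the host name is hypothetical:

```
tsh login --proxy=teleport.aws.neon.tech
ssh -F ./ssh.cfg developer@safekeeper-0.us-east-2.aws.neon.build hostname
```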
scripts/sk_collect_dumps/upload.sh

@@ -31,22 +31,22 @@ SELECT
     (data->>'tenant_id') AS tenant_id,
     (data->>'timeline_id') AS timeline_id,
     (data->'memory'->>'active')::bool AS active,
-    (data->'memory'->>'flush_lsn')::bigint AS flush_lsn,
-    (data->'memory'->'mem_state'->>'backup_lsn')::bigint AS backup_lsn,
-    (data->'memory'->'mem_state'->>'commit_lsn')::bigint AS commit_lsn,
-    (data->'memory'->'mem_state'->>'peer_horizon_lsn')::bigint AS peer_horizon_lsn,
-    (data->'memory'->'mem_state'->>'remote_consistent_lsn')::bigint AS remote_consistent_lsn,
-    (data->'memory'->>'write_lsn')::bigint AS write_lsn,
+    (data->'memory'->>'flush_lsn')::pg_lsn AS flush_lsn,
+    (data->'memory'->'mem_state'->>'backup_lsn')::pg_lsn AS backup_lsn,
+    (data->'memory'->'mem_state'->>'commit_lsn')::pg_lsn AS commit_lsn,
+    (data->'memory'->'mem_state'->>'peer_horizon_lsn')::pg_lsn AS peer_horizon_lsn,
+    (data->'memory'->'mem_state'->>'remote_consistent_lsn')::pg_lsn AS remote_consistent_lsn,
+    (data->'memory'->>'write_lsn')::pg_lsn AS write_lsn,
     (data->'memory'->>'num_computes')::bigint AS num_computes,
-    (data->'memory'->>'epoch_start_lsn')::bigint AS epoch_start_lsn,
+    (data->'memory'->>'epoch_start_lsn')::pg_lsn AS epoch_start_lsn,
     (data->'memory'->>'last_removed_segno')::bigint AS last_removed_segno,
     (data->'memory'->>'is_cancelled')::bool AS is_cancelled,
-    (data->'control_file'->>'backup_lsn')::bigint AS disk_backup_lsn,
-    (data->'control_file'->>'commit_lsn')::bigint AS disk_commit_lsn,
+    (data->'control_file'->>'backup_lsn')::pg_lsn AS disk_backup_lsn,
+    (data->'control_file'->>'commit_lsn')::pg_lsn AS disk_commit_lsn,
     (data->'control_file'->'acceptor_state'->>'term')::bigint AS disk_term,
-    (data->'control_file'->>'local_start_lsn')::bigint AS local_start_lsn,
-    (data->'control_file'->>'peer_horizon_lsn')::bigint AS disk_peer_horizon_lsn,
-    (data->'control_file'->>'timeline_start_lsn')::bigint AS timeline_start_lsn,
-    (data->'control_file'->>'remote_consistent_lsn')::bigint AS disk_remote_consistent_lsn
+    (data->'control_file'->>'local_start_lsn')::pg_lsn AS local_start_lsn,
+    (data->'control_file'->>'peer_horizon_lsn')::pg_lsn AS disk_peer_horizon_lsn,
+    (data->'control_file'->>'timeline_start_lsn')::pg_lsn AS timeline_start_lsn,
+    (data->'control_file'->>'remote_consistent_lsn')::pg_lsn AS disk_remote_consistent_lsn
 FROM tmp_json
 EOF
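Casting to pg_lsn instead of bigint lets postgres compare and subtract the positions natively. An illustrative query against an uploaded table, in the same heredoc style upload.sh itself uses; the lag computation is an assumed analysis, not part of the script:

```
psql "$DB_CONNSTR" <<'EOF'
-- bytes by which the backup position trails the commit position, per timeline
SELECT tenant_id, timeline_id,
       pg_wal_lsn_diff(commit_lsn, backup_lsn) AS backup_lag_bytes
FROM prod_feb30
ORDER BY backup_lag_bytes DESC
LIMIT 10;
EOF
```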