mirror of https://github.com/neondatabase/neon.git (synced 2026-01-07 05:22:56 +00:00)

Make sk_collect_dumps also possible with teleport (#4739)

Co-authored-by: Arseny Sher <sher-ars@yandex.ru>
Committed via GitHub; parent f260f1565e, commit 0f56104a61
scripts/sk_collect_dumps/.gitignore (vendored, +2 lines)
@@ -1,2 +1,4 @@
 result
 *.json
+hosts
+poetry.lock
scripts/sk_collect_dumps/ansible.cfg (new file, +11 lines)
@@ -0,0 +1,11 @@
+[defaults]
+host_key_checking = False
+inventory=./hosts
+remote_tmp=/tmp
+remote_user=developer
+callbacks_enabled = profile_tasks
+
+[ssh_connection]
+scp_if_ssh = True
+ssh_args = -F ./ssh.cfg
+pipelining = True
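With this file in the working directory, ansible resolves `./hosts` as its inventory and tunnels SSH through `./ssh.cfg` (added below), so no extra flags are needed. A hedged sketch for verifying the config is picked up, using only stock ansible commands:

```
# run from scripts/sk_collect_dumps so ansible finds ./ansible.cfg
ansible-config dump --only-changed   # shows which of the settings above took effect
ansible safekeeper --list-hosts      # shows what the ./hosts inventory resolved to
```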
scripts/sk_collect_dumps/pyproject.toml (new file, +16 lines)
@@ -0,0 +1,16 @@
+[tool.poetry]
+name = "sk-collect-dumps"
+version = "0.1.0"
+description = ""
+authors = ["Arseny Sher <sher-ars@yandex.ru>"]
+readme = "README.md"
+packages = [{include = "sk_collect_dumps"}]
+
+[tool.poetry.dependencies]
+python = "^3.11"
+ansible = "^9.1.0"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
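The pyproject pins Ansible 9 under Python 3.11 via Poetry. A hedged usage sketch, assuming you work from this directory; `--no-root` is used because the script directory may not actually contain an installable `sk_collect_dumps` package:

```
cd scripts/sk_collect_dumps
poetry install --no-root   # installs only the pinned dependencies (ansible)
poetry run ansible-playbook -e "auth_token=${AUTH_TOKEN}" remote.yaml
```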
scripts/sk_collect_dumps/README.md
@@ -1,25 +1,43 @@
 # Collect /v1/debug_dump from all safekeeper nodes
 
-1. Run ansible playbooks to collect .json dumps from all safekeepers and store them in `./result` directory.
-2. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload dumps to `prod_feb30` table in specified postgres database.
-
-## How to use ansible (staging)
-
-```
-AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.eu-west-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-```
-
-## How to use ansible (prod)
-
-```
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-west-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.eu-central-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.ap-southeast-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-```
+1. Issue an admin token (add/remove `.stage` from the url for staging/prod and set the proper API key):
+
+```
+# staging:
+AUTH_TOKEN=$(curl https://console.stage.neon.tech/regions/console/api/v1/admin/issue_token -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer $NEON_STAGING_KEY" -X POST -d '{"ttl_seconds": 43200, "scope": "safekeeperdata"}' 2>/dev/null | jq --raw-output '.jwt')
+# prod:
+AUTH_TOKEN=$(curl https://console.neon.tech/regions/console/api/v1/admin/issue_token -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer $NEON_PROD_KEY" -X POST -d '{"ttl_seconds": 43200, "scope": "safekeeperdata"}' 2>/dev/null | jq --raw-output '.jwt')
+# check
+echo $AUTH_TOKEN
+```
+
+2. Run the ansible playbook to collect .json dumps from all safekeepers and store them in the `./result` directory.
+
+There are two ways to do that: with ssm or tsh. ssm:
+```
+# in the aws repo, cd .github/ansible and run e.g. (adjusting profile and region in vars and limit):
+AWS_DEFAULT_PROFILE=dev ansible-playbook -i inventory_aws_ec2.yaml -i staging.us-east-2.vars.yaml -e @ssm_config -l 'safekeeper:&us_east_2' -e "auth_token=${AUTH_TOKEN}" ~/neon/neon/scripts/sk_collect_dumps/remote.yaml
+```
+It will put the results into the `./result` directory *near the playbook*.
+
+tsh:
+
+Update the inventory, if needed, selecting .build/.tech and optionally a region:
+```
+rm -f hosts && echo '[safekeeper]' >> hosts
+# staging:
+tsh ls | awk '{print $1}' | grep safekeeper | grep "neon.build" | grep us-east-2 >> hosts
+# prod:
+tsh ls | awk '{print $1}' | grep safekeeper | grep "neon.tech" | grep us-east-2 >> hosts
+```
+
+Test the ansible connection:
+```
+ansible all -m ping -v
+```
+
+Download the dumps:
+```
+mkdir -p result && rm -f result/*
+ansible-playbook -e "auth_token=${AUTH_TOKEN}" remote.yaml
+```
+
+3. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload the dumps to the `prod_feb30` table in the specified postgres database.
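Once the playbook has run, each safekeeper leaves one `<host>-dump.json` file in `./result`. A hedged sanity check before running `upload.sh`, deliberately assuming nothing about the dump's inner schema:

```
# flag any collected dump that is empty or not valid JSON
for f in result/*.json; do
  jq -e 'length > 0' "$f" >/dev/null || echo "suspect dump: $f"
done
```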
scripts/sk_collect_dumps/remote.yaml
@@ -1,18 +1,37 @@
 - name: Fetch state dumps from safekeepers
-  hosts: safekeepers
+  hosts: safekeeper
   gather_facts: False
-  remote_user: "{{ remote_user }}"
 
   tasks:
-  - name: Download file
+  - name: Dump file
     get_url:
       url: "http://{{ inventory_hostname }}:7676/v1/debug_dump?dump_all=true&dump_disk_content=false"
-      dest: "/tmp/{{ inventory_hostname }}.json"
+      dest: "/tmp/{{ inventory_hostname }}-dump.json"
+      headers:
+        Authorization: "Bearer {{ auth_token }}"
+
+  - name: install rsync
+    ansible.builtin.apt:
+      name: rsync
+      update_cache: yes
+    become: yes
+    ignore_errors: true # it can be already installed and we don't always have sudo
 
-  - name: Fetch file from remote hosts
+  - name: Fetch file from remote hosts (works only with ssm)
     fetch:
-      src: "/tmp/{{ inventory_hostname }}.json"
-      dest: "./result/{{ inventory_hostname }}.json"
+      src: "/tmp/{{ inventory_hostname }}-dump.json"
+      dest: "./result/{{ inventory_hostname }}-dump.json"
       flat: yes
       fail_on_missing: no
+    when: ansible_connection == "aws_ssm"
+
+  # xxx not sure how to make ansible 'synchronize' work with tsh
+  - name: Fetch file from remote hosts
+    shell: rsync -e 'tsh ssh' -azvP "developer@{{ inventory_hostname }}:/tmp/{{ inventory_hostname }}-dump.json" "./result/{{ inventory_hostname }}-dump.json"
+    delegate_to: localhost
+    when: ansible_connection != "aws_ssm"
+
+  - name: remove remote dumps
+    ansible.builtin.file:
+      path: "/tmp/{{ inventory_hostname }}-dump.json"
+      state: absent
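The playbook's first task is a plain authenticated GET against the safekeeper's HTTP endpoint, so a single host can be checked without ansible at all. A sketch; `SK_HOST` is a placeholder for one safekeeper from your inventory, while the URL, port, and bearer header are copied verbatim from the task above:

```
curl -sf -H "Authorization: Bearer ${AUTH_TOKEN}" \
  "http://${SK_HOST}:7676/v1/debug_dump?dump_all=true&dump_disk_content=false" \
  -o "result/${SK_HOST}-dump.json"
```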
scripts/sk_collect_dumps/ssh.cfg (new file, +13 lines)
@@ -0,0 +1,13 @@
+# Begin generated Teleport configuration for teleport.aws.neon.tech by tsh
+
+# Common flags for all teleport.aws.neon.tech hosts
+Host *
+    HostKeyAlgorithms rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-rsa-cert-v01@openssh.com
+
+# Flags for all teleport.aws.neon.tech hosts except the proxy
+Host * !teleport.aws.neon.tech
+    Port 3022
+    ProxyCommand "/usr/local/bin/tsh" proxy ssh --cluster=teleport.aws.neon.tech --proxy=teleport.aws.neon.tech:443 %r@%h:%p
+    User developer
+
+# End generated Teleport configuration
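This is the tsh-generated OpenSSH config that `ssh_args = -F ./ssh.cfg` in ansible.cfg points at; both ansible's connections and the playbook's `rsync -e 'tsh ssh'` fallback depend on Teleport access working. A hedged connectivity check, assuming an active `tsh login` and picking a host the same way the README builds the inventory:

```
SK_HOST=$(tsh ls | awk '{print $1}' | grep safekeeper | head -1)
ssh -F ./ssh.cfg "developer@${SK_HOST}" true && echo "teleport ssh works"
```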
scripts/sk_collect_dumps/upload.sh
@@ -31,22 +31,22 @@ SELECT
   (data->>'tenant_id') AS tenant_id,
   (data->>'timeline_id') AS timeline_id,
   (data->'memory'->>'active')::bool AS active,
-  (data->'memory'->>'flush_lsn')::bigint AS flush_lsn,
-  (data->'memory'->'mem_state'->>'backup_lsn')::bigint AS backup_lsn,
-  (data->'memory'->'mem_state'->>'commit_lsn')::bigint AS commit_lsn,
-  (data->'memory'->'mem_state'->>'peer_horizon_lsn')::bigint AS peer_horizon_lsn,
-  (data->'memory'->'mem_state'->>'remote_consistent_lsn')::bigint AS remote_consistent_lsn,
-  (data->'memory'->>'write_lsn')::bigint AS write_lsn,
+  (data->'memory'->>'flush_lsn')::pg_lsn AS flush_lsn,
+  (data->'memory'->'mem_state'->>'backup_lsn')::pg_lsn AS backup_lsn,
+  (data->'memory'->'mem_state'->>'commit_lsn')::pg_lsn AS commit_lsn,
+  (data->'memory'->'mem_state'->>'peer_horizon_lsn')::pg_lsn AS peer_horizon_lsn,
+  (data->'memory'->'mem_state'->>'remote_consistent_lsn')::pg_lsn AS remote_consistent_lsn,
+  (data->'memory'->>'write_lsn')::pg_lsn AS write_lsn,
   (data->'memory'->>'num_computes')::bigint AS num_computes,
-  (data->'memory'->>'epoch_start_lsn')::bigint AS epoch_start_lsn,
+  (data->'memory'->>'epoch_start_lsn')::pg_lsn AS epoch_start_lsn,
   (data->'memory'->>'last_removed_segno')::bigint AS last_removed_segno,
   (data->'memory'->>'is_cancelled')::bool AS is_cancelled,
-  (data->'control_file'->>'backup_lsn')::bigint AS disk_backup_lsn,
-  (data->'control_file'->>'commit_lsn')::bigint AS disk_commit_lsn,
+  (data->'control_file'->>'backup_lsn')::pg_lsn AS disk_backup_lsn,
+  (data->'control_file'->>'commit_lsn')::pg_lsn AS disk_commit_lsn,
   (data->'control_file'->'acceptor_state'->>'term')::bigint AS disk_term,
-  (data->'control_file'->>'local_start_lsn')::bigint AS local_start_lsn,
-  (data->'control_file'->>'peer_horizon_lsn')::bigint AS disk_peer_horizon_lsn,
-  (data->'control_file'->>'timeline_start_lsn')::bigint AS timeline_start_lsn,
-  (data->'control_file'->>'remote_consistent_lsn')::bigint AS disk_remote_consistent_lsn
+  (data->'control_file'->>'local_start_lsn')::pg_lsn AS local_start_lsn,
+  (data->'control_file'->>'peer_horizon_lsn')::pg_lsn AS disk_peer_horizon_lsn,
+  (data->'control_file'->>'timeline_start_lsn')::pg_lsn AS timeline_start_lsn,
+  (data->'control_file'->>'remote_consistent_lsn')::pg_lsn AS disk_remote_consistent_lsn
 FROM tmp_json
 EOF
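The upload script's only change is casting the LSN fields to `pg_lsn` instead of `bigint`, presumably because the dumps serialize LSNs in the `X/Y` hex form that `pg_lsn` parses; `pg_lsn` also gives correct ordering and byte-distance arithmetic. A standalone sketch against any reachable postgres, with made-up values, independent of the dump tables:

```
# pg_lsn parses the X/Y hex form; `-` between two pg_lsn values yields the
# byte distance as numeric, and comparison operators order LSNs correctly.
psql "$DB_CONNSTR" -c \
  "SELECT '0/16B3748'::pg_lsn - '0/16B0000'::pg_lsn AS lag_bytes,
          '1/0'::pg_lsn > '0/FFFFFFFF'::pg_lsn AS ordered_correctly;"
```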