Add script to collect state from safekeepers (#3835)

Add an ansible playbook and upload script to collect the state dump JSON
introduced in https://github.com/neondatabase/neon/pull/3710 from all
safekeeper nodes and upload it to a postgres table.
Author: Arthur Petukhovsky
Date: 2023-03-28 17:04:02 +03:00
Committed by: GitHub
Parent: 9798737ec6
Commit: 7456e5b71c
4 changed files with 97 additions and 0 deletions

scripts/sk_collect_dumps/.gitignore

@@ -0,0 +1,2 @@
result
*.json

scripts/sk_collect_dumps/README.md

@@ -0,0 +1,25 @@
# Collect /v1/debug_dump from all safekeeper nodes
1. Run the ansible playbooks to collect .json dumps from all safekeepers and store them in the `./result` directory.
2. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload the dumps to the `prod_feb30` table in the specified postgres database (see the example below).
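
For illustration, a hypothetical invocation against a local database (the connection string and the `prod_feb30` table name are placeholders):
```
# placeholder connection string; point DB_CONNSTR at the database that should hold the dumps
DB_CONNSTR="postgresql://user@localhost:5432/sk_dumps" ./upload.sh prod_feb30
```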
## How to use ansible (staging)
```
AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.eu-west-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
```
## How to use ansible (prod)
```
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-west-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.eu-central-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.ap-southeast-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
```
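
After the playbooks finish, a quick sanity check of the collected dumps can be useful; a sketch using `jq` (the same tool `upload.sh` relies on):
```
# count collected dumps and flag any that fail to parse or lack a safekeeper id
ls ./result/*.json | wc -l
for f in ./result/*.json; do
    jq -e '.config.id' "$f" > /dev/null || echo "bad dump: $f"
done
```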

scripts/sk_collect_dumps/remote.yaml

@@ -0,0 +1,18 @@
- name: Fetch state dumps from safekeepers
  hosts: safekeepers
  gather_facts: False
  remote_user: "{{ remote_user }}"
  tasks:
    - name: Download file
      get_url:
        url: "http://{{ inventory_hostname }}:7676/v1/debug_dump?dump_all=true&dump_disk_content=false"
        dest: "/tmp/{{ inventory_hostname }}.json"
    - name: Fetch file from remote hosts
      fetch:
        src: "/tmp/{{ inventory_hostname }}.json"
        dest: "./result/{{ inventory_hostname }}.json"
        flat: yes
        fail_on_missing: no
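
For a one-off check against a single safekeeper, the same endpoint can be fetched by hand; a sketch with a placeholder hostname (the port and query parameters are taken from the playbook above):
```
# hypothetical safekeeper host; adjust to your inventory
curl -s "http://safekeeper-0.local:7676/v1/debug_dump?dump_all=true&dump_disk_content=false" > ./result/safekeeper-0.local.json
```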

scripts/sk_collect_dumps/upload.sh

@@ -0,0 +1,52 @@
#!/bin/bash

if [ -z "$DB_CONNSTR" ]; then
    echo "DB_CONNSTR is not set"
    exit 1
fi

# Create a temporary table for JSON data
psql "$DB_CONNSTR" -c 'DROP TABLE IF EXISTS tmp_json'
psql "$DB_CONNSTR" -c 'CREATE TABLE tmp_json (data jsonb)'

# Load every dump, tagging each timeline with the id of the safekeeper it came from
for file in ./result/*.json; do
    echo "$file"
    SK_ID=$(jq '.config.id' "$file")
    echo "SK_ID: $SK_ID"
    jq -c ".timelines[] | . + {\"sk_id\": $SK_ID}" "$file" | psql "$DB_CONNSTR" -c "\\COPY tmp_json (data) FROM STDIN"
done

TABLE_NAME=$1
if [ -z "$TABLE_NAME" ]; then
    echo "TABLE_NAME is not set, skipping conversion to table with typed columns"
    echo "Usage: ./upload.sh TABLE_NAME"
    exit 0
fi
psql "$DB_CONNSTR" <<EOF
CREATE TABLE $TABLE_NAME AS
SELECT
    (data->>'sk_id')::bigint AS sk_id,
    (data->>'tenant_id') AS tenant_id,
    (data->>'timeline_id') AS timeline_id,
    (data->'memory'->>'active')::bool AS active,
    (data->'memory'->>'flush_lsn')::bigint AS flush_lsn,
    (data->'memory'->'mem_state'->>'backup_lsn')::bigint AS backup_lsn,
    (data->'memory'->'mem_state'->>'commit_lsn')::bigint AS commit_lsn,
    (data->'memory'->'mem_state'->>'peer_horizon_lsn')::bigint AS peer_horizon_lsn,
    (data->'memory'->'mem_state'->>'remote_consistent_lsn')::bigint AS remote_consistent_lsn,
    (data->'memory'->>'write_lsn')::bigint AS write_lsn,
    (data->'memory'->>'num_computes')::bigint AS num_computes,
    (data->'memory'->>'epoch_start_lsn')::bigint AS epoch_start_lsn,
    (data->'memory'->>'last_removed_segno')::bigint AS last_removed_segno,
    (data->'memory'->>'is_cancelled')::bool AS is_cancelled,
    (data->'control_file'->>'backup_lsn')::bigint AS disk_backup_lsn,
    (data->'control_file'->>'commit_lsn')::bigint AS disk_commit_lsn,
    (data->'control_file'->'acceptor_state'->>'term')::bigint AS disk_term,
    (data->'control_file'->>'local_start_lsn')::bigint AS local_start_lsn,
    (data->'control_file'->>'peer_horizon_lsn')::bigint AS disk_peer_horizon_lsn,
    (data->'control_file'->>'timeline_start_lsn')::bigint AS timeline_start_lsn,
    (data->'control_file'->>'remote_consistent_lsn')::bigint AS disk_remote_consistent_lsn
FROM tmp_json;
EOF
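
Once a typed table exists, it can be queried directly. A sketch of the kind of analysis this enables, assuming the table was named `prod_feb30` as in the README (the ordering and limit are arbitrary):
```
# timelines whose backup lags behind the commit LSN, worst offenders first
psql "$DB_CONNSTR" -c "
  SELECT sk_id, tenant_id, timeline_id, commit_lsn - backup_lsn AS backup_lag
  FROM prod_feb30
  WHERE commit_lsn - backup_lsn > 0
  ORDER BY backup_lag DESC
  LIMIT 20;"
```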