diff --git a/scripts/sk_collect_dumps/.gitignore b/scripts/sk_collect_dumps/.gitignore
new file mode 100644
index 0000000000..d9d4d0296a
--- /dev/null
+++ b/scripts/sk_collect_dumps/.gitignore
@@ -0,0 +1,2 @@
+result
+*.json
diff --git a/scripts/sk_collect_dumps/readme.md b/scripts/sk_collect_dumps/readme.md
new file mode 100644
index 0000000000..52b73e9495
--- /dev/null
+++ b/scripts/sk_collect_dumps/readme.md
@@ -0,0 +1,25 @@
+# Collect /v1/debug_dump from all safekeeper nodes
+
+1. Run the ansible playbooks to collect .json dumps from all safekeepers and store them in the `./result` directory.
+2. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload the dumps to the `prod_feb30` table in the specified Postgres database.
+
+## How to use ansible (staging)
+
+```
+AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+
+AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.eu-west-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+```
+
+## How to use ansible (prod)
+
+```
+AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-west-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+
+AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+
+AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.eu-central-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+
+AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.ap-southeast-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+```
+
diff --git a/scripts/sk_collect_dumps/remote.yaml b/scripts/sk_collect_dumps/remote.yaml
new file mode 100644
index 0000000000..29ce83efde
--- /dev/null
+++ b/scripts/sk_collect_dumps/remote.yaml
@@ -0,0 +1,18 @@
+- name: Fetch state dumps from safekeepers
+  hosts: safekeepers
+  gather_facts: False
+  remote_user: "{{ remote_user }}"
+
+  tasks:
+    - name: Download file
+      get_url:
+        url: "http://{{ inventory_hostname }}:7676/v1/debug_dump?dump_all=true&dump_disk_content=false"
+        dest: "/tmp/{{ inventory_hostname }}.json"
+
+    - name: Fetch file from remote hosts
+      fetch:
+        src: "/tmp/{{ inventory_hostname }}.json"
+        dest: "./result/{{ inventory_hostname }}.json"
+        flat: yes
+        fail_on_missing: no
+
diff --git a/scripts/sk_collect_dumps/upload.sh b/scripts/sk_collect_dumps/upload.sh
new file mode 100755
index 0000000000..2e54ecba1c
--- /dev/null
+++ b/scripts/sk_collect_dumps/upload.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+if [ -z "$DB_CONNSTR" ]; then
+    echo "DB_CONNSTR is not set"
+    exit 1
+fi
+
+# Create a temporary table for JSON data
+psql $DB_CONNSTR -c 'DROP TABLE IF EXISTS tmp_json'
+psql $DB_CONNSTR -c 'CREATE TABLE tmp_json (data jsonb)'
+
+for file in ./result/*.json; do
+    echo "$file"
+    SK_ID=$(jq '.config.id' $file)
+    echo "SK_ID: $SK_ID"
+    jq -c ".timelines[] | . + {\"sk_id\": $SK_ID}" $file | psql $DB_CONNSTR -c "\\COPY tmp_json (data) FROM STDIN"
+done
+
+TABLE_NAME=$1
+
+if [ -z "$TABLE_NAME" ]; then
+    echo "TABLE_NAME is not set, skipping conversion to table with typed columns"
+    echo "Usage: ./upload.sh TABLE_NAME"
+    exit 0
+fi
+
+psql $DB_CONNSTR <<EOF
+CREATE TABLE $TABLE_NAME AS
+SELECT
+    (data->>'sk_id')::bigint AS sk_id,
+    (data->>'tenant_id') AS tenant_id,
+    (data->>'timeline_id') AS timeline_id,
+    (data->'memory'->>'active')::bool AS active,
+    (data->'memory'->>'flush_lsn')::bigint AS flush_lsn,
+    (data->'memory'->'mem_state'->>'backup_lsn')::bigint AS backup_lsn,
+    (data->'memory'->'mem_state'->>'commit_lsn')::bigint AS commit_lsn,
+    (data->'memory'->'mem_state'->>'peer_horizon_lsn')::bigint AS peer_horizon_lsn,
+    (data->'memory'->'mem_state'->>'remote_consistent_lsn')::bigint AS remote_consistent_lsn,
+    (data->'memory'->>'write_lsn')::bigint AS write_lsn,
+    (data->'memory'->>'num_computes')::bigint AS num_computes,
+    (data->'memory'->>'epoch_start_lsn')::bigint AS epoch_start_lsn,
+    (data->'memory'->>'last_removed_segno')::bigint AS last_removed_segno,
+    (data->'memory'->>'is_cancelled')::bool AS is_cancelled,
+    (data->'control_file'->>'backup_lsn')::bigint AS disk_backup_lsn,
+    (data->'control_file'->>'commit_lsn')::bigint AS disk_commit_lsn,
+    (data->'control_file'->'acceptor_state'->>'term')::bigint AS disk_term,
+    (data->'control_file'->>'local_start_lsn')::bigint AS local_start_lsn,
+    (data->'control_file'->>'peer_horizon_lsn')::bigint AS disk_peer_horizon_lsn,
+    (data->'control_file'->>'timeline_start_lsn')::bigint AS timeline_start_lsn,
+    (data->'control_file'->>'remote_consistent_lsn')::bigint AS disk_remote_consistent_lsn
+FROM tmp_json
+EOF