Compare commits

...

1 Commits

Author SHA1 Message Date
Arseny Sher
910640c955 Update sk_collect_dumps to new inventories, auth and LSN formatting. 2023-12-19 17:16:01 +03:00
3 changed files with 28 additions and 36 deletions

View File

@@ -1,25 +1,15 @@
# Collect /v1/debug_dump from all safekeeper nodes
1. Run ansible playbooks to collect .json dumps from all safekeepers and store them in `./result` directory.
3. Issue an admin token (add/remove `.stage` in the URL for staging/prod and set the proper API key):
```
AUTH_TOKEN=$(curl https://console.stage.neon.tech/regions/console/api/v1/admin/issue_token -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer $NEON_STAGING_KEY" -X POST -d '{"ttl_seconds": 43200, "scope": "safekeeperdata"}' 2>/dev/null | jq --raw-output '.jwt')
# check
echo $AUTH_TOKEN
```
2. Run ansible playbooks to collect .json dumps from all safekeepers and store them in `./result` directory.
```
# in aws repo, cd .github/ansible and run e.g. (adjusting profile and region in vars and limit):
AWS_DEFAULT_PROFILE=dev ansible-playbook -i inventory_aws_ec2.yaml -i staging.us-east-2.vars.yaml -e @ssm_config -l 'safekeeper:&us_east_2' -e "auth_token=${AUTH_TOKEN}" --check ~/neon/neon/scripts/sk_collect_dumps/remote.yaml
```
It will put the results into the `.results` directory *near the playbook*.
2. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload the dumps to the `prod_feb30` table in the specified Postgres database.
## How to use ansible (staging)
```
AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.eu-west-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
```
## How to use ansible (prod)
```
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-west-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.eu-central-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.ap-southeast-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
```

View File

@@ -1,5 +1,5 @@
- name: Fetch state dumps from safekeepers
hosts: safekeepers
hosts: safekeeper
gather_facts: False
remote_user: "{{ remote_user }}"
@@ -8,6 +8,8 @@
get_url:
url: "http://{{ inventory_hostname }}:7676/v1/debug_dump?dump_all=true&dump_disk_content=false"
dest: "/tmp/{{ inventory_hostname }}.json"
headers:
Authorization: "Bearer {{ auth_token }}"
- name: Fetch file from remote hosts
fetch:

View File

@@ -31,22 +31,22 @@ SELECT
(data->>'tenant_id') AS tenant_id,
(data->>'timeline_id') AS timeline_id,
(data->'memory'->>'active')::bool AS active,
(data->'memory'->>'flush_lsn')::bigint AS flush_lsn,
(data->'memory'->'mem_state'->>'backup_lsn')::bigint AS backup_lsn,
(data->'memory'->'mem_state'->>'commit_lsn')::bigint AS commit_lsn,
(data->'memory'->'mem_state'->>'peer_horizon_lsn')::bigint AS peer_horizon_lsn,
(data->'memory'->'mem_state'->>'remote_consistent_lsn')::bigint AS remote_consistent_lsn,
(data->'memory'->>'write_lsn')::bigint AS write_lsn,
(data->'memory'->>'flush_lsn')::pg_lsn AS flush_lsn,
(data->'memory'->'mem_state'->>'backup_lsn')::pg_lsn AS backup_lsn,
(data->'memory'->'mem_state'->>'commit_lsn')::pg_lsn AS commit_lsn,
(data->'memory'->'mem_state'->>'peer_horizon_lsn')::pg_lsn AS peer_horizon_lsn,
(data->'memory'->'mem_state'->>'remote_consistent_lsn')::pg_lsn AS remote_consistent_lsn,
(data->'memory'->>'write_lsn')::pg_lsn AS write_lsn,
(data->'memory'->>'num_computes')::bigint AS num_computes,
(data->'memory'->>'epoch_start_lsn')::bigint AS epoch_start_lsn,
(data->'memory'->>'epoch_start_lsn')::pg_lsn AS epoch_start_lsn,
(data->'memory'->>'last_removed_segno')::bigint AS last_removed_segno,
(data->'memory'->>'is_cancelled')::bool AS is_cancelled,
(data->'control_file'->>'backup_lsn')::bigint AS disk_backup_lsn,
(data->'control_file'->>'commit_lsn')::bigint AS disk_commit_lsn,
(data->'control_file'->>'backup_lsn')::pg_lsn AS disk_backup_lsn,
(data->'control_file'->>'commit_lsn')::pg_lsn AS disk_commit_lsn,
(data->'control_file'->'acceptor_state'->>'term')::bigint AS disk_term,
(data->'control_file'->>'local_start_lsn')::bigint AS local_start_lsn,
(data->'control_file'->>'peer_horizon_lsn')::bigint AS disk_peer_horizon_lsn,
(data->'control_file'->>'timeline_start_lsn')::bigint AS timeline_start_lsn,
(data->'control_file'->>'remote_consistent_lsn')::bigint AS disk_remote_consistent_lsn
(data->'control_file'->>'local_start_lsn')::pg_lsn AS local_start_lsn,
(data->'control_file'->>'peer_horizon_lsn')::pg_lsn AS disk_peer_horizon_lsn,
(data->'control_file'->>'timeline_start_lsn')::pg_lsn AS timeline_start_lsn,
(data->'control_file'->>'remote_consistent_lsn')::pg_lsn AS disk_remote_consistent_lsn
FROM tmp_json
EOF