Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-18 19:02:56 +00:00)

Compare commits (12 commits)
file_page_ ... d/hack
Commits in this compare:
15425325c4
f602e21aaf
a9c2124267
4539f5597e
1bac60ba31
8832cc65fe
cabbc33cbf
13646d1f99
a0091038e3
3093d5ff94
9d903b6e8b
3a3efe5a96
.github/PULL_REQUEST_TEMPLATE/release-pr.md | 2 (vendored)

@@ -10,7 +10,7 @@
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->

### Checklist after release

- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/120/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)

.github/actions/allure-report/action.yml | 4 (vendored)

@@ -47,7 +47,7 @@ runs:
else
key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-")
fi
echo "KEY=${key}" >> $GITHUB_OUTPUT
echo "::set-output name=KEY::${key}"

- uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }}

@@ -186,7 +186,7 @@ runs:
aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
echo "::set-output name=report-url::${REPORT_URL}"

- name: Release Allure lock
if: ${{ inputs.action == 'generate' && always() }}

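Several hunks in this compare (here and in the workflow files further down) differ only in how a step output is written: the deprecated `::set-output` workflow command versus appending to the `$GITHUB_OUTPUT` file. A minimal sketch of the two forms inside a step's `run:` script, reusing the `build-tag` step id that appears later in this diff:

```bash
# Deprecated form: print a workflow command to stdout.
echo "::set-output name=tag::$(git rev-list --count HEAD)"

# Current form: append key=value to the file GitHub exposes as $GITHUB_OUTPUT.
echo "tag=$(git rev-list --count HEAD)" >> "$GITHUB_OUTPUT"

# In either case, a later step in the same job reads the value as:
#   ${{ steps.build-tag.outputs.tag }}
```
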
.github/actions/download/action.yml | 4 (vendored)

@@ -34,7 +34,7 @@ runs:
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX%$GITHUB_RUN_ATTEMPT} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
echo 'SKIPPED=true' >> $GITHUB_OUTPUT
echo '::set-output name=SKIPPED::true'
exit 0
else
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"

@@ -42,7 +42,7 @@ runs:
fi
fi
echo 'SKIPPED=false' >> $GITHUB_OUTPUT
echo '::set-output name=SKIPPED::false'
mkdir -p $(dirname $ARCHIVE)
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}

@@ -41,8 +41,8 @@ runs:
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
echo "::set-output name=api_host::${API_HOST}"
echo "::set-output name=region_id::${REGION_ID}"
env:
ENVIRONMENT: ${{ inputs.environment }}
REGION_ID: ${{ inputs.region_id }}

@@ -72,10 +72,10 @@ runs:
dsn=$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .dsn')/main
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
echo "::set-output name=dsn::${dsn}"
project_id=$(echo $project | jq --raw-output '.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
echo "::set-output name=project_id::${project_id}"
env:
API_KEY: ${{ inputs.api_key }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}

@@ -32,7 +32,7 @@ runs:
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "::set-output name=api_host::${API_HOST}"
env:
ENVIRONMENT: ${{ inputs.environment }}

.github/actions/run-python-test-set/action.yml | 18 (vendored)

@@ -73,13 +73,6 @@ runs:
shell: bash -euxo pipefail {0}
run: ./scripts/pysync

- name: Download compatibility snapshot for Postgres 14
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest

- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin

@@ -87,8 +80,6 @@ runs:
BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BREAKING_CHANGES: contains(github.event.pull_request.labels.*.name, 'breaking changes')
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report

@@ -163,15 +154,6 @@ runs:
scripts/generate_and_push_perf_report.sh
fi

- name: Upload compatibility snapshot for Postgres 14
if: github.ref_name == 'release'
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes a test name (test_prepare_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_prepare_snapshot/compatibility_snapshot_pg14/
prefix: latest

- name: Create Allure report
if: always()
uses: ./.github/actions/allure-report

.github/ansible/.gitignore | 3 (vendored)

@@ -2,6 +2,3 @@ zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version

collections/*
!collections/.keep

.github/ansible/ansible.cfg | 1 (vendored)

@@ -3,7 +3,6 @@
localhost_warning = False
host_key_checking = False
timeout = 30
collections_paths = ./collections

[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg

.github/ansible/collections/.keep | 0 (vendored)

.github/ansible/deploy.yaml | 41 (vendored)

@@ -1,7 +1,7 @@
- name: Upload Neon binaries
hosts: storage
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin

tasks:

@@ -14,8 +14,7 @@
- safekeeper

- name: inform about versions
debug:
msg: "Version to deploy - {{ current_version }}"
debug: msg="Version to deploy - {{ current_version }}"
tags:
- pageserver
- safekeeper

@@ -36,7 +35,7 @@
- name: Deploy pageserver
hosts: pageservers
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin

tasks:

@@ -64,29 +63,15 @@
tags:
- pageserver

- name: read the existing remote pageserver config
ansible.builtin.slurp:
src: /storage/pageserver/data/pageserver.toml
register: _remote_ps_config
tags:
- pageserver

- name: parse the existing pageserver configuration
ansible.builtin.set_fact:
_existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
tags:
- pageserver

- name: construct the final pageserver configuration dict
ansible.builtin.set_fact:
pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
tags:
- pageserver

- name: template the pageserver config
template:
src: templates/pageserver.toml.j2
dest: /storage/pageserver/data/pageserver.toml
- name: update remote storage (s3) config
lineinfile:
path: /storage/pageserver/data/pageserver.toml
line: "{{ item }}"
loop:
- "[remote_storage]"
- "bucket_name = '{{ bucket_name }}'"
- "bucket_region = '{{ bucket_region }}'"
- "prefix_in_bucket = '{{ inventory_hostname }}'"
become: true
tags:
- pageserver

@@ -124,7 +109,7 @@
- name: Deploy safekeeper
hosts: safekeepers
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin

tasks:

.github/ansible/get_binaries.sh | 1 (vendored)

@@ -23,7 +23,6 @@ docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/

.github/ansible/neon-stress.hosts | 20 (vendored, new file)

@@ -0,0 +1,20 @@
[pageservers]
neon-stress-ps-1 console_region_id=1
neon-stress-ps-2 console_region_id=1

[safekeepers]
neon-stress-sk-1 console_region_id=1
neon-stress-sk-2 console_region_id=1
neon-stress-sk-3 console_region_id=1

[storage:children]
pageservers
safekeepers

[storage:vars]
env_name = neon-stress
console_mgmt_base_url = http://neon-stress-console.local
bucket_name = neon-storage-ireland
bucket_region = eu-west-1
etcd_endpoints = etcd-stress.local:2379
safekeeper_enable_s3_offload = false

.github/ansible/neon-stress.hosts.yaml | 31 (vendored)

@@ -1,31 +0,0 @@
storage:
vars:
bucket_name: neon-storage-ireland
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-stress-console.local
etcd_endpoints: neon-stress-etcd.local:2379
safekeeper_enable_s3_offload: 'false'
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: neon-stress/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
neon-stress-ps-1:
console_region_id: aws-eu-west-1
neon-stress-ps-2:
console_region_id: aws-eu-west-1
safekeepers:
hosts:
neon-stress-sk-1:
console_region_id: aws-eu-west-1
neon-stress-sk-2:
console_region_id: aws-eu-west-1
neon-stress-sk-3:
console_region_id: aws-eu-west-1

.github/ansible/production.hosts | 20 (vendored, new file)

@@ -0,0 +1,20 @@
[pageservers]
#zenith-1-ps-1 console_region_id=1
zenith-1-ps-2 console_region_id=1
zenith-1-ps-3 console_region_id=1

[safekeepers]
zenith-1-sk-1 console_region_id=1
zenith-1-sk-2 console_region_id=1
zenith-1-sk-3 console_region_id=1

[storage:children]
pageservers
safekeepers

[storage:vars]
env_name = prod-1
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2
etcd_endpoints = zenith-1-etcd.local:2379

.github/ansible/production.hosts.yaml | 33 (vendored)

@@ -1,33 +0,0 @@
---
storage:
vars:
console_mgmt_base_url: http://console-release.local
bucket_name: zenith-storage-oregon
bucket_region: us-west-2
etcd_endpoints: zenith-1-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: prod-1/wal
hostname_suffix: ".local"
remote_user: admin

children:
pageservers:
hosts:
zenith-1-ps-2:
console_region_id: aws-us-west-2
zenith-1-ps-3:
console_region_id: aws-us-west-2

safekeepers:
hosts:
zenith-1-sk-1:
console_region_id: aws-us-west-2
zenith-1-sk-2:
console_region_id: aws-us-west-2
zenith-1-sk-3:
console_region_id: aws-us-west-2

.github/ansible/scripts/init_pageserver.sh | 9 (vendored)

@@ -12,19 +12,18 @@ cat <<EOF | tee /tmp/payload
"version": 1,
"host": "${HOST}",
"port": 6400,
"region_id": "{{ console_region_id }}",
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false
"http_port": 9898
}
EOF

# check if pageserver already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/pageservers/${INSTANCE_ID} -o /dev/null; then

# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/pageservers -d@/tmp/payload | jq -r '.ID')

# init pageserver
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data

.github/ansible/scripts/init_safekeeper.sh | 10 (vendored)

@@ -14,18 +14,18 @@ cat <<EOF | tee /tmp/payload
"host": "${HOST}",
"port": 6500,
"http_port": 7676,
"region_id": "{{ console_region_id }}",
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}",
"active": false
"availability_zone_id": "${AZ_ID}"
}
EOF

# check if safekeeper already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/safekeepers/${INSTANCE_ID} -o /dev/null; then

# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/safekeepers -d@/tmp/payload | jq -r '.ID')

# init safekeeper
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
fi

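Both init scripts above follow the same register-or-init pattern: build a JSON payload, check whether this node is already known to the console, register it if not, and only then initialize the on-disk data directory with the id the console returns. A condensed sketch of that flow, with the console URL, instance id, and token handling as placeholders rather than the literal script:

```bash
#!/usr/bin/env bash
set -euo pipefail

CONSOLE_URL="http://console.example.local"   # placeholder for {{ console_mgmt_base_url }}
INSTANCE_ID="i-0123456789abcdef0"            # placeholder instance id
PAYLOAD=/tmp/payload                         # JSON body describing this node
# CONSOLE_API_TOKEN is assumed to be exported in the environment.

# Already registered? The GET fails (non-2xx) for unknown instances, so `curl -f` returns non-zero.
if ! curl -sf -H "Authorization: Bearer ${CONSOLE_API_TOKEN}" \
     "${CONSOLE_URL}/management/api/v2/pageservers/${INSTANCE_ID}" -o /dev/null; then
  # Not registered: register the node and capture the id the console assigns.
  ID=$(curl -sf -X POST -H "Authorization: Bearer ${CONSOLE_API_TOKEN}" \
       "${CONSOLE_URL}/management/api/v2/pageservers" -d@"${PAYLOAD}" | jq -r '.id')

  # First-time initialization of the data directory with that id.
  sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" \
       -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
fi
```
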
.github/ansible/ssm_config | 3 (vendored)

@@ -1,3 +0,0 @@
ansible_connection: aws_ssm
ansible_aws_ssm_bucket_name: neon-dev-bucket
ansible_python_interpreter: /usr/bin/python3

.github/ansible/staging.hosts | 25 (vendored, new file)

@@ -0,0 +1,25 @@
[pageservers]
#zenith-us-stage-ps-1 console_region_id=27
zenith-us-stage-ps-2 console_region_id=27
zenith-us-stage-ps-3 console_region_id=27
zenith-us-stage-ps-4 console_region_id=27
zenith-us-stage-test-ps-1 console_region_id=28

[safekeepers]
zenith-us-stage-sk-4 console_region_id=27
zenith-us-stage-sk-5 console_region_id=27
zenith-us-stage-sk-6 console_region_id=27
zenith-us-stage-test-sk-1 console_region_id=28
zenith-us-stage-test-sk-2 console_region_id=28
zenith-us-stage-test-sk-3 console_region_id=28

[storage:children]
pageservers
safekeepers

[storage:vars]
env_name = us-stage
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1
etcd_endpoints = zenith-us-stage-etcd.local:2379

.github/ansible/staging.hosts.yaml | 34 (vendored)

@@ -1,34 +0,0 @@
storage:
vars:
bucket_name: zenith-staging-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: zenith-us-stage-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: us-stage/wal
hostname_suffix: ".local"
remote_user: admin

children:
pageservers:
hosts:
zenith-us-stage-ps-2:
console_region_id: aws-us-east-1
zenith-us-stage-ps-3:
console_region_id: aws-us-east-1
zenith-us-stage-ps-4:
console_region_id: aws-us-east-1

safekeepers:
hosts:
zenith-us-stage-sk-4:
console_region_id: aws-us-east-1
zenith-us-stage-sk-5:
console_region_id: aws-us-east-1
zenith-us-stage-sk-6:
console_region_id: aws-us-east-1

.github/ansible/staging.us-east-2.hosts.yaml | 32 (vendored)

@@ -1,32 +0,0 @@
storage:
vars:
bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
console_region_id: aws-us-east-2

children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691

safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.build:
ansible_host: i-027662bd552bf5db0
safekeeper-1.us-east-2.aws.neon.build:
ansible_host: i-0171efc3604a7b907
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce

.github/ansible/systemd/pageserver.service | 2 (vendored)

@@ -1,5 +1,5 @@
[Unit]
Description=Neon pageserver
Description=Zenith pageserver
After=network.target auditd.service

[Service]

.github/ansible/systemd/safekeeper.service | 4 (vendored)

@@ -1,12 +1,12 @@
[Unit]
Description=Neon safekeeper
Description=Zenith safekeeper
After=network.target auditd.service

[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

.github/ansible/templates/pageserver.toml.j2 | 1 (vendored)

@@ -1 +0,0 @@
{{ pageserver_config | sivel.toiletwater.to_toml }}

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.

image:
repository: neondatabase/neon

settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.us-east-2.aws.neon.build"

# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: dev
zenith_region: us-east-2
zenith_region_slug: us-east-2

exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build

#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

.github/workflows/benchmarking.yml | 42 (vendored)

@@ -46,7 +46,7 @@ jobs:
runs-on: [self-hosted, zenith-benchmarker]

env:
POSTGRES_DISTRIB_DIR: /usr/pgsql
POSTGRES_DISTRIB_DIR: /tmp/pg_install
DEFAULT_PG_VERSION: 14

steps:

@@ -138,31 +138,22 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

pgbench-compare:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: "10gb"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}

strategy:
fail-fast: false
matrix:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
db_size: [ 10gb ]
include:
- platform: neon-captest-new
db_size: 50gb
- platform: neon-captest-prefetch
db_size: 50gb
- platform: rds-aurora
db_size: 50gb

env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-aurora ]

runs-on: dev
container:

@@ -187,7 +178,7 @@ jobs:
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH

- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-prefetch"]'), matrix.platform)
if: matrix.platform != 'neon-captest-reuse'
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:

@@ -213,9 +204,11 @@ jobs:
;;
esac

echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
echo "::set-output name=connstr::${CONNSTR}"

psql ${CONNSTR} -c "SELECT version();"
env:
PLATFORM: ${{ matrix.platform }}

- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'

@@ -234,6 +227,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
env:
PLATFORM: ${{ matrix.platform }}
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

@@ -247,6 +241,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
env:
PLATFORM: ${{ matrix.platform }}
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

@@ -260,6 +255,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
env:
PLATFORM: ${{ matrix.platform }}
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

@@ -272,7 +268,7 @@ jobs:
build_type: ${{ env.BUILD_TYPE }}

- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
if: ${{ matrix.platform != 'neon-captest-reuse' && always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: dev

.github/workflows/build_and_test.yml | 170 (vendored)

@@ -35,12 +35,12 @@ jobs:
echo ref:$GITHUB_REF_NAME
echo rev:$(git rev-list --count HEAD)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
echo "::set-output name=tag::$GITHUB_RUN_ID"
fi
shell: bash
id: build-tag

@@ -78,12 +78,12 @@ jobs:

- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v14)
shell: bash -euxo pipefail {0}

- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v15)
shell: bash -euxo pipefail {0}

# Set some environment variables used by all the steps.

@@ -127,8 +127,8 @@ jobs:
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-
v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-

- name: Cache postgres v14 build
id: cache_pg_14

@@ -389,7 +389,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git/
target/
key: v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
key: v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}

- name: Get Neon artifact
uses: ./.github/actions/download

@@ -481,7 +481,6 @@ jobs:

neon-image:
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug

steps:

@@ -495,11 +494,10 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

- name: Kaniko build neon
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID

compute-tools-image:
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug

steps:

@@ -510,12 +508,11 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

- name: Kaniko build compute tools
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID

compute-node-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko

@@ -530,12 +527,11 @@ jobs:
# cloud repo depends on this image name, thus duplicating it
# remove compute-node when cloud repo is updated
- name: Kaniko build compute node with extensions v14 (compatibility)
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID

compute-node-image-v14:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko

@@ -547,13 +543,12 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

- name: Kaniko build compute node with extensions v14
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:$GITHUB_RUN_ID

compute-node-image-v15:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko

@@ -565,11 +560,11 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

- name: Kaniko build compute node with extensions v15
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:$GITHUB_RUN_ID

promote-images:
runs-on: dev
needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
needs: [ neon-image, compute-node-image, compute-node-image-v14, compute-tools-image ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
strategy:

@@ -578,13 +573,12 @@ jobs:
# compute-node uses postgres 14, which is default now
# cloud repo depends on this image name, thus duplicating it
# remove compute-node when cloud repo is updated
name: [ neon, compute-node, compute-node-v14, compute-node-v15, compute-tools ]
name: [ neon, compute-node, compute-node-v14, compute-tools ]

steps:
- name: Promote image to latest
run: |
export MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=${{needs.tag.outputs.build-tag}} --query 'images[].imageManifest' --output text)
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
run:
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"

push-docker-hub:
runs-on: dev

@@ -603,19 +597,16 @@ jobs:
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json

- name: Pull neon image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon

- name: Pull compute tools image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools

- name: Pull compute node image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} compute-node
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node

- name: Pull compute node v14 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14

- name: Pull compute node v15 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest compute-node-v14

- name: Pull rust image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust

@@ -625,11 +616,9 @@ jobs:
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v14:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v15:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest

- name: Configure Docker Hub login
run: |

@@ -649,9 +638,6 @@ jobs:
- name: Push compute node v14 image to Docker Hub
run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}

- name: Push compute node v15 image to Docker Hub
run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}}

- name: Push rust image to Docker Hub
run: crane push rust neondatabase/rust:pinned

@@ -664,7 +650,6 @@ jobs:
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest

calculate-deploy-targets:
runs-on: [ self-hosted, Linux, k8s-runner ]

@@ -677,12 +662,12 @@ jobs:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$PRODUCTION]"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1

@@ -718,7 +703,7 @@ jobs:
- name: Setup ansible
run: |
export PATH="/root/.local/bin:$PATH"
pip install --progress-bar off --user ansible boto3 toml
pip install --progress-bar off --user ansible boto3

- name: Redeploy
run: |

@@ -740,48 +725,8 @@ jobs:
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
rm -f neon_install.tar.gz .neon_current_version

deploy-new:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0

- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"

if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
RELEASE=true ./get_binaries.sh
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi

ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.us-east-2.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts
rm -f neon_install.tar.gz .neon_current_version

deploy-proxy:

@@ -823,54 +768,5 @@ jobs:
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

deploy-proxy-new:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0

- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region us-east-2 eks update-kubeconfig --name dev-us-east-2-beta --role-arn arn:aws:iam::369495373322:role/github-runner

- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

promote-compatibility-test-snapshot:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ deploy, deploy-proxy ]
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
steps:
- name: Promote compatibility snapshot for the release
shell: bash -euxo pipefail {0}
env:
BUCKET: neon-github-public-dev
PREFIX: artifacts/latest
run: |
for build_type in debug release; do
OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst

time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
done
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

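The promote-images job in the hunk above retags an already-pushed ECR image as `latest` without rebuilding it, by copying the image manifest to a new tag. A minimal sketch of that pattern, with a placeholder repository name and build tag:

```bash
REPO=neon                 # placeholder repository name
TAG=release-1234          # placeholder build tag

# Fetch the manifest of the image that was pushed under the build tag...
MANIFEST=$(aws ecr batch-get-image --repository-name "$REPO" \
  --image-ids imageTag="$TAG" --query 'images[].imageManifest' --output text)

# ...and register the identical manifest under the "latest" tag.
aws ecr put-image --repository-name "$REPO" --image-tag latest --image-manifest "$MANIFEST"
```
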
.github/workflows/codestyle.yml | 8 (vendored)

@@ -36,7 +36,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2

@@ -56,12 +56,12 @@ jobs:

- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v14)
shell: bash -euxo pipefail {0}

- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v15)
shell: bash -euxo pipefail {0}

- name: Cache postgres v14 build

@@ -106,7 +106,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git
target
key: v5-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
key: v4-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust

- name: Run cargo clippy
run: ./run_clippy.sh

Cargo.lock | 979 (generated; file diff suppressed because it is too large)

Cargo.toml | 11

@@ -1,14 +1,3 @@
# 'named-profiles' feature was stabilized in cargo 1.57. This line makes the
# build work with older cargo versions.
#
# We have this because as of this writing, the latest cargo Debian package
# that's available is 1.56. (Confusingly, the Debian package version number
# is 0.57, whereas 'cargo --version' says 1.56.)
#
# See https://tracker.debian.org/pkg/cargo for the current status of the
# package. When that gets updated, we can remove this.
cargo-features = ["named-profiles"]

[workspace]
members = [
"compute_tools",

Dockerfile | 11

@@ -44,7 +44,7 @@ COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin proxy --locked --release \
&& mold -run cargo build --bin pageserver --bin safekeeper --bin proxy --locked --release \
&& cachepot -s

# Build final image

@@ -63,11 +63,9 @@ RUN set -e \
&& useradd -d /data neon \
&& chown -R neon:neon /data

COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin

COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/

@@ -87,3 +85,4 @@ VOLUME ["/data"]
USER neon
EXPOSE 6400
EXPOSE 9898
CMD ["/bin/bash"]

@@ -1,26 +1,24 @@
#
# This file is identical to the Dockerfile.compute-node-v15 file
# except for the version of Postgres that is built.
#

ARG TAG=pinned
# apparently, ARGs don't get replaced in RUN commands in kaniko
# ARG POSTGIS_VERSION=3.3.0
# ARG PLV8_VERSION=3.1.4
# ARG PG_VERSION=v14

#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
apt update
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev

#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY vendor/postgres-v14 postgres
RUN cd postgres && \

@@ -31,20 +29,22 @@ RUN cd postgres && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install

#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
# PostGIS compiles against neon postgres sources without changes. Perhaps we
# could even use the upstream binaries, compiled against vanilla Postgres, but
# it would require some investigation to check that it works, and also keeps
# working in the future. So for now, we compile our own binaries.
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc

RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
tar xvzf postgis-3.3.1.tar.gz && \
cd postgis-3.3.1 && \
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
tar xvzf postgis-3.3.0.tar.gz && \
cd postgis-3.3.0 && \
./autogen.sh && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
./configure && \

@@ -57,55 +57,39 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control

#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5

# https://github.com/plv8/plv8/issues/475:
# v8 uses gold for linking and sets `--thread-count=4` which breaks
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
# Install newer gold version manually as debian-testing binutils version updates
# libc version, which in turn breaks other extension built against non-testing libc.
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
tar xvzf binutils-2.38.tar.gz && \
cd binutils-2.38 && \
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
cd ../bfd && ./configure && make bfdver.h && \
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
cp /usr/local/bin/ld.gold /usr/bin/gold
# https://github.com/plv8/plv8/issues/475
# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
RUN apt update && \
apt install -y --no-install-recommends -t testing binutils

# Sed is used to patch for https://github.com/plv8/plv8/issues/503
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
tar xvzf v3.1.4.tar.gz && \
cd plv8-3.1.4 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control

#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN apt update && \
apt install -y --no-install-recommends -t testing cmake

RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
tar xvzf h3.tgz && \

@@ -124,18 +108,16 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control

#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
# plv8 still sometimes crashes during the creation
# COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY pgxn/ pgxn/

@@ -145,22 +127,16 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
-C pgxn/neon \
-s install

#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto

#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql

@@ -178,12 +154,10 @@ RUN rm -r /usr/local/pgsql/lib/pgxs/src
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a

#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
@@ -200,6 +174,8 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
|
||||
# libreadline8 for psql
|
||||
# libossp-uuid16 for extension ossp-uuid
|
||||
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
|
||||
# GLIBC 2.34 for plv8.
|
||||
# Debian bullseye provides GLIBC 2.31, so we install the library from testing
|
||||
#
|
||||
# Lastly, link compute_ctl into zenith_ctl while we're at it,
|
||||
# so that we don't need to put this in another layer.
|
||||
@@ -212,6 +188,12 @@ RUN apt update && \
|
||||
libproj19 \
|
||||
libprotobuf-c1 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
echo "Installing GLIBC 2.34" && \
|
||||
echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
|
||||
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
|
||||
apt update && \
|
||||
apt install -y --no-install-recommends -t testing libc6 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
|
||||
|
||||
USER postgres
|
||||
|
||||
@@ -4,23 +4,23 @@
|
||||
#
|
||||
|
||||
ARG TAG=pinned
|
||||
# apparently, ARGs don't get replaced in RUN commands in kaniko
|
||||
# ARG POSTGIS_VERSION=3.3.0
|
||||
# ARG PLV8_VERSION=3.1.4
|
||||
# ARG PG_VERSION=v15
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "build-deps"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:bullseye-slim AS build-deps
|
||||
RUN apt update && \
|
||||
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
|
||||
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
|
||||
libcurl4-openssl-dev libossp-uuid-dev
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-build"
|
||||
# Build Postgres from the neon postgres repository.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-build
|
||||
COPY vendor/postgres-v15 postgres
|
||||
RUN cd postgres && \
|
||||
@@ -31,20 +31,22 @@ RUN cd postgres && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "postgis-build"
|
||||
# Build PostGIS from the upstream PostGIS mirror.
|
||||
#
|
||||
#########################################################################################
|
||||
# PostGIS compiles against neon postgres sources without changes. Perhaps we
|
||||
# could even use the upstream binaries, compiled against vanilla Postgres, but
|
||||
# it would require some investigation to check that it works, and also keeps
|
||||
# working in the future. So for now, we compile our own binaries.
|
||||
FROM build-deps AS postgis-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
|
||||
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget
|
||||
|
||||
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
|
||||
tar xvzf postgis-3.3.1.tar.gz && \
|
||||
cd postgis-3.3.1 && \
|
||||
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
|
||||
tar xvzf postgis-3.3.0.tar.gz && \
|
||||
cd postgis-3.3.0 && \
|
||||
./autogen.sh && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
./configure && \
|
||||
@@ -57,87 +59,37 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "plv8-build"
|
||||
# Build plv8
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS plv8-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
|
||||
apt install -y git curl wget make ninja-build build-essential libncurses5 python3-dev pkg-config libc++-dev libc++abi-dev libglib2.0-dev
|
||||
|
||||
# https://github.com/plv8/plv8/issues/475:
|
||||
# v8 uses gold for linking and sets `--thread-count=4` which breaks
|
||||
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
|
||||
# Install a newer gold version manually, as the debian-testing binutils package updates
# the libc version, which in turn breaks other extensions built against a non-testing libc.
|
||||
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
|
||||
tar xvzf binutils-2.38.tar.gz && \
|
||||
cd binutils-2.38 && \
|
||||
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
cd ../bfd && ./configure && make bfdver.h && \
|
||||
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
|
||||
cp /usr/local/bin/ld.gold /usr/bin/gold
|
||||
# https://github.com/plv8/plv8/issues/475
|
||||
# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
|
||||
RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
|
||||
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
|
||||
apt update && \
|
||||
apt install -y --no-install-recommends -t testing binutils
|
||||
|
||||
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
|
||||
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
|
||||
tar xvzf v3.1.4.tar.gz && \
|
||||
cd plv8-3.1.4 && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
|
||||
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
rm -rf /plv8-* && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "h3-pg-build"
|
||||
# Build h3_pg
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS h3-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# packaged cmake is too old
|
||||
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
|
||||
-q -O /tmp/cmake-install.sh \
|
||||
&& chmod u+x /tmp/cmake-install.sh \
|
||||
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
|
||||
&& rm /tmp/cmake-install.sh
|
||||
|
||||
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
|
||||
tar xvzf h3.tgz && \
|
||||
cd h3-4.0.1 && \
|
||||
mkdir build && \
|
||||
cd build && \
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
DESTDIR=/h3 make install && \
|
||||
cp -R /h3/usr / && \
|
||||
rm -rf build
|
||||
|
||||
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
|
||||
tar xvzf h3-pg.tgz && \
|
||||
cd h3-pg-4.0.1 && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-pg-ext-build"
|
||||
# compile neon extensions
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS neon-pg-ext-build
|
||||
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=h3-pg-build /h3/usr /
|
||||
COPY pgxn/ pgxn/
|
||||
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
@@ -145,22 +97,16 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon \
|
||||
-s install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Compile and run the Neon-specific `compute_ctl` binary
|
||||
#
|
||||
#########################################################################################
|
||||
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
|
||||
USER nonroot
|
||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||
COPY --chown=nonroot . .
|
||||
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Clean up postgres folder before inclusion
|
||||
#
|
||||
#########################################################################################
|
||||
FROM neon-pg-ext-build AS postgres-cleanup-layer
|
||||
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
|
||||
|
||||
@@ -178,12 +124,10 @@ RUN rm -r /usr/local/pgsql/lib/pgxs/src
|
||||
# if they were to be used by other libraries.
|
||||
RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Final layer
|
||||
# Put it all together into the final image
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:bullseye-slim
|
||||
# Add user postgres
|
||||
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
@@ -193,6 +137,8 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
chmod 0750 /var/db/postgres/compute && \
|
||||
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
|
||||
|
||||
# TODO: Check if we can make the extension setup more modular versus a linear build
# currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc.
|
||||
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
|
||||
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
||||
|
||||
@@ -200,6 +146,8 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
|
||||
# libreadline8 for psql
|
||||
# libossp-uuid16 for extension ossp-uuid
|
||||
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
|
||||
# GLIBC 2.34 for plv8.
|
||||
# Debian bullseye provides GLIBC 2.31, so we install the library from testing
|
||||
#
|
||||
# Lastly, link compute_ctl into zenith_ctl while we're at it,
|
||||
# so that we don't need to put this in another layer.
|
||||
@@ -212,6 +160,12 @@ RUN apt update && \
|
||||
libproj19 \
|
||||
libprotobuf-c1 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
echo "Installing GLIBC 2.34" && \
|
||||
echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
|
||||
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
|
||||
apt update && \
|
||||
apt install -y --no-install-recommends -t testing libc6 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
|
||||
|
||||
USER postgres
|
||||
|
||||
@@ -6,12 +6,10 @@ edition = "2021"
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
chrono = "0.4"
|
||||
clap = "4.0"
|
||||
clap = "3.0"
|
||||
env_logger = "0.9"
|
||||
futures = "0.3.13"
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
log = { version = "0.4", features = ["std", "serde"] }
|
||||
notify = "5.0.0"
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
regex = "1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
||||
@@ -51,19 +51,53 @@ fn main() -> Result<()> {
|
||||
// TODO: re-use `utils::logging` later
|
||||
init_logger(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
let matches = cli().get_matches();
|
||||
// Env variable is set by `cargo`
|
||||
let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
|
||||
let matches = clap::App::new("compute_ctl")
|
||||
.version(version.unwrap_or("unknown"))
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let pgdata = matches
|
||||
.get_one::<String>("pgdata")
|
||||
.expect("PGDATA path is required");
|
||||
let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
|
||||
let connstr = matches
|
||||
.get_one::<String>("connstr")
|
||||
.value_of("connstr")
|
||||
.expect("Postgres connection string is required");
|
||||
let spec = matches.get_one::<String>("spec");
|
||||
let spec_path = matches.get_one::<String>("spec-path");
|
||||
let spec = matches.value_of("spec");
|
||||
let spec_path = matches.value_of("spec-path");
|
||||
|
||||
// Try to use just 'postgres' if no path is provided
|
||||
let pgbin = matches.get_one::<String>("pgbin").unwrap();
|
||||
let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
|
||||
|
||||
let spec: ComputeSpec = match spec {
|
||||
// First, try to get cluster spec from the cli argument
|
||||
@@ -139,48 +173,3 @@ fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cli() -> clap::Command {
|
||||
// Env variable is set by `cargo`
|
||||
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
|
||||
clap::Command::new("compute_ctl")
|
||||
.version(version)
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.default_value("postgres")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
fn verify_cli() {
cli().debug_assert()
}
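The removed `cli()` helper leaned on clap 4's `default_value("postgres")`; with clap 3 the fallback moves to the call site via `value_of(..).unwrap_or(..)`. A minimal sketch of that pattern, using a hypothetical `example` binary rather than compute_ctl itself:

// Hedged sketch, not part of the diff: with clap 3, dropping `.default_value("postgres")`
// and applying the fallback at read time is equivalent for this optional flag.
use clap::{App, Arg};

fn main() {
    let matches = App::new("example")
        .arg(Arg::new("pgbin").short('b').long("pgbin").takes_value(true))
        .get_matches();

    // Falls back to plain "postgres" when --pgbin is not given.
    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
    println!("using postgres binary: {pgbin}");
}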
|
||||
@@ -178,6 +178,7 @@ impl ComputeNode {
|
||||
.args(&["--sync-safekeepers"])
|
||||
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.expect("postgres --sync-safekeepers failed to start");
|
||||
|
||||
@@ -190,10 +191,10 @@ impl ComputeNode {
|
||||
|
||||
if !sync_output.status.success() {
anyhow::bail!(
"postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}",
"postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}, stderr: {}",
sync_output.status,
String::from_utf8(sync_output.stdout)
.expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
String::from_utf8(sync_output.stdout).expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
String::from_utf8(sync_output.stderr).expect("postgres --sync-safekeepers exited, and stderr is not utf-8"),
);
}
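A standalone sketch of the widened error report above; `String::from_utf8_lossy` is my substitution (the diff keeps the panicking `from_utf8(..).expect(..)` calls) to avoid a second failure mode on non-UTF-8 output, and the `false` command merely stands in for `postgres --sync-safekeepers`:

use std::process::Command;

fn main() -> anyhow::Result<()> {
    // Hypothetical child process; the diff runs `postgres --sync-safekeepers` here.
    let out = Command::new("false").output()?;
    if !out.status.success() {
        anyhow::bail!(
            "child exited with non-zero status: {}. stdout: {}, stderr: {}",
            out.status,
            String::from_utf8_lossy(&out.stdout),
            String::from_utf8_lossy(&out.stderr),
        );
    }
    Ok(())
}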
|
||||
@@ -257,7 +258,14 @@ impl ComputeNode {
|
||||
.spawn()
|
||||
.expect("cannot start postgres process");
|
||||
|
||||
wait_for_postgres(&mut pg, pgdata_path)?;
// Try default Postgres port if it is not provided
let port = self
.spec
.cluster
.settings
.find("port")
.unwrap_or_else(|| "5432".to_string());
wait_for_postgres(&mut pg, &port, pgdata_path)?;
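The `settings.find("port")` helper belongs to Neon's spec types and is not shown in this excerpt; a sketch of the same fallback-to-5432 lookup over a plain list of settings, with the helper name and types assumed purely for illustration:

// Hypothetical stand-in for the spec's settings collection.
fn find_setting(settings: &[(String, String)], name: &str) -> Option<String> {
    settings
        .iter()
        .find(|(key, _)| key.as_str() == name)
        .map(|(_, value)| value.clone())
}

fn main() {
    let settings = vec![("shared_buffers".to_string(), "128MB".to_string())];
    // Try default Postgres port if it is not provided.
    let port = find_setting(&settings, "port").unwrap_or_else(|| "5432".to_string());
    assert_eq!(port, "5432");
}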
|
||||
// If connection fails,
|
||||
// it may be the old node with `zenith_admin` superuser.
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
use std::fmt::Write;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::net::{SocketAddr, TcpStream};
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::path::Path;
|
||||
use std::process::Child;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::str::FromStr;
|
||||
use std::{fs, thread, time};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use notify::{RecursiveMode, Watcher};
|
||||
use postgres::{Client, Transaction};
|
||||
use serde::Deserialize;
|
||||
|
||||
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
||||
const POSTGRES_WAIT_TIMEOUT: u64 = 60 * 1000; // milliseconds
|
||||
|
||||
/// Rust representation of Postgres role info with only those fields
|
||||
/// that matter for us.
|
||||
@@ -168,7 +168,7 @@ impl Database {
|
||||
/// it may require a proper quoting too.
|
||||
pub fn to_pg_options(&self) -> String {
|
||||
let mut params: String = self.options.as_pg_options();
|
||||
write!(params, " OWNER {}", &self.owner.pg_quote())
|
||||
write!(params, " OWNER {}", &self.owner.quote())
|
||||
.expect("String is documented to not to error during write operations");
|
||||
|
||||
params
|
||||
@@ -179,17 +179,18 @@ impl Database {
|
||||
/// intended to be used for DB / role names.
|
||||
pub type PgIdent = String;
|
||||
|
||||
/// Generic trait used to provide quoting / encoding for strings used in the
/// Postgres SQL queries and DATABASE_URL.
pub trait Escaping {
fn pg_quote(&self) -> String;
/// Generic trait used to provide quoting for strings used in the
/// Postgres SQL queries. Currently used only to implement quoting
/// of identifiers, but could be used for literals in the future.
pub trait PgQuote {
fn quote(&self) -> String;
}

impl Escaping for PgIdent {
impl PgQuote for PgIdent {
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
/// always quotes provided string with `""` and escapes every `"`.
/// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
fn pg_quote(&self) -> String {
/// always quotes provided string with `""` and escapes every `"`. Not idempotent,
/// i.e. if string is already escaped it will be escaped again.
fn quote(&self) -> String {
let result = format!("\"{}\"", self.replace('"', "\"\""));
result
}
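A small usage sketch of the quoting above, restated as a self-contained program; the trait and impl are copied in miniature, and the branch's `quote` name is used, though `pg_quote` behaves identically:

// Minimal reimplementation for illustration; mirrors the impl above.
type PgIdent = String;

trait PgQuote {
    fn quote(&self) -> String;
}

impl PgQuote for PgIdent {
    fn quote(&self) -> String {
        format!("\"{}\"", self.replace('"', "\"\""))
    }
}

fn main() {
    let role = PgIdent::from("weird\"role");
    // Quoted identifiers keep the statement valid even for unusual names:
    // ALTER ROLE "weird""role" RENAME TO "new_name"
    println!(
        "ALTER ROLE {} RENAME TO {}",
        role.quote(),
        PgIdent::from("new_name").quote()
    );
}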
@@ -229,112 +230,52 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
|
||||
Ok(postgres_dbs)
|
||||
}
|
||||
|
||||
/// Wait for Postgres to become ready to accept connections. It's ready to
/// accept connections when the state-field in `pgdata/postmaster.pid` says
/// 'ready'.
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
/// Wait for Postgres to become ready to accept connections:
/// - state should be `ready` in the `pgdata/postmaster.pid`
/// - and we should be able to connect to 127.0.0.1:5432
pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
let pid_path = pgdata.join("postmaster.pid");
let mut slept: u64 = 0; // ms
let pause = time::Duration::from_millis(100);
|
||||
|
||||
// PostgreSQL writes line "ready" to the postmaster.pid file, when it has
|
||||
// completed initialization and is ready to accept connections. We want to
|
||||
// react quickly and perform the rest of our initialization as soon as
|
||||
// PostgreSQL starts accepting connections. Use 'notify' to be notified
|
||||
// whenever the PID file is changed, and whenever it changes, read it to
|
||||
// check if it's now "ready".
|
||||
//
|
||||
// You cannot actually watch a file before it exists, so we first watch the
|
||||
// data directory, and once the postmaster.pid file appears, we switch to
|
||||
// watch the file instead. We also wake up every 100 ms to poll, just in
|
||||
// case we miss some events for some reason. Not strictly necessary, but
|
||||
// better safe than sorry.
|
||||
let (tx, rx) = std::sync::mpsc::channel();
|
||||
let (mut watcher, rx): (Box<dyn Watcher>, _) = match notify::recommended_watcher(move |res| {
|
||||
let _ = tx.send(res);
|
||||
}) {
|
||||
Ok(watcher) => (Box::new(watcher), rx),
|
||||
Err(e) => {
|
||||
match e.kind {
|
||||
notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
|
||||
// docker on m1 macs does not support recommended_watcher
|
||||
// but return "Function not implemented (os error 38)"
|
||||
// see https://github.com/notify-rs/notify/issues/423
|
||||
let (tx, rx) = std::sync::mpsc::channel();
|
||||
let timeout = time::Duration::from_millis(10);
|
||||
let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();
|
||||
|
||||
// let's poll it faster than what we check the results for (100ms)
|
||||
let config =
|
||||
notify::Config::default().with_poll_interval(Duration::from_millis(50));
|
||||
|
||||
let watcher = notify::PollWatcher::new(
|
||||
move |res| {
|
||||
let _ = tx.send(res);
|
||||
},
|
||||
config,
|
||||
)?;
|
||||
|
||||
(Box::new(watcher), rx)
|
||||
}
|
||||
_ => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
|
||||
|
||||
let started_at = Instant::now();
|
||||
let mut postmaster_pid_seen = false;
|
||||
loop {
|
||||
// Sleep POSTGRES_WAIT_TIMEOUT at max (a bit longer actually if we consider a TCP timeout,
// but postgres starts listening almost immediately, even if it is not really
// ready to accept connections).
|
||||
if slept >= POSTGRES_WAIT_TIMEOUT {
|
||||
bail!("timed out while waiting for Postgres to start");
|
||||
}
|
||||
|
||||
if let Ok(Some(status)) = pg.try_wait() {
|
||||
// Postgres exited, that is not what we expected, bail out earlier.
|
||||
let code = status.code().unwrap_or(-1);
|
||||
bail!("Postgres exited unexpectedly with code {}", code);
|
||||
}
|
||||
|
||||
let res = rx.recv_timeout(Duration::from_millis(100));
|
||||
log::debug!("woken up by notify: {res:?}");
|
||||
// If there are multiple events in the channel already, we only need to be
|
||||
// check once. Swallow the extra events before we go ahead to check the
|
||||
// pid file.
|
||||
while let Ok(res) = rx.try_recv() {
|
||||
log::debug!("swallowing extra event: {res:?}");
|
||||
}
|
||||
|
||||
// Check that we can open pid file first.
|
||||
if let Ok(file) = File::open(&pid_path) {
|
||||
if !postmaster_pid_seen {
|
||||
log::debug!("postmaster.pid appeared");
|
||||
watcher
|
||||
.unwatch(pgdata)
|
||||
.expect("Failed to remove pgdata dir watch");
|
||||
watcher
|
||||
.watch(&pid_path, RecursiveMode::NonRecursive)
|
||||
.expect("Failed to add postmaster.pid file watch");
|
||||
postmaster_pid_seen = true;
|
||||
}
|
||||
|
||||
let file = BufReader::new(file);
|
||||
let last_line = file.lines().last();
|
||||
|
||||
// Pid file could be there and we could read it, but it could be empty, for example.
|
||||
if let Some(Ok(line)) = last_line {
|
||||
let status = line.trim();
|
||||
log::debug!("last line of postmaster.pid: {status:?}");
|
||||
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
|
||||
|
||||
// Now Postgres is ready to accept connections
|
||||
if status == "ready" {
|
||||
if status == "ready" && can_connect {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Give up after POSTGRES_WAIT_TIMEOUT.
let duration = started_at.elapsed();
if duration >= POSTGRES_WAIT_TIMEOUT {
bail!("timed out while waiting for Postgres to start");
}
thread::sleep(pause);
slept += 100;
}

log::info!("PostgreSQL is now running, continuing to configure it");

Ok(())
}
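A minimal sketch of the readiness predicate this version converges on, polling only, with no notify watcher; the data directory and port below are placeholders, and errors are flattened to `false` instead of being propagated as in the function above:

use std::fs::File;
use std::io::{BufRead, BufReader};
use std::net::{SocketAddr, TcpStream};
use std::path::Path;
use std::str::FromStr;
use std::time::Duration;

/// Returns true once postmaster.pid reports "ready" and the port accepts TCP connections.
fn postgres_is_ready(pgdata: &Path, port: &str) -> bool {
    let pid_path = pgdata.join("postmaster.pid");
    let last_line = match File::open(&pid_path) {
        Ok(f) => BufReader::new(f).lines().last().and_then(|l| l.ok()),
        Err(_) => return false, // postmaster.pid not written yet
    };
    let ready = last_line.map(|l| l.trim() == "ready").unwrap_or(false);

    let addr = match SocketAddr::from_str(&format!("127.0.0.1:{port}")) {
        Ok(a) => a,
        Err(_) => return false,
    };
    let can_connect = TcpStream::connect_timeout(&addr, Duration::from_millis(10)).is_ok();

    ready && can_connect
}

fn main() {
    // Placeholder data directory and port, for illustration only.
    let ready = postgres_is_ready(Path::new("/var/db/postgres/compute/pgdata"), "5432");
    println!("postgres ready: {ready}");
}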
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::Result;
|
||||
use log::{info, log_enabled, warn, Level};
|
||||
use postgres::config::Config;
|
||||
use postgres::{Client, NoTls};
|
||||
use serde::Deserialize;
|
||||
|
||||
@@ -117,8 +115,8 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
if existing_roles.iter().any(|r| r.name == op.name) {
|
||||
let query: String = format!(
|
||||
"ALTER ROLE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote()
|
||||
op.name.quote(),
|
||||
new_name.quote()
|
||||
);
|
||||
|
||||
warn!("renaming role '{}' to '{}'", op.name, new_name);
|
||||
@@ -164,7 +162,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
}
|
||||
|
||||
if update_role {
|
||||
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
|
||||
let mut query: String = format!("ALTER ROLE {} ", name.quote());
|
||||
info_print!(" -> update");
|
||||
|
||||
query.push_str(&role.to_pg_options());
|
||||
@@ -172,7 +170,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
}
|
||||
} else {
|
||||
info!("role name: '{}'", &name);
|
||||
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
|
||||
let mut query: String = format!("CREATE ROLE {} ", name.quote());
|
||||
info!("role create query: '{}'", &query);
|
||||
info_print!(" -> create");
|
||||
|
||||
@@ -181,7 +179,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
|
||||
let grant_query = format!(
|
||||
"GRANT pg_read_all_data, pg_write_all_data TO {}",
|
||||
name.pg_quote()
|
||||
name.quote()
|
||||
);
|
||||
xact.execute(grant_query.as_str(), &[])?;
|
||||
info!("role grant query: '{}'", &grant_query);
|
||||
@@ -217,7 +215,7 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
|
||||
// We do not check whether the role exists or not,
// Postgres will take care of it for us
|
||||
if op.action == "delete_role" {
|
||||
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
|
||||
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
|
||||
|
||||
warn!("deleting role '{}'", &op.name);
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
@@ -232,16 +230,17 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
|
||||
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
|
||||
for db in &node.spec.cluster.databases {
|
||||
if db.owner != *role_name {
|
||||
let mut conf = Config::from_str(node.connstr.as_str())?;
|
||||
conf.dbname(&db.name);
|
||||
let mut connstr = node.connstr.clone();
|
||||
// database name is always the last and the only component of the path
|
||||
connstr.set_path(&db.name);
|
||||
|
||||
let mut client = conf.connect(NoTls)?;
|
||||
let mut client = Client::connect(connstr.as_str(), NoTls)?;
|
||||
|
||||
// This will reassign all dependent objects to the db owner
|
||||
let reassign_query = format!(
|
||||
"REASSIGN OWNED BY {} TO {}",
|
||||
role_name.pg_quote(),
|
||||
db.owner.pg_quote()
|
||||
role_name.quote(),
|
||||
db.owner.quote()
|
||||
);
|
||||
info!(
|
||||
"reassigning objects owned by '{}' in db '{}' to '{}'",
|
||||
@@ -250,7 +249,7 @@ fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()>
|
||||
client.simple_query(&reassign_query)?;

// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
client.simple_query(&drop_query)?;
}
}
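The per-database connection here is retargeted by rewriting the URL path, since the database name is the only path component of a `postgres://` connection string. A sketch of that rewrite using the `url` crate, which is an assumption on my part; this excerpt does not show the concrete type of `node.connstr`:

use url::Url;

fn main() {
    // Placeholder connection string for illustration.
    let base = Url::parse("postgres://cloud_admin@127.0.0.1:5432/postgres").unwrap();

    for db in ["neondb", "analytics"] {
        let mut connstr = base.clone();
        // The database name is the only path component, so this retargets the URL.
        connstr.set_path(db);
        println!("would connect to {connstr}");
        // e.g. postgres://cloud_admin@127.0.0.1:5432/neondb
    }
}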
@@ -280,7 +279,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
// We do not check whether the DB exists or not,
// Postgres will take care of it for us
|
||||
"delete_db" => {
|
||||
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());
|
||||
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.quote());
|
||||
|
||||
warn!("deleting database '{}'", &op.name);
|
||||
client.execute(query.as_str(), &[])?;
|
||||
@@ -292,8 +291,8 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
if existing_dbs.iter().any(|r| r.name == op.name) {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote()
|
||||
op.name.quote(),
|
||||
new_name.quote()
|
||||
);
|
||||
|
||||
warn!("renaming database '{}' to '{}'", op.name, new_name);
|
||||
@@ -321,7 +320,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
// XXX: db owner name is returned as quoted string from Postgres,
|
||||
// when quoting is needed.
|
||||
let new_owner = if r.owner.starts_with('"') {
|
||||
db.owner.pg_quote()
|
||||
db.owner.quote()
|
||||
} else {
|
||||
db.owner.clone()
|
||||
};
|
||||
@@ -329,15 +328,15 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
if new_owner != r.owner {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} OWNER TO {}",
|
||||
name.pg_quote(),
|
||||
db.owner.pg_quote()
|
||||
name.quote(),
|
||||
db.owner.quote()
|
||||
);
|
||||
info_print!(" -> update");
|
||||
|
||||
client.execute(query.as_str(), &[])?;
|
||||
}
|
||||
} else {
|
||||
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
|
||||
let mut query: String = format!("CREATE DATABASE {} ", name.quote());
|
||||
info_print!(" -> create");
|
||||
|
||||
query.push_str(&db.to_pg_options());
|
||||
@@ -367,7 +366,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
.cluster
|
||||
.roles
|
||||
.iter()
|
||||
.map(|r| r.name.pg_quote())
|
||||
.map(|r| r.name.quote())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for db in &spec.cluster.databases {
|
||||
@@ -375,7 +374,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
|
||||
let query: String = format!(
|
||||
"GRANT CREATE ON DATABASE {} TO {}",
|
||||
dbname.pg_quote(),
|
||||
dbname.quote(),
|
||||
roles.join(", ")
|
||||
);
|
||||
info!("grant query {}", &query);
|
||||
@@ -386,11 +385,12 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
// Do some per-database access adjustments. We'd better do this at db creation time,
|
||||
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
||||
// atomically.
|
||||
let mut db_connstr = node.connstr.clone();
|
||||
for db in &node.spec.cluster.databases {
|
||||
let mut conf = Config::from_str(node.connstr.as_str())?;
|
||||
conf.dbname(&db.name);
|
||||
// database name is always the last and the only component of the path
|
||||
db_connstr.set_path(&db.name);
|
||||
|
||||
let mut db_client = conf.connect(NoTls)?;
|
||||
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
|
||||
|
||||
// This will only change ownership on the schema itself, not the objects
|
||||
// inside it. Without it owner of the `public` schema will be `cloud_admin`
|
||||
@@ -419,36 +419,9 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
END IF;\n\
|
||||
END\n\
|
||||
$$;",
|
||||
db.owner.pg_quote()
|
||||
db.owner.quote()
|
||||
);
|
||||
db_client.simple_query(&alter_query)?;
|
||||
|
||||
// Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
|
||||
// This is needed because since postgres 15 this privilege is removed by default.
|
||||
let grant_query = "DO $$\n\
|
||||
BEGIN\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT nspname\n\
|
||||
FROM pg_catalog.pg_namespace\n\
|
||||
WHERE nspname = 'public'\n\
|
||||
) AND\n\
|
||||
current_setting('server_version_num')::int/10000 >= 15\n\
|
||||
THEN\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT rolname\n\
|
||||
FROM pg_catalog.pg_roles\n\
|
||||
WHERE rolname = 'web_access'\n\
|
||||
)\n\
|
||||
THEN\n\
|
||||
GRANT CREATE ON SCHEMA public TO web_access;\n\
|
||||
END IF;\n\
|
||||
END IF;\n\
|
||||
END\n\
|
||||
$$;"
|
||||
.to_string();
|
||||
|
||||
info!("grant query for db {} : {}", &db.name, &grant_query);
|
||||
db_client.simple_query(&grant_query)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -33,9 +33,9 @@ mod pg_helpers_tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ident_pg_quote() {
|
||||
fn quote_ident() {
|
||||
let ident: PgIdent = PgIdent::from("\"name\";\\n select 1;");
|
||||
|
||||
assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
|
||||
assert_eq!(ident.quote(), "\"\"\"name\"\";\\n select 1;\"");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,24 +4,22 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
clap = "4.0"
|
||||
comfy-table = "6.1"
|
||||
clap = "3.0"
|
||||
comfy-table = "5.0.1"
|
||||
git-version = "0.3.5"
|
||||
tar = "0.4.38"
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "2.0"
|
||||
serde_with = "1.12.0"
|
||||
toml = "0.5"
|
||||
once_cell = "1.13.0"
|
||||
regex = "1"
|
||||
anyhow = "1.0"
|
||||
thiserror = "1"
|
||||
nix = "0.25"
|
||||
nix = "0.23"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
|
||||
# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
|
||||
# instead, so that recompile times are better.
|
||||
pageserver_api = { path = "../libs/pageserver_api" }
|
||||
safekeeper_api = { path = "../libs/safekeeper_api" }
|
||||
pageserver = { path = "../pageserver" }
|
||||
safekeeper = { path = "../safekeeper" }
|
||||
utils = { path = "../libs/utils" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
@@ -6,18 +6,18 @@
|
||||
//! rely on `neon_local` to set up the environment for each test.
|
||||
//!
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
||||
use clap::{App, AppSettings, Arg, ArgMatches};
|
||||
use control_plane::compute::ComputeControlPlane;
|
||||
use control_plane::local_env::{EtcdBroker, LocalEnv};
|
||||
use control_plane::safekeeper::SafekeeperNode;
|
||||
use control_plane::storage::PageServerNode;
|
||||
use control_plane::{etcd, local_env};
|
||||
use pageserver_api::models::TimelineInfo;
|
||||
use pageserver_api::{
|
||||
use pageserver::config::defaults::{
|
||||
DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
|
||||
DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
|
||||
};
|
||||
use safekeeper_api::{
|
||||
use pageserver::http::models::TimelineInfo;
|
||||
use safekeeper::defaults::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
};
|
||||
@@ -85,7 +85,212 @@ struct TimelineTreeEl {
|
||||
// * Providing CLI api to the pageserver
|
||||
// * TODO: export/import to/from usual postgres
|
||||
fn main() -> Result<()> {
|
||||
let matches = cli().get_matches();
|
||||
let branch_name_arg = Arg::new("branch-name")
|
||||
.long("branch-name")
|
||||
.takes_value(true)
|
||||
.help("Name of the branch to be created or used as an alias for other services")
|
||||
.required(false);
|
||||
|
||||
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
|
||||
|
||||
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
|
||||
|
||||
let tenant_id_arg = Arg::new("tenant-id")
|
||||
.long("tenant-id")
|
||||
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let timeline_id_arg = Arg::new("timeline-id")
|
||||
.long("timeline-id")
|
||||
.help("Timeline id. Represented as a hexadecimal string 32 symbols length")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let pg_version_arg = Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.required(false)
|
||||
.takes_value(true)
|
||||
.default_value(DEFAULT_PG_VERSION);
|
||||
|
||||
let port_arg = Arg::new("port")
|
||||
.long("port")
|
||||
.required(false)
|
||||
.value_name("port");
|
||||
|
||||
let stop_mode_arg = Arg::new("stop-mode")
|
||||
.short('m')
|
||||
.takes_value(true)
|
||||
.possible_values(&["fast", "immediate"])
|
||||
.help("If 'immediate', don't flush repository data at shutdown")
|
||||
.required(false)
|
||||
.value_name("stop-mode");
|
||||
|
||||
let pageserver_config_args = Arg::new("pageserver-config-override")
|
||||
.long("pageserver-config-override")
|
||||
.takes_value(true)
|
||||
.number_of_values(1)
|
||||
.multiple_occurrences(true)
|
||||
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
|
||||
.required(false);
|
||||
|
||||
let lsn_arg = Arg::new("lsn")
|
||||
.long("lsn")
|
||||
.help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let matches = App::new("Neon CLI")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.version(GIT_VERSION)
|
||||
.subcommand(
|
||||
App::new("init")
|
||||
.about("Initialize a new Neon repository")
|
||||
.arg(pageserver_config_args.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(
|
||||
Arg::new("config")
|
||||
.long("config")
|
||||
.required(false)
|
||||
.value_name("config"),
|
||||
)
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(
|
||||
App::new("timeline")
|
||||
.about("Manage timelines")
|
||||
.subcommand(App::new("list")
|
||||
.about("List all timelines, available to this pageserver")
|
||||
.arg(tenant_id_arg.clone()))
|
||||
.subcommand(App::new("branch")
|
||||
.about("Create a new timeline, using another timeline as a base, copying its data")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name").takes_value(true)
|
||||
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
|
||||
.arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn").takes_value(true)
|
||||
.help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
|
||||
.subcommand(App::new("create")
|
||||
.about("Create a new blank timeline")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("import")
|
||||
.about("Import timeline from basebackup directory")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(Arg::new("node-name").long("node-name").takes_value(true)
|
||||
.help("Name to assign to the imported timeline"))
|
||||
.arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
|
||||
.help("Basebackup tarfile to import"))
|
||||
.arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
|
||||
.help("Lsn the basebackup starts at"))
|
||||
.arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
|
||||
.help("Wal to add after base"))
|
||||
.arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
|
||||
.help("Lsn the basebackup ends at"))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
).subcommand(
|
||||
App::new("tenant")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage tenants")
|
||||
.subcommand(App::new("list"))
|
||||
.subcommand(App::new("create")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("config")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("pageserver")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage pageserver")
|
||||
.subcommand(App::new("status"))
|
||||
.subcommand(App::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
|
||||
.subcommand(App::new("stop").about("Stop local pageserver")
|
||||
.arg(stop_mode_arg.clone()))
|
||||
.subcommand(App::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
|
||||
)
|
||||
.subcommand(
|
||||
App::new("safekeeper")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage safekeepers")
|
||||
.subcommand(App::new("start")
|
||||
.about("Start local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("stop")
|
||||
.about("Stop local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("restart")
|
||||
.about("Restart local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("pg")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage postgres instances")
|
||||
.subcommand(App::new("list").arg(tenant_id_arg.clone()))
|
||||
.subcommand(App::new("create")
|
||||
.about("Create a postgres compute node")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(
|
||||
Arg::new("config-only")
|
||||
.help("Don't do basebackup, create compute node with only config files")
|
||||
.long("config-only")
|
||||
.required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("start")
|
||||
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(
|
||||
App::new("stop")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(
|
||||
Arg::new("destroy")
|
||||
.help("Also delete data directory (now optional, should be default in future)")
|
||||
.long("destroy")
|
||||
.required(false)
|
||||
)
|
||||
)
|
||||
|
||||
)
|
||||
.subcommand(
|
||||
App::new("start")
|
||||
.about("Start page server and safekeepers")
|
||||
.arg(pageserver_config_args)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("stop")
|
||||
.about("Stop page server and safekeepers")
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let (sub_name, sub_args) = match matches.subcommand() {
|
||||
Some(subcommand_data) => subcommand_data,
|
||||
@@ -153,7 +358,9 @@ fn print_timelines_tree(
|
||||
|
||||
// Memorize all direct children of each timeline.
|
||||
for timeline in timelines.iter() {
|
||||
if let Some(ancestor_timeline_id) = timeline.ancestor_timeline_id {
|
||||
if let Some(ancestor_timeline_id) =
|
||||
timeline.local.as_ref().and_then(|l| l.ancestor_timeline_id)
|
||||
{
|
||||
timelines_hash
|
||||
.get_mut(&ancestor_timeline_id)
|
||||
.context("missing timeline info in the HashMap")?
|
||||
@@ -164,7 +371,13 @@ fn print_timelines_tree(
|
||||
|
||||
for timeline in timelines_hash.values() {
|
||||
// Start with root local timelines (no ancestors) first.
|
||||
if timeline.info.ancestor_timeline_id.is_none() {
|
||||
if timeline
|
||||
.info
|
||||
.local
|
||||
.as_ref()
|
||||
.and_then(|l| l.ancestor_timeline_id)
|
||||
.is_none()
|
||||
{
|
||||
print_timeline(0, &Vec::from([true]), timeline, &timelines_hash)?;
|
||||
}
|
||||
}
|
||||
@@ -181,8 +394,17 @@ fn print_timeline(
|
||||
timeline: &TimelineTreeEl,
|
||||
timelines: &HashMap<TimelineId, TimelineTreeEl>,
|
||||
) -> Result<()> {
|
||||
let local_remote = match (timeline.info.local.as_ref(), timeline.info.remote.as_ref()) {
|
||||
(None, None) => unreachable!("in this case no info for a timeline is found"),
|
||||
(None, Some(_)) => "(R)",
|
||||
(Some(_), None) => "(L)",
|
||||
(Some(_), Some(_)) => "(L+R)",
|
||||
};
|
||||
// Draw main padding
|
||||
print!("{} ", local_remote);
|
||||
|
||||
if nesting_level > 0 {
|
||||
let ancestor_lsn = match timeline.info.ancestor_lsn {
|
||||
let ancestor_lsn = match timeline.info.local.as_ref().and_then(|i| i.ancestor_lsn) {
|
||||
Some(lsn) => lsn.to_string(),
|
||||
None => "Unknown Lsn".to_string(),
|
||||
};
|
||||
@@ -270,16 +492,16 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R
|
||||
|
||||
fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
sub_match
.get_one::<String>("tenant-id")
.map(|tenant_id| TenantId::from_str(tenant_id))
.value_of("tenant-id")
.map(TenantId::from_str)
.transpose()
.context("Failed to parse tenant id from the argument string")
}

fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId>> {
sub_match
.get_one::<String>("timeline-id")
.map(|timeline_id| TimelineId::from_str(timeline_id))
.value_of("timeline-id")
.map(TimelineId::from_str)
.transpose()
.context("Failed to parse timeline id from the argument string")
}
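The `.map(FromStr::from_str).transpose()` shape is what turns an optional CLI string into `Result<Option<T>>`; a self-contained sketch of the same idiom, with `u64` standing in for `TenantId`/`TimelineId`:

use anyhow::{Context, Result};

// u64 stands in for TenantId/TimelineId here, purely for illustration.
fn parse_optional_id(arg: Option<&str>) -> Result<Option<u64>> {
    arg.map(str::parse::<u64>)
        .transpose()
        .context("Failed to parse id from the argument string")
}

fn main() -> Result<()> {
    assert_eq!(parse_optional_id(None)?, None); // flag not given
    assert_eq!(parse_optional_id(Some("42"))?, Some(42)); // valid value
    assert!(parse_optional_id(Some("nope")).is_err()); // parse error is surfaced
    Ok(())
}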
@@ -288,22 +510,19 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
let initial_timeline_id_arg = parse_timeline_id(init_match)?;
|
||||
|
||||
// Create config file
|
||||
let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
|
||||
let toml_file: String = if let Some(config_path) = init_match.value_of("config") {
|
||||
// load and parse the file
|
||||
std::fs::read_to_string(config_path).with_context(|| {
|
||||
format!(
|
||||
"Could not read configuration file '{}'",
|
||||
config_path.display()
|
||||
)
|
||||
})?
|
||||
std::fs::read_to_string(std::path::Path::new(config_path))
|
||||
.with_context(|| format!("Could not read configuration file '{config_path}'"))?
|
||||
} else {
|
||||
// Built-in default config
|
||||
default_conf(&EtcdBroker::locate_etcd()?)
|
||||
};
|
||||
|
||||
let pg_version = init_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let mut env =
|
||||
@@ -339,10 +558,9 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
|
||||
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
init_match
.get_many::<String>("pageserver-config-override")
.values_of("pageserver-config-override")
.into_iter()
.flatten()
.map(|s| s.as_str())
.collect()
}
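With clap 3, a repeatable flag comes back as `Option<Values>`, so `values_of(..).into_iter().flatten()` collapses the absent case into an empty list. A hypothetical standalone example of that pattern; the `example` binary and `--override` flag are made up:

use clap::{App, Arg};

fn main() {
    let matches = App::new("example")
        .arg(
            Arg::new("override")
                .long("override")
                .takes_value(true)
                .number_of_values(1)
                .multiple_occurrences(true),
        )
        .get_matches_from(["example", "--override", "a=1", "--override", "b=2"]);

    // Absent flag => values_of() is None => flatten() yields an empty Vec.
    let overrides: Vec<&str> = matches.values_of("override").into_iter().flatten().collect();
    assert_eq!(overrides, ["a=1", "b=2"]);
}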
|
||||
@@ -357,7 +575,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
Some(("create", create_match)) => {
|
||||
let initial_tenant_id = parse_tenant_id(create_match)?;
|
||||
let tenant_conf: HashMap<_, _> = create_match
|
||||
.get_many::<String>("config")
|
||||
.values_of("config")
|
||||
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
|
||||
.unwrap_or_default();
|
||||
let new_tenant_id = pageserver.tenant_create(initial_tenant_id, tenant_conf)?;
|
||||
@@ -366,8 +584,9 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
// Create an initial timeline for the new tenant
|
||||
let new_timeline_id = parse_timeline_id(create_match)?;
|
||||
let pg_version = create_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let timeline_info = pageserver.timeline_create(
|
||||
@@ -378,7 +597,10 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
Some(pg_version),
|
||||
)?;
|
||||
let new_timeline_id = timeline_info.timeline_id;
|
||||
let last_record_lsn = timeline_info.last_record_lsn;
|
||||
let last_record_lsn = timeline_info
|
||||
.local
|
||||
.context(format!("Failed to get last record LSN: no local timeline info for timeline {new_timeline_id}"))?
|
||||
.last_record_lsn;
|
||||
|
||||
env.register_branch_mapping(
|
||||
DEFAULT_BRANCH_NAME.to_string(),
|
||||
@@ -393,7 +615,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
Some(("config", create_match)) => {
|
||||
let tenant_id = get_tenant_id(create_match, env)?;
|
||||
let tenant_conf: HashMap<_, _> = create_match
|
||||
.get_many::<String>("config")
|
||||
.values_of("config")
|
||||
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
@@ -420,19 +642,23 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
Some(("create", create_match)) => {
|
||||
let tenant_id = get_tenant_id(create_match, env)?;
|
||||
let new_branch_name = create_match
|
||||
.get_one::<String>("branch-name")
|
||||
.value_of("branch-name")
|
||||
.ok_or_else(|| anyhow!("No branch name provided"))?;
|
||||
|
||||
let pg_version = create_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let timeline_info =
|
||||
pageserver.timeline_create(tenant_id, None, None, None, Some(pg_version))?;
|
||||
let new_timeline_id = timeline_info.timeline_id;
|
||||
|
||||
let last_record_lsn = timeline_info.last_record_lsn;
|
||||
let last_record_lsn = timeline_info
|
||||
.local
|
||||
.expect("no local timeline info")
|
||||
.last_record_lsn;
|
||||
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
|
||||
|
||||
println!(
|
||||
@@ -444,32 +670,35 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
let tenant_id = get_tenant_id(import_match, env)?;
|
||||
let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
|
||||
let name = import_match
|
||||
.get_one::<String>("node-name")
|
||||
.value_of("node-name")
|
||||
.ok_or_else(|| anyhow!("No node name provided"))?;
|
||||
|
||||
// Parse base inputs
|
||||
let base_tarfile = import_match
|
||||
.get_one::<PathBuf>("base-tarfile")
|
||||
.ok_or_else(|| anyhow!("No base-tarfile provided"))?
|
||||
.to_owned();
|
||||
.value_of("base-tarfile")
|
||||
.map(|s| PathBuf::from_str(s).unwrap())
|
||||
.ok_or_else(|| anyhow!("No base-tarfile provided"))?;
|
||||
let base_lsn = Lsn::from_str(
|
||||
import_match
|
||||
.get_one::<String>("base-lsn")
|
||||
.value_of("base-lsn")
|
||||
.ok_or_else(|| anyhow!("No base-lsn provided"))?,
|
||||
)?;
|
||||
let base = (base_lsn, base_tarfile);
|
||||
|
||||
// Parse pg_wal inputs
|
||||
let wal_tarfile = import_match.get_one::<PathBuf>("wal-tarfile").cloned();
|
||||
let wal_tarfile = import_match
|
||||
.value_of("wal-tarfile")
|
||||
.map(|s| PathBuf::from_str(s).unwrap());
|
||||
let end_lsn = import_match
|
||||
.get_one::<String>("end-lsn")
|
||||
.value_of("end-lsn")
|
||||
.map(|s| Lsn::from_str(s).unwrap());
|
||||
// TODO validate both or none are provided
|
||||
let pg_wal = end_lsn.zip(wal_tarfile);
|
||||
|
||||
let pg_version = import_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let mut cplane = ComputeControlPlane::load(env.clone())?;
|
||||
@@ -484,11 +713,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
Some(("branch", branch_match)) => {
|
||||
let tenant_id = get_tenant_id(branch_match, env)?;
|
||||
let new_branch_name = branch_match
|
||||
.get_one::<String>("branch-name")
|
||||
.value_of("branch-name")
|
||||
.ok_or_else(|| anyhow!("No branch name provided"))?;
|
||||
let ancestor_branch_name = branch_match
|
||||
.get_one::<String>("ancestor-branch-name")
|
||||
.map(|s| s.as_str())
|
||||
.value_of("ancestor-branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let ancestor_timeline_id = env
|
||||
.get_branch_timeline_id(ancestor_branch_name, tenant_id)
|
||||
@@ -497,8 +725,8 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
})?;
|
||||
|
||||
let start_lsn = branch_match
|
||||
.get_one::<String>("ancestor-start-lsn")
|
||||
.map(|lsn_str| Lsn::from_str(lsn_str))
|
||||
.value_of("ancestor-start-lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse ancestor start Lsn from the request")?;
|
||||
let timeline_info = pageserver.timeline_create(
|
||||
@@ -510,7 +738,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
)?;
|
||||
let new_timeline_id = timeline_info.timeline_id;
|
||||
|
||||
let last_record_lsn = timeline_info.last_record_lsn;
|
||||
let last_record_lsn = timeline_info
|
||||
.local
|
||||
.expect("no local timeline info")
|
||||
.last_record_lsn;
|
||||
|
||||
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
|
||||
|
||||
@@ -570,7 +801,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
// Use the LSN at the end of the timeline.
|
||||
timeline_infos
|
||||
.get(&node.timeline_id)
|
||||
.map(|bi| bi.last_record_lsn.to_string())
|
||||
.and_then(|bi| bi.local.as_ref().map(|l| l.last_record_lsn.to_string()))
|
||||
.unwrap_or_else(|| "?".to_string())
|
||||
}
|
||||
Some(lsn) => {
|
||||
@@ -599,39 +830,45 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
}
|
||||
"create" => {
|
||||
let branch_name = sub_args
|
||||
.get_one::<String>("branch-name")
|
||||
.map(|s| s.as_str())
|
||||
.value_of("branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let node_name = sub_args
|
||||
.get_one::<String>("node")
|
||||
.map(|node_name| node_name.to_string())
|
||||
.unwrap_or_else(|| format!("{branch_name}_node"));
|
||||
.value_of("node")
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or_else(|| format!("{}_node", branch_name));
|
||||
|
||||
let lsn = sub_args
|
||||
.get_one::<String>("lsn")
|
||||
.map(|lsn_str| Lsn::from_str(lsn_str))
|
||||
.value_of("lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse Lsn from the request")?;
|
||||
let timeline_id = env
|
||||
.get_branch_timeline_id(branch_name, tenant_id)
|
||||
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;
|
||||
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{}'", branch_name))?;
|
||||
|
||||
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let pg_version = sub_args
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
|
||||
}
|
||||
"start" => {
|
||||
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
let node_name = sub_args
|
||||
.get_one::<String>("node")
|
||||
.value_of("node")
|
||||
.ok_or_else(|| anyhow!("No node name was provided to start"))?;
|
||||
|
||||
let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));
|
||||
let node = cplane.nodes.get(&(tenant_id, node_name.to_owned()));
|
||||
|
||||
let auth_token = if matches!(env.pageserver.auth_type, AuthType::NeonJWT) {
|
||||
let claims = Claims::new(Some(tenant_id), Scope::Tenant);
|
||||
@@ -642,33 +879,36 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
};
|
||||
|
||||
if let Some(node) = node {
|
||||
println!("Starting existing postgres {node_name}...");
|
||||
println!("Starting existing postgres {}...", node_name);
|
||||
node.start(&auth_token)?;
|
||||
} else {
|
||||
let branch_name = sub_args
|
||||
.get_one::<String>("branch-name")
|
||||
.map(|s| s.as_str())
|
||||
.value_of("branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let timeline_id = env
|
||||
.get_branch_timeline_id(branch_name, tenant_id)
|
||||
.ok_or_else(|| {
|
||||
anyhow!("Found no timeline id for branch name '{branch_name}'")
|
||||
anyhow!("Found no timeline id for branch name '{}'", branch_name)
|
||||
})?;
|
||||
let lsn = sub_args
|
||||
.get_one::<String>("lsn")
|
||||
.map(|lsn_str| Lsn::from_str(lsn_str))
|
||||
.value_of("lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse Lsn from the request")?;
|
||||
let pg_version = sub_args
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.context("Failed to get `pg-version` from the argument string")?;
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
// when used with a custom port this results in non-obvious behaviour
|
||||
// the port is remembered from the first start command, i.e.
|
||||
// start --port X
|
||||
// stop
|
||||
// start <-- will also use port X even without explicit port argument
|
||||
println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");
|
||||
println!(
|
||||
"Starting new postgres (v{}) {} on timeline {} ...",
|
||||
pg_version, node_name, timeline_id
|
||||
);
|
||||
|
||||
let node =
|
||||
cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
|
||||
@@ -677,18 +917,18 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
}
|
||||
"stop" => {
|
||||
let node_name = sub_args
|
||||
.get_one::<String>("node")
|
||||
.value_of("node")
|
||||
.ok_or_else(|| anyhow!("No node name was provided to stop"))?;
|
||||
let destroy = sub_args.get_flag("destroy");
|
||||
let destroy = sub_args.is_present("destroy");
|
||||
|
||||
let node = cplane
|
||||
.nodes
|
||||
.get(&(tenant_id, node_name.to_string()))
|
||||
.with_context(|| format!("postgres {node_name} is not found"))?;
|
||||
.get(&(tenant_id, node_name.to_owned()))
|
||||
.with_context(|| format!("postgres {} is not found", node_name))?;
|
||||
node.stop(destroy)?;
|
||||
}
|
||||
|
||||
_ => bail!("Unexpected pg subcommand '{sub_name}'"),
|
||||
_ => bail!("Unexpected pg subcommand '{}'", sub_name),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -706,10 +946,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
Some(("stop", stop_match)) => {
|
||||
let immediate = stop_match
|
||||
.get_one::<String>("stop-mode")
|
||||
.map(|s| s.as_str())
|
||||
== Some("immediate");
|
||||
let immediate = stop_match.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = pageserver.stop(immediate) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
@@ -759,7 +996,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
};
|
||||
|
||||
// All the commands take an optional safekeeper name argument
|
||||
let sk_id = if let Some(id_str) = sub_args.get_one::<String>("id") {
|
||||
let sk_id = if let Some(id_str) = sub_args.value_of("id") {
|
||||
NodeId(id_str.parse().context("while parsing safekeeper id")?)
|
||||
} else {
|
||||
DEFAULT_SAFEKEEPER_ID
|
||||
@@ -775,8 +1012,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
"stop" => {
|
||||
let immediate =
|
||||
sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
|
||||
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
@@ -785,8 +1021,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
"restart" => {
|
||||
let immediate =
|
||||
sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
|
||||
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
@@ -830,8 +1065,7 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
|
||||
}
|
||||
|
||||
fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let immediate =
|
||||
sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
|
||||
let immediate = sub_match.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
|
||||
@@ -864,219 +1098,3 @@ fn try_stop_etcd_process(env: &local_env::LocalEnv) {
|
||||
eprintln!("etcd stop failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
let branch_name_arg = Arg::new("branch-name")
|
||||
.long("branch-name")
|
||||
.help("Name of the branch to be created or used as an alias for other services")
|
||||
.required(false);
|
||||
|
||||
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
|
||||
|
||||
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
|
||||
|
||||
let tenant_id_arg = Arg::new("tenant-id")
|
||||
.long("tenant-id")
|
||||
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
|
||||
.required(false);
|
||||
|
||||
let timeline_id_arg = Arg::new("timeline-id")
|
||||
.long("timeline-id")
|
||||
.help("Timeline id. Represented as a hexadecimal string 32 symbols length")
|
||||
.required(false);
|
||||
|
||||
let pg_version_arg = Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.required(false)
|
||||
.value_parser(value_parser!(u32))
|
||||
.default_value(DEFAULT_PG_VERSION);
|
||||
|
||||
let port_arg = Arg::new("port")
|
||||
.long("port")
|
||||
.required(false)
|
||||
.value_parser(value_parser!(u16))
|
||||
.value_name("port");
|
||||
|
||||
let stop_mode_arg = Arg::new("stop-mode")
|
||||
.short('m')
|
||||
.value_parser(["fast", "immediate"])
|
||||
.help("If 'immediate', don't flush repository data at shutdown")
|
||||
.required(false)
|
||||
.value_name("stop-mode");
|
||||
|
||||
let pageserver_config_args = Arg::new("pageserver-config-override")
|
||||
.long("pageserver-config-override")
|
||||
.num_args(1)
|
||||
.action(ArgAction::Append)
|
||||
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
|
||||
.required(false);
|
||||
|
||||
let lsn_arg = Arg::new("lsn")
|
||||
.long("lsn")
|
||||
.help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
|
||||
.required(false);
|
||||
|
||||
Command::new("Neon CLI")
|
||||
.arg_required_else_help(true)
|
||||
.version(GIT_VERSION)
|
||||
.subcommand(
|
||||
Command::new("init")
|
||||
.about("Initialize a new Neon repository")
|
||||
.arg(pageserver_config_args.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(
|
||||
Arg::new("config")
|
||||
.long("config")
|
||||
.required(false)
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.value_name("config"),
|
||||
)
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("timeline")
|
||||
.about("Manage timelines")
|
||||
.subcommand(Command::new("list")
|
||||
.about("List all timelines, available to this pageserver")
|
||||
.arg(tenant_id_arg.clone()))
|
||||
.subcommand(Command::new("branch")
|
||||
.about("Create a new timeline, using another timeline as a base, copying its data")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
|
||||
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
|
||||
.arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn")
|
||||
.help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
|
||||
.subcommand(Command::new("create")
|
||||
.about("Create a new blank timeline")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("import")
|
||||
.about("Import timeline from basebackup directory")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(Arg::new("node-name").long("node-name")
|
||||
.help("Name to assign to the imported timeline"))
|
||||
.arg(Arg::new("base-tarfile")
|
||||
.long("base-tarfile")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.help("Basebackup tarfile to import")
|
||||
)
|
||||
.arg(Arg::new("base-lsn").long("base-lsn")
|
||||
.help("Lsn the basebackup starts at"))
|
||||
.arg(Arg::new("wal-tarfile")
|
||||
.long("wal-tarfile")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.help("Wal to add after base")
|
||||
)
|
||||
.arg(Arg::new("end-lsn").long("end-lsn")
|
||||
.help("Lsn the basebackup ends at"))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
).subcommand(
|
||||
Command::new("tenant")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage tenants")
|
||||
.subcommand(Command::new("list"))
|
||||
.subcommand(Command::new("create")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("config")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("pageserver")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage pageserver")
|
||||
.subcommand(Command::new("status"))
|
||||
.subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
|
||||
.subcommand(Command::new("stop").about("Stop local pageserver")
|
||||
.arg(stop_mode_arg.clone()))
|
||||
.subcommand(Command::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("safekeeper")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage safekeepers")
|
||||
.subcommand(Command::new("start")
|
||||
.about("Start local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("stop")
|
||||
.about("Stop local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("restart")
|
||||
.about("Restart local safekeeper")
|
||||
.arg(safekeeper_id_arg)
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("pg")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage postgres instances")
|
||||
.subcommand(Command::new("list").arg(tenant_id_arg.clone()))
|
||||
.subcommand(Command::new("create")
|
||||
.about("Create a postgres compute node")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(
|
||||
Arg::new("config-only")
|
||||
.help("Don't do basebackup, create compute node with only config files")
|
||||
.long("config-only")
|
||||
.required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("start")
|
||||
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg)
|
||||
.arg(timeline_id_arg)
|
||||
.arg(lsn_arg)
|
||||
.arg(port_arg)
|
||||
.arg(pg_version_arg)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("stop")
|
||||
.arg(pg_node_arg)
|
||||
.arg(tenant_id_arg)
|
||||
.arg(
|
||||
Arg::new("destroy")
|
||||
.help("Also delete data directory (now optional, should be default in future)")
|
||||
.long("destroy")
|
||||
.action(ArgAction::SetTrue)
|
||||
.required(false)
|
||||
)
|
||||
)
|
||||
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("start")
|
||||
.about("Start page server and safekeepers")
|
||||
.arg(pageserver_config_args)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("stop")
|
||||
.about("Stop page server and safekeepers")
|
||||
.arg(stop_mode_arg)
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -183,18 +183,18 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
|
||||
let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
|
||||
let pg_path = self.env.pg_bin_dir(pg_version).join("postgres");
|
||||
let mut cmd = Command::new(&pg_path);
|
||||
|
||||
cmd.arg("--sync-safekeepers")
|
||||
.env_clear()
|
||||
.env(
|
||||
"LD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(pg_version).to_str().unwrap(),
|
||||
)
|
||||
.env(
|
||||
"DYLD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(pg_version).to_str().unwrap(),
|
||||
)
|
||||
.env("PGDATA", self.pgdata().to_str().unwrap())
|
||||
.stdout(Stdio::piped())
|
||||
@@ -282,7 +282,9 @@ impl PostgresNode {
|
||||
fn setup_pg_conf(&self, auth_type: AuthType) -> Result<()> {
|
||||
let mut conf = PostgresConf::new();
|
||||
conf.append("max_wal_senders", "10");
|
||||
conf.append("wal_log_hints", "off");
|
||||
// wal_log_hints is mandatory when running against pageserver (see gh issue#192)
|
||||
// TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE?
|
||||
conf.append("wal_log_hints", "on");
|
||||
conf.append("max_replication_slots", "10");
|
||||
conf.append("hot_standby", "on");
|
||||
conf.append("shared_buffers", "1MB");
|
||||
@@ -420,7 +422,7 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
|
||||
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
|
||||
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version).join("pg_ctl");
|
||||
let mut cmd = Command::new(pg_ctl_path);
|
||||
cmd.args(
|
||||
[
|
||||
@@ -438,11 +440,11 @@ impl PostgresNode {
|
||||
.env_clear()
|
||||
.env(
|
||||
"LD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(self.pg_version).to_str().unwrap(),
|
||||
)
|
||||
.env(
|
||||
"DYLD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(self.pg_version).to_str().unwrap(),
|
||||
);
|
||||
if let Some(token) = auth_token {
|
||||
cmd.env("ZENITH_AUTH_TOKEN", token);
|
||||
|
||||
@@ -52,10 +52,6 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
// size smaller. Our test etcd clusters are very small.
|
||||
// See https://github.com/etcd-io/etcd/issues/7910
|
||||
"--quota-backend-bytes=100000000".to_string(),
|
||||
// etcd doesn't compact (vacuum) with default settings,
|
||||
// enable it to prevent space exhaustion.
|
||||
"--auto-compaction-mode=revision".to_string(),
|
||||
"--auto-compaction-retention=1".to_string(),
|
||||
])
|
||||
.stdout(Stdio::from(etcd_stdout_file))
|
||||
.stderr(Stdio::from(etcd_stderr_file))
|
||||
|
||||
@@ -201,28 +201,28 @@ impl LocalEnv {
|
||||
self.pg_distrib_dir.clone()
|
||||
}
|
||||
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
match pg_version {
|
||||
14 => Ok(path.join(format!("v{pg_version}"))),
|
||||
15 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => path.join(format!("v{pg_version}")),
|
||||
15 => path.join(format!("v{pg_version}")),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,7 +285,7 @@ impl LocalEnv {
|
||||
branch_name: &str,
|
||||
tenant_id: TenantId,
|
||||
) -> Option<TimelineId> {
|
||||
self.branch_name_mappings
|
||||
dbg!(&self.branch_name_mappings)
|
||||
.get(branch_name)?
|
||||
.iter()
|
||||
.find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
|
||||
@@ -422,10 +422,10 @@ impl LocalEnv {
|
||||
"directory '{}' already exists. Perhaps already initialized?",
|
||||
base_path.display()
|
||||
);
|
||||
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
||||
if !self.pg_bin_dir(pg_version).join("postgres").exists() {
|
||||
bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
self.pg_bin_dir(pg_version)?.display()
|
||||
self.pg_bin_dir(pg_version).display()
|
||||
);
|
||||
}
|
||||
for binary in ["pageserver", "safekeeper"] {
|
||||
|
||||
@@ -12,8 +12,13 @@ use nix::unistd::Pid;
|
||||
use postgres::Config;
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use safekeeper::http::models::TimelineCreateRequest;
|
||||
use thiserror::Error;
|
||||
use utils::{connstring::connection_address, http::error::HttpErrorBody, id::NodeId};
|
||||
use utils::{
|
||||
connstring::connection_address,
|
||||
http::error::HttpErrorBody,
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
};
|
||||
|
||||
use crate::local_env::{LocalEnv, SafekeeperConf};
|
||||
use crate::storage::PageServerNode;
|
||||
@@ -123,6 +128,7 @@ impl SafekeeperNode {
|
||||
.args(&["--id", self.id.to_string().as_ref()])
|
||||
.args(&["--listen-pg", &listen_pg])
|
||||
.args(&["--listen-http", &listen_http])
|
||||
.args(&["--recall", "1 second"])
|
||||
.arg("--daemonize"),
|
||||
);
|
||||
if !self.conf.sync {
|
||||
@@ -275,4 +281,24 @@ impl SafekeeperNode {
|
||||
.error_from_body()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn timeline_create(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
peer_ids: Vec<NodeId>,
|
||||
) -> Result<()> {
|
||||
Ok(self
|
||||
.http_request(
|
||||
Method::POST,
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
)
|
||||
.json(&TimelineCreateRequest {
|
||||
timeline_id,
|
||||
peer_ids,
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json()?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ use anyhow::{bail, Context};
|
||||
use nix::errno::Errno;
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use pageserver_api::models::{
|
||||
use pageserver::http::models::{
|
||||
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use postgres::{Config, NoTls};
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
|
||||
PG_VERSION=${PG_VERSION:-14}
|
||||
|
||||
SPEC_FILE_ORG=/var/db/postgres/specs/spec.json
|
||||
SPEC_FILE=/tmp/spec.json
|
||||
|
||||
echo "Waiting for pageserver to become ready."
|
||||
while ! nc -z pageserver 6400; do
|
||||
sleep 1;
|
||||
done
|
||||
echo "Page server is ready."
|
||||
|
||||
echo "Create a tenant and timeline"
|
||||
PARAMS=(
|
||||
-sb
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{}"
|
||||
http://pageserver:9898/v1/tenant/
|
||||
)
|
||||
tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g')
|
||||
|
||||
PARAMS=(
|
||||
-sb
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
|
||||
echo "Overwrite tenant id and timeline id in spec file"
|
||||
tenant_id=$(echo ${result} | jq -r .tenant_id)
|
||||
timeline_id=$(echo ${result} | jq -r .timeline_id)
|
||||
|
||||
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
|
||||
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
|
||||
|
||||
cat ${SPEC_FILE}
|
||||
|
||||
echo "Start compute node"
|
||||
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
|
||||
-C "postgresql://cloud_admin@localhost:55433/postgres" \
|
||||
-b /usr/local/bin/postgres \
|
||||
-S ${SPEC_FILE}
|
||||
@@ -1,141 +0,0 @@
|
||||
{
|
||||
"format_version": 1.0,
|
||||
|
||||
"timestamp": "2022-10-12T18:00:00.000Z",
|
||||
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c",
|
||||
|
||||
"cluster": {
|
||||
"cluster_id": "docker_compose",
|
||||
"name": "docker_compose_test",
|
||||
"state": "restarted",
|
||||
"roles": [
|
||||
{
|
||||
"name": "cloud_admin",
|
||||
"encrypted_password": "b093c0d3b281ba6da1eacc608620abd8",
|
||||
"options": null
|
||||
}
|
||||
],
|
||||
"databases": [
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"name": "fsync",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_level",
|
||||
"value": "replica",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "hot_standby",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_log_hints",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "log_connections",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "port",
|
||||
"value": "55433",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "shared_buffers",
|
||||
"value": "1MB",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_connections",
|
||||
"value": "100",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "listen_addresses",
|
||||
"value": "0.0.0.0",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_wal_senders",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_slots",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "wal_sender_timeout",
|
||||
"value": "5s",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_keep_size",
|
||||
"value": "0",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "password_encryption",
|
||||
"value": "md5",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "restart_after_crash",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "synchronous_standby_names",
|
||||
"value": "walproposer",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "neon",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.safekeepers",
|
||||
"value": "safekeeper1:5454,safekeeper2:5454,safekeeper3:5454",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.timeline_id",
|
||||
"value": "TIMELINE_ID",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.tenant_id",
|
||||
"value": "TENANT_ID",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.pageserver_connstring",
|
||||
"value": "host=pageserver port=6400",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_write_lag",
|
||||
"value": "500MB",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_flush_lag",
|
||||
"value": "10GB",
|
||||
"vartype": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"delta_operations": [
|
||||
]
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
version: '3'
|
||||
|
||||
services:
|
||||
etcd:
|
||||
image: quay.io/coreos/etcd:v3.5.4
|
||||
ports:
|
||||
- 2379:2379
|
||||
- 2380:2380
|
||||
environment:
|
||||
# This significantly speeds up etcd and we don't need data persistency there anyway.
|
||||
ETCD_UNSAFE_NO_FSYNC: "1"
|
||||
command:
|
||||
- "etcd"
|
||||
- "--auto-compaction-mode=revision"
|
||||
- "--auto-compaction-retention=1"
|
||||
- "--name=etcd-cluster"
|
||||
- "--initial-cluster-state=new"
|
||||
- "--initial-cluster-token=etcd-cluster-1"
|
||||
- "--initial-cluster=etcd-cluster=http://etcd:2380"
|
||||
- "--initial-advertise-peer-urls=http://etcd:2380"
|
||||
- "--advertise-client-urls=http://etcd:2379"
|
||||
- "--listen-client-urls=http://0.0.0.0:2379"
|
||||
- "--listen-peer-urls=http://0.0.0.0:2380"
|
||||
- "--quota-backend-bytes=134217728" # 128 MB
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
environment:
|
||||
- MINIO_ROOT_USER=minio
|
||||
- MINIO_ROOT_PASSWORD=password
|
||||
command: server /data --address :9000 --console-address ":9001"
|
||||
|
||||
minio_create_buckets:
|
||||
image: minio/mc
|
||||
environment:
|
||||
- MINIO_ROOT_USER=minio
|
||||
- MINIO_ROOT_PASSWORD=password
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do
|
||||
echo 'Waiting to start minio...' && sleep 1;
|
||||
done;
|
||||
/usr/bin/mc mb minio/neon --region=eu-north-1;
|
||||
exit 0;"
|
||||
depends_on:
|
||||
- minio
|
||||
|
||||
pageserver:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- BROKER_ENDPOINT='http://etcd:2379'
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 6400:6400 # pg protocol handler
|
||||
- 9898:9898 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "/usr/local/bin/pageserver -D /data/.neon/
|
||||
-c \"broker_endpoints=[$$BROKER_ENDPOINT]\"
|
||||
-c \"listen_pg_addr='0.0.0.0:6400'\"
|
||||
-c \"listen_http_addr='0.0.0.0:9898'\"
|
||||
-c \"remote_storage={endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/pageserver/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper1:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
|
||||
- SAFEKEEPER_ID=1
|
||||
- BROKER_ENDPOINT=http://etcd:2379
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 5454:5454 # pg protocol handler
|
||||
- 7676:7676 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
|
||||
--listen-http='0.0.0.0:7676'
|
||||
--id=$$SAFEKEEPER_ID
|
||||
--broker-endpoints=$$BROKER_ENDPOINT
|
||||
-D /data
|
||||
--remote-storage=\"{endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/safekeeper/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper2:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
|
||||
- SAFEKEEPER_ID=2
|
||||
- BROKER_ENDPOINT=http://etcd:2379
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 5454:5454 # pg protocol handler
|
||||
- 7677:7676 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
|
||||
--listen-http='0.0.0.0:7676'
|
||||
--id=$$SAFEKEEPER_ID
|
||||
--broker-endpoints=$$BROKER_ENDPOINT
|
||||
-D /data
|
||||
--remote-storage=\"{endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/safekeeper/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper3:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
|
||||
- SAFEKEEPER_ID=3
|
||||
- BROKER_ENDPOINT=http://etcd:2379
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 5454:5454 # pg protocol handler
|
||||
- 7678:7676 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
|
||||
--listen-http='0.0.0.0:7676'
|
||||
--id=$$SAFEKEEPER_ID
|
||||
--broker-endpoints=$$BROKER_ENDPOINT
|
||||
-D /data
|
||||
--remote-storage=\"{endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/safekeeper/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
compute:
|
||||
build:
|
||||
context: ./image/compute
|
||||
args:
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}:${TAG:-latest}
|
||||
- http_proxy=$http_proxy
|
||||
- https_proxy=$https_proxy
|
||||
environment:
|
||||
- PG_VERSION=${PG_VERSION:-14}
|
||||
#- RUST_BACKTRACE=1
|
||||
volumes:
|
||||
- ./compute/var/db/postgres/specs/:/var/db/postgres/specs/
|
||||
- ./compute/shell/:/shell/
|
||||
ports:
|
||||
- 55433:55433 # pg protocol handler
|
||||
- 3080:3080 # http endpoints
|
||||
entrypoint:
|
||||
- "/shell/compute.sh"
|
||||
depends_on:
|
||||
- safekeeper1
|
||||
- safekeeper2
|
||||
- safekeeper3
|
||||
- pageserver
|
||||
|
||||
compute_is_ready:
|
||||
image: postgres:latest
|
||||
entrypoint:
|
||||
- "/bin/bash"
|
||||
- "-c"
|
||||
command:
|
||||
- "until pg_isready -h compute -p 55433 ; do
|
||||
echo 'Waiting to start compute...' && sleep 1;
|
||||
done"
|
||||
depends_on:
|
||||
- compute
|
||||
@@ -1,10 +0,0 @@
|
||||
ARG COMPUTE_IMAGE=compute-node-v14:latest
|
||||
FROM neondatabase/${COMPUTE_IMAGE}
|
||||
|
||||
USER root
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
netcat
|
||||
|
||||
USER postgres
|
||||
@@ -80,6 +80,4 @@
|
||||
- [015-storage-messaging](rfcs/015-storage-messaging.md)
|
||||
- [016-connection-routing](rfcs/016-connection-routing.md)
|
||||
- [017-timeline-data-management](rfcs/017-timeline-data-management.md)
|
||||
- [018-storage-messaging-2](rfcs/018-storage-messaging-2.md)
|
||||
- [019-tenant-timeline-lifecycles](rfcs/019-tenant-timeline-lifecycles.md)
|
||||
- [cluster-size-limits](rfcs/cluster-size-limits.md)
|
||||
|
||||
@@ -18,67 +18,3 @@ We build all images after a successful `release` tests run and push automaticall
|
||||
1. `neondatabase/compute-tools` and `neondatabase/compute-node`
|
||||
|
||||
2. `neondatabase/neon`
|
||||
|
||||
## Docker Compose example
|
||||
|
||||
You can see a [docker compose](https://docs.docker.com/compose/) example to create a Neon cluster in [/docker-compose/docker-compose.yml](/docker-compose/docker-compose.yml). It creates the following containers.
|
||||
|
||||
- etcd x 1
|
||||
- pageserver x 1
|
||||
- safekeeper x 3
|
||||
- compute x 1
|
||||
- MinIO x 1 # This is Amazon S3 compatible object storage
|
||||
|
||||
### How to use
|
||||
|
||||
1. create containers
|
||||
|
||||
You can specify the version of the Neon cluster using the following environment variables.
|
||||
- PG_VERSION: postgres version for compute (default is 14)
|
||||
- TAG: the tag version of [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags) (default is latest), which is tagged in [CI test](/.github/workflows/build_and_test.yml)
|
||||
```
|
||||
$ cd docker-compose/
|
||||
$ docker-compose down # remove the containers if they exist
|
||||
$ PG_VERSION=15 TAG=2221 docker-compose up --build -d # You can specify the postgres and image version
|
||||
Creating network "dockercompose_default" with the default driver
|
||||
Creating dockercompose_etcd3_1 ...
|
||||
(...omit...)
|
||||
```
|
||||
|
||||
2. connect compute node
|
||||
```
|
||||
$ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
|
||||
$ psql -h localhost -p 55433 -U cloud_admin
|
||||
postgres=# CREATE TABLE t(key int primary key, value text);
|
||||
CREATE TABLE
|
||||
postgres=# insert into t values(1,1);
|
||||
INSERT 0 1
|
||||
postgres=# select * from t;
|
||||
key | value
|
||||
-----+-------
|
||||
1 | 1
|
||||
(1 row)
|
||||
```
|
||||
|
||||
3. If you want to see the logs, you can use the `docker-compose logs` command.
|
||||
```
|
||||
# check the container name you want to see
|
||||
$ docker ps
|
||||
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
|
||||
d6968a5ae912 dockercompose_compute "/shell/compute.sh" 5 minutes ago Up 5 minutes 0.0.0.0:3080->3080/tcp, 0.0.0.0:55433->55433/tcp dockercompose_compute_1
|
||||
(...omit...)
|
||||
|
||||
$ docker logs -f dockercompose_compute_1
|
||||
2022-10-21 06:15:48.757 GMT [56] LOG: connection authorized: user=cloud_admin database=postgres application_name=psql
|
||||
2022-10-21 06:17:00.307 GMT [56] LOG: [NEON_SMGR] libpagestore: connected to 'host=pageserver port=6400'
|
||||
(...omit...)
|
||||
```
|
||||
|
||||
4. If you want to see durable data in MinIO, which is S3-compatible storage
|
||||
|
||||
Access http://localhost:9001 and sign in.
|
||||
|
||||
- Username: `minio`
|
||||
- Password: `password`
|
||||
|
||||
You can see durable pages and WAL data in the `neon` bucket.
|
||||
@@ -1,163 +0,0 @@
|
||||
# Storage messaging
|
||||
|
||||
Safekeepers need to communicate to each other to
|
||||
* Trim WAL on safekeepers;
|
||||
* Decide on which SK should push WAL to the S3;
|
||||
* Decide on when to shut down SK<->pageserver connection;
|
||||
* Understand state of each other to perform peer recovery;
|
||||
|
||||
Pageservers need to communicate to safekeepers to decide which SK should provide
|
||||
WAL to the pageserver.
|
||||
|
||||
This is an iteration on [015-storage-messaging](https://github.com/neondatabase/neon/blob/main/docs/rfcs/015-storage-messaging.md) describing current situation,
|
||||
potential performance issue and ways to address it.
|
||||
|
||||
## Background
|
||||
|
||||
What we have currently is very close to etcd variant described in
|
||||
015-storage-messaging. Basically, we have single `SkTimelineInfo` message
|
||||
periodically sent by all safekeepers to etcd for each timeline.
|
||||
* Safekeepers subscribe to it to learn status of peers (currently they subscribe to
|
||||
'everything', but they can and should fetch data only for timelines they hold).
|
||||
* Pageserver subscribes to it (separate watch per timeline) to learn safekeepers
|
||||
positions; based on that, it decides from which safekeepers to pull WAL.
|
||||
|
||||
Also, safekeepers use etcd elections API to make sure only single safekeeper
|
||||
offloads WAL.
|
||||
|
||||
It works, and callmemaybe is gone. However, this has a performance
|
||||
hazard. Currently deployed etcd can do about 6k puts per second (using its own
|
||||
`benchmark` tool); on my 6 core laptop, while running on tmpfs, this gets to
|
||||
35k. Making benchmark closer to our usage [etcd watch bench](https://github.com/arssher/etcd-client/blob/watch-bench/examples/watch_bench.rs),
|
||||
I get ~10k received messages per second with various numbers of publisher-subscribers
(laptop, tmpfs). Dividing this by 12 (3 safekeepers generate the message, 1 pageserver + 3 safekeepers consume it) we
get about 800 active timelines, if a message is sent each second. Not extremely
|
||||
low, but quite reachable.
|
||||
|
||||
A lot of idle watches seem to be ok though -- which is good, as pageserver
|
||||
subscribes to all its timelines regardless of their activity.
|
||||
|
||||
Also, running etcd with fsyncs disabled is messy -- data dir must be wiped on
|
||||
each restart or there is a risk of corruption errors.
|
||||
|
||||
The reason is that etcd does much more than what we need; it is a fault-tolerant
store with strong consistency, but I claim all we need here is just the simplest pub
sub with best-effort delivery, because
|
||||
* We already have centralized source of truth for long running data, like which
|
||||
tlis are on which nodes -- the console.
|
||||
* Momentary data (safekeeper/pageserver progress) doesn't make sense to persist.
|
||||
Instead of putting each change to the broker and expecting it to reliably deliver it,
it is better to just have a constant flow of data for active timelines: 1) it
serves as natural heartbeats -- if a node can't send, we shouldn't pull WAL from
it; 2) it is simpler -- no need to track delivery to/from the broker.
|
||||
Moreover, latency here is important: the faster we obtain fresh data, the
|
||||
faster we can switch to proper safekeeper after failure.
|
||||
* As for WAL offloading leader election, it is trivial to achieve through these
|
||||
heartbeats -- just take a suitable node through a deterministic rule (min node
id). Once the network is stable, this is a converging process (well, except
for complicated failure topologies, but even then making it converge is not
|
||||
hard). Such elections bear some risk of several offloaders running
|
||||
concurrently for a short period of time, but that's harmless.
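To make the rule concrete, here is a minimal sketch of the min-node-id selection, assuming "live peers" is simply whatever set survived the recent heartbeats; the `NodeId` wrapper and function names are illustrative, not the safekeeper's actual types:

```
/// Sketch only: the WAL offloader is the smallest node id among the peers
/// whose heartbeats are still fresh, plus ourselves.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct NodeId(u64);

fn should_offload(me: NodeId, live_peers: &[NodeId]) -> bool {
    live_peers.iter().copied().chain(std::iter::once(me)).min() == Some(me)
}

fn main() {
    let me = NodeId(2);
    // Node 1 missed its heartbeats, so among {2, 3} node 2 offloads.
    assert!(should_offload(me, &[NodeId(3)]));
    // Once node 1 is back, it takes over and node 2 stops offloading.
    assert!(!should_offload(me, &[NodeId(1), NodeId(3)]));
}
```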
|
||||
|
||||
Generally, if one needs strong consistency, electing a leader per se is not
enough; it must be accompanied by a number (a logical clock timestamp), checked at
every action to track causality. s3 doesn't provide CAS, so it can't
differentiate an old leader from a new one; this must be solved differently.
|
||||
|
||||
We could use etcd CAS (its most powerful/useful primitive actually) to issue
|
||||
these leader numbers (and e.g. prefix files in s3), but currently I don't see
|
||||
need for that.
|
||||
|
||||
|
||||
Obviously, a best-effort pub-sub is much simpler and more performant; the proposed one is described below.
|
||||
|
||||
## gRPC broker
|
||||
|
||||
I took tonic and [prototyped](https://github.com/neondatabase/neon/blob/asher/neon-broker/broker/src/broker.rs) a replacement for the functionality we currently use
with gRPC streams and tokio mpsc channels. The implementation description is in the file header.
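For illustration only, here is a toy, in-process sketch of the per-timeline best-effort fan-out idea, using a plain tokio broadcast channel in place of the actual gRPC streams; all type and field names below are assumptions for the example, not the prototype's API:

```
use std::collections::HashMap;
use tokio::sync::broadcast;

// Toy stand-in for the real message; fields are illustrative.
#[derive(Clone, Debug)]
struct SkTimelineInfo {
    safekeeper_id: u64,
    commit_lsn: u64,
}

// (tenant_id, timeline_id) in a simplified form.
type TimelineKey = (u128, u128);

#[derive(Default)]
struct Broker {
    channels: HashMap<TimelineKey, broadcast::Sender<SkTimelineInfo>>,
}

impl Broker {
    fn publish(&mut self, key: TimelineKey, msg: SkTimelineInfo) {
        let tx = self
            .channels
            .entry(key)
            .or_insert_with(|| broadcast::channel(128).0);
        // Best effort: if nobody is subscribed (or a subscriber lags),
        // the message is simply dropped.
        let _ = tx.send(msg);
    }

    fn subscribe(&mut self, key: TimelineKey) -> broadcast::Receiver<SkTimelineInfo> {
        self.channels
            .entry(key)
            .or_insert_with(|| broadcast::channel(128).0)
            .subscribe()
    }
}

#[tokio::main]
async fn main() {
    let mut broker = Broker::default();
    let mut rx = broker.subscribe((1, 1));
    broker.publish(
        (1, 1),
        SkTimelineInfo { safekeeper_id: 3, commit_lsn: 0x0169_8C78 },
    );
    println!("received: {:?}", rx.recv().await.unwrap());
}
```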
|
||||
|
||||
It is just 500 lines of code and core functionality is complete. 1-1 pub sub
|
||||
gives about 120k received messages per second; having multiple subscribers in
|
||||
different connections quickly scales to 1 million received messages per second.
I had concerns about many concurrent streams in a single connection, but 2^20
|
||||
subscribers still work (though eat memory, with 10 publishers 20GB are consumed;
|
||||
in this implementation each publisher holds full copy of all subscribers). There
|
||||
is `bench.rs` nearby which I used for testing.
|
||||
|
||||
`SkTimelineInfo` is wired here, but another message can be added (e.g. if
|
||||
pageservers want to communicate with each other) with templating.
|
||||
|
||||
### Fault tolerance
|
||||
|
||||
Since such a broker is stateless, we can run it under k8s. Or add proxying to
other members; with best-effort delivery this is simple.
|
||||
|
||||
### Security implications
|
||||
|
||||
Communication happens in a private network that is not exposed to users;
|
||||
additionally we can add auth to the broker.
|
||||
|
||||
## Alternative: get existing pub-sub
|
||||
|
||||
We could take some existing pub sub solution, e.g. RabbitMQ, Redis. But in this
|
||||
case IMV the simplicity of our own outweighs external dependency costs (RabbitMQ is
much more complicated and needs a VM; Redis Rust client maintenance is not
|
||||
ideal...). Also note that projects like CockroachDB and TiDB are based on gRPC
|
||||
as well.
|
||||
|
||||
## Alternative: direct communication
|
||||
|
||||
Apart from being transport, broker solves one more task: discovery, i.e. letting
|
||||
safekeepers and pageservers find each other. We can let safekeepers know, for
|
||||
each timeline, both other safekeepers for this timeline and pageservers serving
|
||||
it. In this case direct communication is possible:
|
||||
- each safekeeper pushes to each other safekeeper status of timelines residing
|
||||
on both of them, letting them remove WAL, decide who offloads, and decide on peer
|
||||
recovery;
|
||||
- each safekeeper pushes to each pageserver status of timelines residing on
|
||||
both of them, letting pageserver choose from which sk to pull WAL;
|
||||
|
||||
It was mostly described in [014-safekeeper-gossip](https://github.com/neondatabase/neon/blob/main/docs/rfcs/014-safekeepers-gossip.md), but I want to recap on that.
|
||||
|
||||
The main pro is one less dependency: fewer moving parts, easier to run Neon
locally/manually, fewer places to monitor. Fault tolerance for the broker disappears,
no Kubernetes or anything similar. To me this is a big thing.
|
||||
|
||||
Also (though not a big thing) idle watches for inactive timelines disappear:
|
||||
naturally safekeepers learn about compute connection first and start pushing
|
||||
status to the pageserver(s), notifying them that they should pull.
|
||||
|
||||
Importantly, I think that eventually knowing and persisting peers and
|
||||
pageservers on safekeepers is inevitable:
|
||||
- Knowing peer safekeepers for the timeline is required for correct
|
||||
automatic membership change -- new member set must be hardened on old
|
||||
majority before proceeding. It is required to get rid of sync-safekeepers
|
||||
as well (peer recovery up to flush_lsn).
|
||||
- Knowing pageservers where the timeline is attached is needed to
|
||||
1. Understand when to shut down activity on the timeline, i.e. push data to
|
||||
the broker. We can have a lot of timelines sleeping quietly which
|
||||
shouldn't occupy resources.
|
||||
2. Preserve WAL for these (currently we offload to s3 and take it from there,
|
||||
but serving locally is better, and we get one less condition on which WAL
|
||||
can be removed from s3).
|
||||
|
||||
I suppose this membership data should be passed to safekeepers directly from the
|
||||
console because
|
||||
1. Console is the original source of this data, conceptually this is the
|
||||
simplest way (rather than passing it through compute or something).
|
||||
2. We already have similar code for deleting timeline on safekeepers
|
||||
(and attaching/detaching timeline on pageserver), this is a typical
|
||||
action -- queue operation against storage node and execute it until it
|
||||
completes (or timeline is dropped).
|
||||
|
||||
Cons of direct communication are
|
||||
- It is more complicated: each safekeeper should maintain a set of peers it talks
to, and a set of timelines for each such peer -- they ought to be multiplexed
into a single connection.
|
||||
- In total, we have O(n^2) connections instead of O(n) with the broker scheme
|
||||
(still O(n) on each node). However, these are relatively stable, async and
|
||||
thus not very expensive, I don't think this is a big problem. Up to 10k
|
||||
storage nodes I doubt connection overhead would be noticeable.
|
||||
|
||||
I'd use gRPC for direct communication, and in this sense gRPC based broker is a
|
||||
step towards it.
|
||||
@@ -1,91 +0,0 @@
|
||||
# Managing Tenant and Timeline lifecycles
|
||||
|
||||
## Summary
|
||||
|
||||
The pageserver has a Tenant object in memory for each tenant it manages, and a
|
||||
Timeline for each timeline. There are a lot of tasks that operate on the tenants
|
||||
and timelines with references to those objects. We have some mechanisms to track
|
||||
which tasks are operating on each Tenant and Timeline, and to request them to
|
||||
shutdown when a tenant or timeline is deleted, but it does not cover all uses,
|
||||
and as a result we have many race conditions around tenant/timeline shutdown.
|
||||
|
||||
## Motivation
|
||||
|
||||
We have a bunch of race conditions that can produce weird errors and can be hard
|
||||
to track down.
|
||||
|
||||
## Non Goals
|
||||
|
||||
This RFC only covers the problem of ensuring that a task/thread isn't operating
|
||||
on a Tenant or Timeline. It does not cover what states, aside from Active and
|
||||
non-Active, each Tenant and Timeline should have, or when exactly the transitions
|
||||
should happen.
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
Pageserver. Although I wonder if the safekeeper should have a similar mechanism.
|
||||
|
||||
## Current situation
|
||||
|
||||
Most pageserver tasks are managed by task_mgr.rs:
|
||||
|
||||
- LibpqEndpointListener
|
||||
- HttpEndPointListener
|
||||
- WalReceiverManager and -Connection
|
||||
- GarbageCollector and Compaction
|
||||
- InitialLogicalSizeCalculation
|
||||
|
||||
In addition to those tasks, the walreceiver performs some direct tokio::spawn
|
||||
calls to spawn tasks that are not registered with 'task_mgr'. And all of these
|
||||
tasks can spawn extra operations with tokio spawn_blocking.
|
||||
|
||||
Whenever a tenant or timeline is removed from the system, by pageserver
|
||||
shutdown, delete_timeline or tenant-detach operation, we rely on the task
|
||||
registry in 'task_mgr.rs' to wait until there are no tasks operating on the
|
||||
tenant or timeline, before its Tenant/Timeline object is removed. That relies on
|
||||
each task to register itself with the tenant/timeline ID in
|
||||
'task_mgr.rs'. However, there are many gaps in that. For example,
|
||||
GarbageCollection and Compaction tasks are registered with the tenant, but when
|
||||
they proceed to operate on a particular timeline of the tenant, they don't
|
||||
register with timeline ID. Because of that, the timeline can be deleted while GC
|
||||
or compaction is running on it, causing failures in the GC or compaction (see
|
||||
https://github.com/neondatabase/neon/issues/2442).
|
||||
|
||||
Another problem is that the task registry only works for tokio Tasks. There is
|
||||
no way to register a piece of code that runs inside spawn_blocking(), for
|
||||
example.
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
This "voluntary" registration of tasks is fragile. Let's use Rust language features
|
||||
to enforce that a tenant/timeline cannot be removed from the system when there is
|
||||
still some code operating on it.
|
||||
|
||||
Let's introduce new Guard objects for Tenant and Timeline, and do all actions through
|
||||
the Guard object. Something like:
|
||||
|
||||
TenantActiveGuard: Guard object over Arc<Tenant>. When you acquire the guard,
|
||||
the code checks that the tenant is in Active state. If it's not, you get an
|
||||
error. You can change the state of the tenant to Stopping while there are
|
||||
TenantActiveGuard objects still on it, to prevent new TenantActiveGuards from
|
||||
being acquired, but the Tenant cannot be removed until all the guards are gone.
|
||||
|
||||
TenantMaintenanceGuard: Like TenantActiveGuard, but can be held even when the
|
||||
tenant is not in Active state. Used for operations like attach/detach. Perhaps
|
||||
allow only one such guard on a Tenant at a time.
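A minimal sketch of how such a guard could look, assuming a simplified Tenant that carries just a state and a count of outstanding guards (the real pageserver type would of course hold much more):

```
use std::sync::{Arc, Condvar, Mutex};

#[derive(Clone, Copy, PartialEq, Eq)]
enum TenantState {
    Active,
    Stopping,
}

struct TenantInner {
    state: TenantState,
    active_guards: usize,
}

struct Tenant {
    inner: Mutex<TenantInner>,
    guards_gone: Condvar,
}

/// Holding this guard proves the tenant was Active when the work started
/// and keeps it from being removed until the guard is dropped.
struct TenantActiveGuard {
    tenant: Arc<Tenant>,
}

impl TenantActiveGuard {
    /// Fails if the tenant is not Active, so no new work can start once
    /// the state has been flipped to Stopping.
    fn acquire(tenant: Arc<Tenant>) -> Result<Self, &'static str> {
        let mut inner = tenant.inner.lock().unwrap();
        if inner.state != TenantState::Active {
            return Err("tenant is not active");
        }
        inner.active_guards += 1;
        drop(inner);
        Ok(TenantActiveGuard { tenant })
    }
}

impl Drop for TenantActiveGuard {
    fn drop(&mut self) {
        let mut inner = self.tenant.inner.lock().unwrap();
        inner.active_guards -= 1;
        if inner.active_guards == 0 {
            self.tenant.guards_gone.notify_all();
        }
    }
}

impl Tenant {
    /// Flip to Stopping (so no new guards can be acquired) and wait for the
    /// existing ones to drain; only then is it safe to remove the tenant.
    fn shutdown(&self) {
        let mut inner = self.inner.lock().unwrap();
        inner.state = TenantState::Stopping;
        while inner.active_guards > 0 {
            inner = self.guards_gone.wait(inner).unwrap();
        }
    }
}

fn main() {
    let tenant = Arc::new(Tenant {
        inner: Mutex::new(TenantInner { state: TenantState::Active, active_guards: 0 }),
        guards_gone: Condvar::new(),
    });

    let guard = TenantActiveGuard::acquire(Arc::clone(&tenant)).unwrap();
    // ... do work on the tenant while holding `guard` ...
    drop(guard);

    tenant.shutdown();
    assert!(TenantActiveGuard::acquire(tenant).is_err());
}
```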
|
||||
|
||||
Similarly for Timelines. We don't currently have a "state" on Timeline, but I think
|
||||
we need at least two states: Active and Stopping. The Stopping state is used at
|
||||
deletion, to prevent new TimelineActiveGuards from appearing, while you wait for
|
||||
existing TimelineActiveGuards to die out.
|
||||
|
||||
The shutdown-signaling, using shutdown_watcher() and is_shutdown_requested(),
|
||||
probably also needs changes to deal with the new Guards. The rule is that if you
|
||||
have a TenantActiveGuard, and the tenant's state changes from Active to
|
||||
Stopping, the is_shutdown_requested() function should return true, and
|
||||
the shutdown_watcher() future should return.
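One way this signaling side could look is sketched below, assuming the tenant state is carried on a tokio watch channel; the names mirror the RFC's proposal, not the existing task_mgr API:

```
use tokio::sync::watch;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum TenantState {
    Active,
    Stopping,
}

struct ShutdownSignal {
    state: watch::Receiver<TenantState>,
}

impl ShutdownSignal {
    /// Cheap synchronous check, usable from loops in blocking code.
    fn is_shutdown_requested(&self) -> bool {
        *self.state.borrow() == TenantState::Stopping
    }

    /// Await-able counterpart: resolves once the state leaves Active.
    async fn shutdown_watcher(&mut self) {
        while *self.state.borrow() == TenantState::Active {
            if self.state.changed().await.is_err() {
                // Sender dropped: treat it as a shutdown request.
                break;
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, rx) = watch::channel(TenantState::Active);
    let mut signal = ShutdownSignal { state: rx };
    assert!(!signal.is_shutdown_requested());

    tx.send(TenantState::Stopping).unwrap();
    signal.shutdown_watcher().await;
    assert!(signal.is_shutdown_requested());
}
```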
|
||||
|
||||
This signaling doesn't necessarily need to cover all cases. For example, if you
|
||||
have a block of code in spawn_blocking(), it might be acceptable if
|
||||
is_shutdown_requested() doesn't return true even though the tenant is in
|
||||
Stopping state, as long as the code finishes reasonably fast.
|
||||
@@ -96,7 +96,7 @@ A single virtual environment with all dependencies is described in the single `P
|
||||
sudo apt install python3.9
|
||||
```
|
||||
- Install `poetry`
|
||||
- Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).
|
||||
- Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation)`.
|
||||
- Install dependencies via `./scripts/pysync`.
|
||||
- Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))
|
||||
so if you have a different version, some linting tools can yield different results locally vs in the CI.
|
||||
|
||||
111
hack/demo.py
Executable file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import testgres
|
||||
|
||||
|
||||
def run_command(args):
|
||||
print('> Cmd:', ' '.join(args))
|
||||
p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||
ret = p.wait()
|
||||
output = p.stdout.read().strip()
|
||||
if output:
|
||||
print(textwrap.indent(output, '>> '))
|
||||
if ret != 0:
|
||||
raise subprocess.CalledProcessError(ret, args)
|
||||
|
||||
|
||||
def make_tarfile(output_filename, source_dir):
|
||||
print("* Packing the backup into a tarball")
|
||||
cmd = ["tar", r"--transform=s/\.\///", "-C", str(source_dir), "-cf", str(output_filename), "."]
|
||||
run_command(cmd)
|
||||
|
||||
|
||||
def create_tenant(tenant_id):
|
||||
print("* Creating a new tenant")
|
||||
cmd = ["neon_local", "tenant", "create", f"--tenant-id={tenant_id}"]
|
||||
run_command(cmd)
|
||||
|
||||
|
||||
def import_backup(args, backup_dir: Path):
|
||||
tar = Path('/tmp/base.tar')
|
||||
make_tarfile(tar, backup_dir / 'data')
|
||||
|
||||
print("* Importing the timeline into the pageserver")
|
||||
|
||||
manifest = json.loads((backup_dir / "data" / "backup_manifest").read_text())
|
||||
start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
|
||||
end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
|
||||
print("> LSNs:", start_lsn, end_lsn)
|
||||
|
||||
cmd = (
|
||||
"neon_local timeline import "
|
||||
f"--tenant-id {args.tenant_id} "
|
||||
f"--base-lsn {start_lsn} "
|
||||
f"--end-lsn {end_lsn} "
|
||||
f"--base-tarfile {tar} "
|
||||
f"--timeline-id {args.timeline_id} "
|
||||
f"--node-name {args.node}"
|
||||
)
|
||||
|
||||
run_command(cmd.split())
|
||||
|
||||
|
||||
def debug_prints(node):
|
||||
tuples = node.execute("table foo")
|
||||
oid = node.execute("select 'foo'::regclass::oid")[0][0]
|
||||
print("> foo's tuples:", tuples, "&", "oid:", oid)
|
||||
print("> DBs:", node.execute("select oid, datname from pg_database"))
|
||||
|
||||
|
||||
def main(args):
|
||||
print("* Creating a node")
|
||||
node = testgres.get_new_node()
|
||||
node.init(unix_sockets=False, allow_streaming=True).start()
|
||||
node.execute("create table foo as select 1")
|
||||
debug_prints(node)
|
||||
# node.pgbench_init(scale=1)
|
||||
|
||||
print("* Creating a backup")
|
||||
backup = node.backup()
|
||||
backup_dir = Path(backup.base_dir)
|
||||
print("> Backup dir:", backup_dir)
|
||||
|
||||
# pr = backup.spawn_primary().start()
|
||||
# debug_prints(pr)
|
||||
# exit(1)
|
||||
|
||||
create_tenant(args.tenant_id)
|
||||
import_backup(args, backup_dir)
|
||||
|
||||
print("> Tenant:", args.tenant_id)
|
||||
print("> Timeline:", args.timeline_id)
|
||||
print("> Node:", args.node)
|
||||
|
||||
print("* Starting postgres")
|
||||
cmd = ["neon_local", "pg", "start", f"--tenant-id={args.tenant_id}", f"--timeline-id={args.timeline_id}", args.node]
|
||||
run_command(cmd)
|
||||
|
||||
print("* Opening psql session...")
|
||||
cmd = ["psql", f"host=127.0.0.1 port=55432 user={os.getlogin()} dbname=postgres"]
|
||||
subprocess.call(cmd)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tenant_id = uuid.uuid4().hex
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--tenant-id", default=tenant_id)
|
||||
parser.add_argument("--timeline-id", default=tenant_id)
|
||||
parser.add_argument("node")
|
||||
|
||||
args = parser.parse_args(sys.argv[1:])
|
||||
main(args)
|
||||
@@ -8,7 +8,7 @@
|
||||
regex = "1.4.5"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_with = "2.0"
|
||||
serde_with = "1.12.0"
|
||||
once_cell = "1.13.0"
|
||||
|
||||
utils = { path = "../utils" }
|
||||
|
||||
@@ -29,9 +29,6 @@ pub struct SkTimelineInfo {
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub peer_horizon_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub local_start_lsn: Option<Lsn>,
|
||||
/// A connection string to use for WAL receiving.
|
||||
#[serde(default)]
|
||||
pub safekeeper_connstr: Option<String>,
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//! Otherwise, we might not see all metrics registered via
|
||||
//! a default registry.
|
||||
use once_cell::sync::Lazy;
|
||||
use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
|
||||
use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
|
||||
pub use prometheus::opts;
|
||||
pub use prometheus::register;
|
||||
pub use prometheus::{core, default_registry, proto};
|
||||
@@ -17,7 +17,6 @@ pub use prometheus::{register_int_counter_vec, IntCounterVec};
|
||||
pub use prometheus::{register_int_gauge, IntGauge};
|
||||
pub use prometheus::{register_int_gauge_vec, IntGaugeVec};
|
||||
pub use prometheus::{Encoder, TextEncoder};
|
||||
use prometheus::{Registry, Result};
|
||||
|
||||
mod wrappers;
|
||||
pub use wrappers::{CountedReader, CountedWriter};
|
||||
@@ -33,27 +32,13 @@ macro_rules! register_uint_gauge_vec {
|
||||
}};
|
||||
}
|
||||
|
||||
/// Special internal registry, to collect metrics independently from the default registry.
|
||||
/// Was introduced to fix deadlock with lazy registration of metrics in the default registry.
|
||||
static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);
|
||||
|
||||
/// Register a collector in the internal registry. MUST be called before the first call to `gather()`.
|
||||
/// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector
|
||||
/// while holding the lock.
|
||||
pub fn register_internal(c: Box<dyn Collector>) -> Result<()> {
|
||||
INTERNAL_REGISTRY.register(c)
|
||||
}
|
||||
|
||||
/// Gathers all Prometheus metrics and records the I/O stats just before that.
|
||||
///
|
||||
/// Metrics gathering is a relatively simple and standalone operation, so
|
||||
/// it might be fine to do it this way to keep things simple.
|
||||
pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
|
||||
update_rusage_metrics();
|
||||
let mut mfs = prometheus::gather();
|
||||
let mut internal_mfs = INTERNAL_REGISTRY.gather();
|
||||
mfs.append(&mut internal_mfs);
|
||||
mfs
|
||||
prometheus::gather()
|
||||
}
|
||||
|
||||
static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
@@ -77,16 +62,6 @@ pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
|
||||
0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
|
||||
];
|
||||
|
||||
pub fn set_build_info_metric(revision: &str) {
|
||||
let metric = register_int_gauge_vec!(
|
||||
"libmetrics_build_info",
|
||||
"Build/version information",
|
||||
&["revision"]
|
||||
)
|
||||
.expect("Failed to register build info metric");
|
||||
metric.with_label_values(&[revision]).set(1);
|
||||
}
|
||||
|
||||
// Records I/O stats in a "cross-platform" way.
|
||||
// Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.
|
||||
// An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
[package]
|
||||
name = "pageserver_api"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "2.0"
|
||||
const_format = "0.2.21"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
bytes = "1.0.1"
|
||||
|
||||
utils = { path = "../utils" }
|
||||
postgres_ffi = { path = "../postgres_ffi" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
@@ -1,10 +0,0 @@
|
||||
use const_format::formatcp;
|
||||
|
||||
/// Public API types
|
||||
pub mod models;
|
||||
pub mod reltag;
|
||||
|
||||
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
|
||||
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
|
||||
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
|
||||
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
|
||||
@@ -1,382 +0,0 @@
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::reltag::RelTag;
|
||||
use anyhow::bail;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
|
||||
/// A state of a tenant in pageserver's memory.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum TenantState {
|
||||
/// Tenant is fully operational, its background jobs might be running or not.
|
||||
Active { background_jobs_running: bool },
|
||||
/// A tenant is recognized by pageserver, but not yet ready to operate:
|
||||
/// e.g. not present locally and being downloaded or being read into memory from the file system.
|
||||
Paused,
|
||||
/// A tenant is recognized by the pageserver, but no longer used for any operations, as it failed to get activated.
|
||||
Broken,
|
||||
}
|
||||
|
||||
/// A state of a timeline in pageserver's memory.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum TimelineState {
|
||||
/// Timeline is fully operational, its background jobs are running.
|
||||
Active,
|
||||
/// A timeline is recognized by pageserver, but not yet ready to operate.
|
||||
/// The status indicates that the timeline could eventually go back to Active automatically:
|
||||
/// for example, if the owning tenant goes back to Active again.
|
||||
Suspended,
|
||||
/// A timeline is recognized by pageserver, but not yet ready to operate and not allowed to
|
||||
/// automatically become Active after certain events: only a management call can change this status.
|
||||
Paused,
|
||||
/// A timeline is recognized by the pageserver, but no longer used for any operations, as it failed to get activated.
|
||||
Broken,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_timeline_id: Option<TimelineId>,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_start_lsn: Option<Lsn>,
|
||||
pub pg_version: Option<u32>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
pub struct TenantCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_tenant_id: Option<TenantId>,
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
pub walreceiver_connect_timeout: Option<String>,
|
||||
pub lagging_wal_timeout: Option<String>,
|
||||
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub TenantId);
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct StatusResponse {
|
||||
pub id: NodeId,
|
||||
}
|
||||
|
||||
impl TenantCreateRequest {
|
||||
pub fn new(new_tenant_id: Option<TenantId>) -> TenantCreateRequest {
|
||||
TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TenantConfigRequest {
|
||||
pub tenant_id: TenantId,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
pub walreceiver_connect_timeout: Option<String>,
|
||||
pub lagging_wal_timeout: Option<String>,
|
||||
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
||||
}
|
||||
|
||||
impl TenantConfigRequest {
|
||||
pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
|
||||
TenantConfigRequest {
|
||||
tenant_id,
|
||||
checkpoint_distance: None,
|
||||
checkpoint_timeout: None,
|
||||
compaction_target_size: None,
|
||||
compaction_period: None,
|
||||
compaction_threshold: None,
|
||||
gc_horizon: None,
|
||||
gc_period: None,
|
||||
image_creation_threshold: None,
|
||||
pitr_interval: None,
|
||||
walreceiver_connect_timeout: None,
|
||||
lagging_wal_timeout: None,
|
||||
max_lsn_wal_lag: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TenantInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: TenantId,
|
||||
pub state: TenantState,
|
||||
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
||||
pub has_in_progress_downloads: Option<bool>,
|
||||
}
|
||||
|
||||
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub last_record_lsn: Lsn,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub prev_record_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub latest_gc_cutoff_lsn: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
pub current_logical_size_non_incremental: Option<u64>,
|
||||
pub current_physical_size_non_incremental: Option<u64>,
|
||||
|
||||
pub wal_source_connstr: Option<String>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub last_received_msg_lsn: Option<Lsn>,
|
||||
/// the timestamp (in microseconds) of the last received message
|
||||
pub last_received_msg_ts: Option<u128>,
|
||||
pub pg_version: u32,
|
||||
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub remote_consistent_lsn: Option<Lsn>,
|
||||
pub awaits_download: bool,
|
||||
|
||||
pub state: TimelineState,
|
||||
|
||||
// Some of the above fields are duplicated in 'local' and 'remote', for backwards-
|
||||
// compatibility with older clients.
|
||||
pub local: LocalTimelineInfo,
|
||||
pub remote: RemoteTimelineInfo,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct LocalTimelineInfo {
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_lsn: Option<Lsn>,
|
||||
pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct RemoteTimelineInfo {
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub remote_consistent_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
pub type ConfigureFailpointsRequest = Vec<FailpointConfig>;
|
||||
|
||||
/// Information for configuring a single fail point
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct FailpointConfig {
|
||||
/// Name of the fail point
|
||||
pub name: String,
|
||||
/// List of actions to take, using the format described in `fail::cfg`
|
||||
///
|
||||
/// We also support `actions = "exit"` to cause the fail point to immediately exit.
|
||||
pub actions: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct TimelineGcRequest {
|
||||
pub gc_horizon: Option<u64>,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
pub enum PagestreamFeMessage {
|
||||
Exists(PagestreamExistsRequest),
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
pub fn parse(mut body: Bytes) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// TODO these gets can fail
|
||||
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.get_u8();
|
||||
match msg_tag {
|
||||
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
})),
|
||||
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
})),
|
||||
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
blkno: body.get_u32(),
|
||||
})),
|
||||
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
dbnode: body.get_u32(),
|
||||
})),
|
||||
_ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(100); /* tag from pagestore_client.h */
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(101); /* tag from pagestore_client.h */
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(102); /* tag from pagestore_client.h */
|
||||
bytes.put(&resp.page[..]);
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(103); /* tag from pagestore_client.h */
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(104); /* tag from pagestore_client.h */
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
}
|
||||
@@ -13,7 +13,7 @@ crc32c = "0.6.0"
|
||||
hex = "0.4.3"
|
||||
once_cell = "1.13.0"
|
||||
log = "0.4.14"
|
||||
memoffset = "0.7"
|
||||
memoffset = "0.6.2"
|
||||
thiserror = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
utils = { path = "../utils" }
|
||||
@@ -26,4 +26,4 @@ wal_craft = { path = "wal_craft" }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0"
|
||||
bindgen = "0.61"
|
||||
bindgen = "0.60.1"
|
||||
|
||||
@@ -3,11 +3,9 @@
|
||||
#![allow(non_snake_case)]
|
||||
// bindgen creates some unsafe code with no doc comments.
|
||||
#![allow(clippy::missing_safety_doc)]
|
||||
// noted at 1.63 that in many cases there's a u32 -> u32 transmutes in bindgen code.
|
||||
#![allow(clippy::useless_transmute)]
|
||||
// modules included with the postgres_ffi macro depend on the types of the specific version's
|
||||
// types, and trigger a too eager lint.
|
||||
#![allow(clippy::duplicate_mod)]
|
||||
// suppress warnings on rust 1.53 due to bindgen unit tests.
|
||||
// https://github.com/rust-lang/rust-bindgen/issues/1651
|
||||
#![allow(deref_nullptr)]
|
||||
|
||||
use bytes::Bytes;
|
||||
use utils::bin_ser::SerializeError;
|
||||
|
||||
@@ -57,10 +57,12 @@ pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
|
||||
/// in order to let CLOG_TRUNCATE mechanism correctly extend CLOG.
|
||||
const XID_CHECKPOINT_INTERVAL: u32 = 1024;
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
|
||||
(0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn XLogSegNoOffsetToRecPtr(
|
||||
segno: XLogSegNo,
|
||||
offset: u32,
|
||||
@@ -69,6 +71,7 @@ pub fn XLogSegNoOffsetToRecPtr(
|
||||
segno * (wal_segsz_bytes as u64) + (offset as u64)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
|
||||
format!(
|
||||
"{:>08X}{:>08X}{:>08X}",
|
||||
@@ -78,6 +81,7 @@ pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
|
||||
let tli = u32::from_str_radix(&fname[0..8], 16).unwrap();
|
||||
let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo;
|
||||
@@ -85,10 +89,12 @@ pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLin
|
||||
(log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn IsXLogFileName(fname: &str) -> bool {
|
||||
return fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit());
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn IsPartialXLogFileName(fname: &str) -> bool {
|
||||
fname.ends_with(".partial") && IsXLogFileName(&fname[0..fname.len() - 8])
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
clap = "4.0"
|
||||
clap = "3.0"
|
||||
env_logger = "0.9"
|
||||
log = "0.4"
|
||||
once_cell = "1.13.0"
|
||||
|
||||
@@ -1,19 +1,68 @@
|
||||
use anyhow::*;
|
||||
use clap::{value_parser, Arg, ArgMatches, Command};
|
||||
use std::{path::PathBuf, str::FromStr};
|
||||
use clap::{App, Arg, ArgMatches};
|
||||
use std::str::FromStr;
|
||||
use wal_craft::*;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
|
||||
.init();
|
||||
let arg_matches = cli().get_matches();
|
||||
let type_arg = &Arg::new("type")
|
||||
.takes_value(true)
|
||||
.help("Type of WAL to craft")
|
||||
.possible_values([
|
||||
Simple::NAME,
|
||||
LastWalRecordXlogSwitch::NAME,
|
||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
|
||||
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
|
||||
LastWalRecordCrossingSegment::NAME,
|
||||
])
|
||||
.required(true);
|
||||
let arg_matches = App::new("Postgres WAL crafter")
|
||||
.about("Crafts Postgres databases with specific WAL properties")
|
||||
.subcommand(
|
||||
App::new("print-postgres-config")
|
||||
.about("Print the configuration required for PostgreSQL server before running this script")
|
||||
)
|
||||
.subcommand(
|
||||
App::new("with-initdb")
|
||||
.about("Craft WAL in a new data directory first initialized with initdb")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.takes_value(true)
|
||||
.help("Data directory for the Postgres server")
|
||||
.required(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-distrib-dir")
|
||||
.long("pg-distrib-dir")
|
||||
.takes_value(true)
|
||||
.help("Directory with Postgres distributions (bin and lib directories, e.g. pg_install containing subpath `v14/bin/postgresql`)")
|
||||
.default_value("/usr/local")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("in-existing")
|
||||
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("connection")
|
||||
.takes_value(true)
|
||||
.help("Connection string to the Postgres database to populate")
|
||||
.required(true)
|
||||
)
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let wal_craft = |arg_matches: &ArgMatches, client| {
|
||||
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches
|
||||
.get_one::<String>("type")
|
||||
.map(|s| s.as_str())
|
||||
.context("'type' is required")?
|
||||
{
|
||||
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches.value_of("type").unwrap() {
|
||||
Simple::NAME => Simple::craft(client)?,
|
||||
LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
|
||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
|
||||
@@ -23,12 +72,12 @@ fn main() -> Result<()> {
|
||||
WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
|
||||
}
|
||||
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
|
||||
a => panic!("Unknown --type argument: {a}"),
|
||||
a => panic!("Unknown --type argument: {}", a),
|
||||
};
|
||||
for lsn in intermediate_lsns {
|
||||
println!("intermediate_lsn = {lsn}");
|
||||
println!("intermediate_lsn = {}", lsn);
|
||||
}
|
||||
println!("end_of_wal = {end_of_wal_lsn}");
|
||||
println!("end_of_wal = {}", end_of_wal_lsn);
|
||||
Ok(())
|
||||
};
|
||||
|
||||
@@ -36,24 +85,20 @@ fn main() -> Result<()> {
|
||||
None => panic!("No subcommand provided"),
|
||||
Some(("print-postgres-config", _)) => {
|
||||
for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
|
||||
println!("{cfg}");
|
||||
println!("{}", cfg);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Some(("with-initdb", arg_matches)) => {
|
||||
let cfg = Conf {
|
||||
pg_version: *arg_matches
|
||||
.get_one::<u32>("pg-version")
|
||||
.context("'pg-version' is required")?,
|
||||
pg_distrib_dir: arg_matches
|
||||
.get_one::<PathBuf>("pg-distrib-dir")
|
||||
.context("'pg-distrib-dir' is required")?
|
||||
.to_owned(),
|
||||
datadir: arg_matches
|
||||
.get_one::<PathBuf>("datadir")
|
||||
.context("'datadir' is required")?
|
||||
.to_owned(),
|
||||
pg_version: arg_matches
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?,
|
||||
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
|
||||
datadir: arg_matches.value_of("datadir").unwrap().into(),
|
||||
};
|
||||
cfg.initdb()?;
|
||||
let srv = cfg.start_server()?;
|
||||
@@ -63,77 +108,9 @@ fn main() -> Result<()> {
|
||||
}
|
||||
Some(("in-existing", arg_matches)) => wal_craft(
|
||||
arg_matches,
|
||||
&mut postgres::Config::from_str(
|
||||
arg_matches
|
||||
.get_one::<String>("connection")
|
||||
.context("'connection' is required")?,
|
||||
)
|
||||
.context(
|
||||
"'connection' argument value could not be parsed as a postgres connection string",
|
||||
)?
|
||||
.connect(postgres::NoTls)?,
|
||||
&mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
|
||||
.connect(postgres::NoTls)?,
|
||||
),
|
||||
Some(_) => panic!("Unknown subcommand"),
|
||||
}
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
let type_arg = &Arg::new("type")
|
||||
.help("Type of WAL to craft")
|
||||
.value_parser([
|
||||
Simple::NAME,
|
||||
LastWalRecordXlogSwitch::NAME,
|
||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
|
||||
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
|
||||
LastWalRecordCrossingSegment::NAME,
|
||||
])
|
||||
.required(true);
|
||||
|
||||
Command::new("Postgres WAL crafter")
|
||||
.about("Crafts Postgres databases with specific WAL properties")
|
||||
.subcommand(
|
||||
Command::new("print-postgres-config")
|
||||
.about("Print the configuration required for PostgreSQL server before running this script")
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("with-initdb")
|
||||
.about("Craft WAL in a new data directory first initialized with initdb")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.help("Data directory for the Postgres server")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.required(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-distrib-dir")
|
||||
.long("pg-distrib-dir")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.help("Directory with Postgres distributions (bin and lib directories, e.g. pg_install containing subpath `v14/bin/postgresql`)")
|
||||
.default_value("/usr/local")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.value_parser(value_parser!(u32))
|
||||
.required(true)
|
||||
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("in-existing")
|
||||
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("connection")
|
||||
.help("Connection string to the Postgres database to populate")
|
||||
.required(true)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -37,22 +37,22 @@ pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
|
||||
});
|
||||
|
||||
impl Conf {
|
||||
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_distrib_dir(&self) -> PathBuf {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
match self.pg_version {
|
||||
14 => Ok(path.join(format!("v{}", self.pg_version))),
|
||||
15 => Ok(path.join(format!("v{}", self.pg_version))),
|
||||
_ => bail!("Unsupported postgres version: {}", self.pg_version),
|
||||
14 => path.join(format!("v{}", self.pg_version)),
|
||||
15 => path.join(format!("v{}", self.pg_version)),
|
||||
_ => panic!("Unsupported postgres version: {}", self.pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
fn pg_bin_dir(&self) -> anyhow::Result<PathBuf> {
|
||||
Ok(self.pg_distrib_dir()?.join("bin"))
|
||||
fn pg_bin_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir().join("bin")
|
||||
}
|
||||
|
||||
fn pg_lib_dir(&self) -> anyhow::Result<PathBuf> {
|
||||
Ok(self.pg_distrib_dir()?.join("lib"))
|
||||
fn pg_lib_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir().join("lib")
|
||||
}
|
||||
|
||||
pub fn wal_dir(&self) -> PathBuf {
|
||||
@@ -60,12 +60,12 @@ impl Conf {
|
||||
}
|
||||
|
||||
fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
|
||||
let path = self.pg_bin_dir()?.join(command);
|
||||
let path = self.pg_bin_dir().join(command);
|
||||
ensure!(path.exists(), "Command {:?} does not exist", path);
|
||||
let mut cmd = Command::new(path);
|
||||
cmd.env_clear()
|
||||
.env("LD_LIBRARY_PATH", self.pg_lib_dir()?)
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?);
|
||||
.env("LD_LIBRARY_PATH", self.pg_lib_dir())
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir());
|
||||
Ok(cmd)
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
|
||||
tokio-util = { version = "0.7", features = ["io"] }
|
||||
toml_edit = { version = "0.14", features = ["easy"] }
|
||||
toml_edit = { version = "0.13", features = ["easy"] }
|
||||
tracing = "0.1.27"
|
||||
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -16,7 +16,7 @@ use tokio::{
|
||||
io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
|
||||
};
|
||||
use tracing::*;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::crashsafe_dir::path_with_suffix_extension;
|
||||
|
||||
use crate::{Download, DownloadError, RemoteObjectId};
|
||||
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
[package]
|
||||
name = "safekeeper_api"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "2.0"
|
||||
const_format = "0.2.21"
|
||||
|
||||
utils = { path = "../utils" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
@@ -1,10 +0,0 @@
|
||||
use const_format::formatcp;
|
||||
|
||||
/// Public API types
|
||||
pub mod models;
|
||||
|
||||
pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;
|
||||
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
|
||||
|
||||
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 7676;
|
||||
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
|
||||
@@ -1,24 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
pub peer_ids: Option<Vec<NodeId>>,
|
||||
pub pg_version: u32,
|
||||
pub system_id: Option<u64>,
|
||||
pub wal_seg_size: Option<u32>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub commit_lsn: Lsn,
|
||||
// If not passed, it is assigned to the beginning of commit_lsn segment.
|
||||
pub local_start_lsn: Option<Lsn>,
|
||||
}
|
||||
@@ -19,8 +19,8 @@ thiserror = "1.0"
|
||||
tokio = { version = "1.17", features = ["macros"]}
|
||||
tokio-rustls = "0.23"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
|
||||
nix = "0.25"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
nix = "0.23.0"
|
||||
signal-hook = "0.3.10"
|
||||
rand = "0.8.3"
|
||||
jsonwebtoken = "8"
|
||||
@@ -28,10 +28,8 @@ hex = { version = "0.4.3", features = ["serde"] }
|
||||
rustls = "0.20.2"
|
||||
rustls-split = "0.3.0"
|
||||
git-version = "0.3.5"
|
||||
serde_with = "2.0"
|
||||
serde_with = "1.12.0"
|
||||
once_cell = "1.13.0"
|
||||
strum = "0.24"
|
||||
strum_macros = "0.24"
|
||||
|
||||
|
||||
metrics = { path = "../metrics" }
|
||||
@@ -42,7 +40,7 @@ byteorder = "1.4.3"
|
||||
bytes = "1.0.1"
|
||||
hex-literal = "0.3"
|
||||
tempfile = "3.2"
|
||||
criterion = "0.4"
|
||||
criterion = "0.3"
|
||||
rustls-pemfile = "1"
|
||||
|
||||
[[bench]]
|
||||
|
||||
@@ -12,8 +12,16 @@ pub fn create_dir(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
let path = path.as_ref();
|
||||
|
||||
fs::create_dir(path)?;
|
||||
fsync_file_and_parent(path)?;
|
||||
Ok(())
|
||||
File::open(path)?.sync_all()?;
|
||||
|
||||
if let Some(parent) = path.parent() {
|
||||
File::open(parent)?.sync_all()
|
||||
} else {
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"can't find parent",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Similar to [`std::fs::create_dir_all`], except we fsync all
|
||||
@@ -57,12 +65,12 @@ pub fn create_dir_all(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
|
||||
// Fsync the created directories from child to parent.
|
||||
for &path in dirs_to_create.iter() {
|
||||
fsync(path)?;
|
||||
File::open(path)?.sync_all()?;
|
||||
}
|
||||
|
||||
// If we created any new directories, fsync the parent.
|
||||
if !dirs_to_create.is_empty() {
|
||||
fsync(path)?;
|
||||
File::open(path)?.sync_all()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -84,33 +92,6 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
|
||||
.with_extension(new_extension.as_ref())
|
||||
}
|
||||
|
||||
pub fn fsync_file_and_parent(file_path: &Path) -> io::Result<()> {
|
||||
let parent = file_path.parent().ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("File {file_path:?} has no parent"),
|
||||
)
|
||||
})?;
|
||||
|
||||
fsync(file_path)?;
|
||||
fsync(parent)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn fsync(path: &Path) -> io::Result<()> {
|
||||
File::open(path)
|
||||
.map_err(|e| io::Error::new(e.kind(), format!("Failed to open the file {path:?}: {e}")))
|
||||
.and_then(|file| {
|
||||
file.sync_all().map_err(|e| {
|
||||
io::Error::new(
|
||||
e.kind(),
|
||||
format!("Failed to sync file {path:?} data and metadata: {e}"),
|
||||
)
|
||||
})
|
||||
})
|
||||
.map_err(|e| io::Error::new(e.kind(), format!("Failed to fsync file {path:?}: {e}")))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::tempdir;
|
||||
@@ -9,7 +9,6 @@ use once_cell::sync::Lazy;
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::RequestInfo;
|
||||
use routerify::{Middleware, Router, RouterBuilder, RouterService};
|
||||
use tokio::task::JoinError;
|
||||
use tracing::info;
|
||||
|
||||
use std::future::Future;
|
||||
@@ -36,13 +35,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
|
||||
let mut buffer = vec![];
|
||||
let encoder = TextEncoder::new();
|
||||
|
||||
let metrics = tokio::task::spawn_blocking(move || {
|
||||
// Currently we take a lot of mutexes while collecting metrics, so it's
|
||||
// better to spawn a blocking task to avoid blocking the event loop.
|
||||
metrics::gather()
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
|
||||
let metrics = metrics::gather();
|
||||
encoder.encode(&metrics, &mut buffer).unwrap();
|
||||
|
||||
let response = Response::builder()
|
||||
|
||||
@@ -75,12 +75,6 @@ impl From<[u8; 16]> for Id {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Id> for u128 {
|
||||
fn from(id: Id) -> Self {
|
||||
u128::from_le_bytes(id.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Id {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(&self.hex_encode())
|
||||
@@ -142,12 +136,6 @@ macro_rules! id_newtype {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<$t> for u128 {
|
||||
fn from(id: $t) -> Self {
|
||||
u128::from(id.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for $t {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.0.fmt(f)
|
||||
|
||||
@@ -22,8 +22,8 @@ pub mod pq_proto;
|
||||
// dealing with connstring parsing and handy access to it's parts
|
||||
pub mod connstring;
|
||||
|
||||
// helper functions for creating and fsyncing
|
||||
pub mod crashsafe;
|
||||
// helper functions for creating and fsyncing directories/trees
|
||||
pub mod crashsafe_dir;
|
||||
|
||||
// common authentication routines
|
||||
pub mod auth;
|
||||
|
||||
@@ -1,35 +1,11 @@
|
||||
use std::{
|
||||
fs::{File, OpenOptions},
|
||||
path::Path,
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use strum_macros::{EnumString, EnumVariantNames};
|
||||
|
||||
#[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum LogFormat {
|
||||
Plain,
|
||||
Json,
|
||||
}
|
||||
|
||||
impl LogFormat {
|
||||
pub fn from_config(s: &str) -> anyhow::Result<LogFormat> {
|
||||
use strum::VariantNames;
|
||||
LogFormat::from_str(s).with_context(|| {
|
||||
format!(
|
||||
"Unrecognized log format. Please specify one of: {:?}",
|
||||
LogFormat::VARIANTS
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
pub fn init(
|
||||
log_filename: impl AsRef<Path>,
|
||||
daemonize: bool,
|
||||
log_format: LogFormat,
|
||||
) -> Result<File> {
|
||||
pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
|
||||
// Don't open the same file for output multiple times;
|
||||
// the different fds could overwrite each other's output.
|
||||
let log_file = OpenOptions::new()
|
||||
@@ -45,50 +21,22 @@ pub fn init(
|
||||
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
|
||||
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str));
|
||||
|
||||
let x: File = log_file.try_clone().unwrap();
|
||||
let base_logger = tracing_subscriber::fmt()
|
||||
.with_env_filter(env_filter)
|
||||
.with_target(false)
|
||||
.with_ansi(false)
|
||||
.with_writer(move || -> Box<dyn std::io::Write> {
|
||||
// we are cloning and returning the log file in order to allow redirecting daemonized stdout and stderr to it
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly,
// for example to be in line with the docker log command, which expects logs coming from stdout
|
||||
if daemonize {
|
||||
Box::new(x.try_clone().unwrap())
|
||||
} else {
|
||||
Box::new(std::io::stdout())
|
||||
}
|
||||
});
|
||||
.with_target(false) // don't include event targets
|
||||
.with_ansi(false); // don't use colors in log file;
|
||||
|
||||
match log_format {
|
||||
LogFormat::Json => base_logger.json().init(),
|
||||
LogFormat::Plain => base_logger.init(),
|
||||
// we are cloning and returning the log file in order to allow redirecting daemonized stdout and stderr to it
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly,
// for example to be in line with the docker log command, which expects logs coming from stdout
|
||||
if daemonize {
|
||||
let x = log_file.try_clone().unwrap();
|
||||
base_logger
|
||||
.with_writer(move || x.try_clone().unwrap())
|
||||
.init();
|
||||
} else {
|
||||
base_logger.init();
|
||||
}
|
||||
|
||||
Ok(log_file)
|
||||
}
|
||||
|
||||
// #[cfg(test)]
|
||||
// Due to global logger, can't run tests in same process.
|
||||
// So until there's a non-global one, the tests are in ../tests/ as separate files.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! test_init_file_logger {
|
||||
($log_level:expr, $log_format:expr) => {{
|
||||
use std::str::FromStr;
|
||||
std::env::set_var("RUST_LOG", $log_level);
|
||||
|
||||
let tmp_dir = tempfile::TempDir::new().unwrap();
|
||||
let log_file_path = tmp_dir.path().join("logfile");
|
||||
|
||||
let log_format = $crate::logging::LogFormat::from_str($log_format).unwrap();
|
||||
let _log_file = $crate::logging::init(&log_file_path, true, log_format).unwrap();
|
||||
|
||||
let log_file = std::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&log_file_path)
|
||||
.unwrap();
|
||||
|
||||
log_file
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -66,11 +66,6 @@ impl Lsn {
|
||||
(self.0 % seg_sz as u64) as usize
|
||||
}
|
||||
|
||||
/// Compute LSN of the segment start.
|
||||
pub fn segment_lsn(self, seg_sz: usize) -> Lsn {
|
||||
Lsn(self.0 - (self.0 % seg_sz as u64))
|
||||
}
|
||||
|
||||
/// Compute the segment number
|
||||
pub fn segment_number(self, seg_sz: usize) -> u64 {
|
||||
self.0 / seg_sz as u64
|
||||
|
||||
@@ -15,7 +15,7 @@ use std::sync::Arc;
|
||||
use std::task::Poll;
|
||||
use tracing::{debug, error, trace};
|
||||
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
|
||||
use tokio_rustls::TlsAcceptor;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -66,8 +66,8 @@ pub enum ProcessMsgResult {
|
||||
/// Always-writeable sock_split stream.
|
||||
/// May not be readable. See [`PostgresBackend::take_stream_in`]
|
||||
pub enum Stream {
|
||||
Unencrypted(BufReader<tokio::net::TcpStream>),
|
||||
Tls(Box<tokio_rustls::server::TlsStream<BufReader<tokio::net::TcpStream>>>),
|
||||
Unencrypted(tokio::net::TcpStream),
|
||||
Tls(Box<tokio_rustls::server::TlsStream<tokio::net::TcpStream>>),
|
||||
Broken,
|
||||
}
|
||||
|
||||
@@ -157,7 +157,7 @@ impl PostgresBackend {
|
||||
let peer_addr = socket.peer_addr()?;
|
||||
|
||||
Ok(Self {
|
||||
stream: Stream::Unencrypted(BufReader::new(socket)),
|
||||
stream: Stream::Unencrypted(socket),
|
||||
buf_out: BytesMut::with_capacity(10 * 1024),
|
||||
state: ProtoState::Initialization,
|
||||
md5_salt: [0u8; 4],
|
||||
|
||||
@@ -10,7 +10,6 @@ use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::HashMap,
|
||||
fmt,
|
||||
future::Future,
|
||||
io::{self, Cursor},
|
||||
str,
|
||||
@@ -125,19 +124,6 @@ pub struct CancelKeyData {
|
||||
pub cancel_key: i32,
|
||||
}
|
||||
|
||||
impl fmt::Display for CancelKeyData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let hi = (self.backend_pid as u64) << 32;
|
||||
let lo = self.cancel_key as u64;
|
||||
let id = hi | lo;
|
||||
|
||||
// This format is more compact and might work better for logs.
|
||||
f.debug_tuple("CancelKeyData")
|
||||
.field(&format_args!("{:x}", id))
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
use rand::distributions::{Distribution, Standard};
|
||||
impl Distribution<CancelKeyData> for Standard {
|
||||
fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> CancelKeyData {
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
// This could be in ../src/logging.rs but since the logger is global, these
|
||||
// can't be run in threads of the same process
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Lines};
|
||||
use tracing::*;
|
||||
use utils::test_init_file_logger;
|
||||
|
||||
fn read_lines(file: File) -> Lines<BufReader<File>> {
|
||||
BufReader::new(file).lines()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_format_has_message_and_custom_field() {
|
||||
std::env::set_var("RUST_LOG", "info");
|
||||
|
||||
let log_file = test_init_file_logger!("info", "json");
|
||||
|
||||
let custom_field: &str = "hi";
|
||||
trace!(custom = %custom_field, "test log message");
|
||||
debug!(custom = %custom_field, "test log message");
|
||||
info!(custom = %custom_field, "test log message");
|
||||
warn!(custom = %custom_field, "test log message");
|
||||
error!(custom = %custom_field, "test log message");
|
||||
|
||||
let lines = read_lines(log_file);
|
||||
for line in lines {
|
||||
let content = line.unwrap();
|
||||
let json_object = serde_json::from_str::<serde_json::Value>(&content).unwrap();
|
||||
|
||||
assert_eq!(json_object["fields"]["custom"], "hi");
|
||||
assert_eq!(json_object["fields"]["message"], "test log message");
|
||||
|
||||
assert_ne!(json_object["level"], "TRACE");
|
||||
assert_ne!(json_object["level"], "DEBUG");
|
||||
}
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
// This could be in ../src/logging.rs but since the logger is global, these
|
||||
// can't be run in threads of the same process
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Lines};
|
||||
use tracing::*;
|
||||
use utils::test_init_file_logger;
|
||||
|
||||
fn read_lines(file: File) -> Lines<BufReader<File>> {
|
||||
BufReader::new(file).lines()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_plain_format_has_message_and_custom_field() {
|
||||
std::env::set_var("RUST_LOG", "warn");
|
||||
|
||||
let log_file = test_init_file_logger!("warn", "plain");
|
||||
|
||||
let custom_field: &str = "hi";
|
||||
trace!(custom = %custom_field, "test log message");
|
||||
debug!(custom = %custom_field, "test log message");
|
||||
info!(custom = %custom_field, "test log message");
|
||||
warn!(custom = %custom_field, "test log message");
|
||||
error!(custom = %custom_field, "test log message");
|
||||
|
||||
let lines = read_lines(log_file);
|
||||
for line in lines {
|
||||
let content = line.unwrap();
|
||||
serde_json::from_str::<serde_json::Value>(&content).unwrap_err();
|
||||
assert!(content.contains("custom=hi"));
|
||||
assert!(content.contains("test log message"));
|
||||
|
||||
assert!(!content.contains("TRACE"));
|
||||
assert!(!content.contains("DEBUG"));
|
||||
assert!(!content.contains("INFO"));
|
||||
}
|
||||
}
|
||||
@@ -23,7 +23,7 @@ futures = "0.3.13"
|
||||
hex = "0.4.3"
|
||||
hyper = "0.14"
|
||||
itertools = "0.10.3"
|
||||
clap = { version = "4.0", features = ["string"] }
|
||||
clap = "3.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
|
||||
tokio-util = { version = "0.7.3", features = ["io", "io-util"] }
|
||||
@@ -38,27 +38,26 @@ tar = "0.4.33"
|
||||
humantime = "2.1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_with = "2.0"
|
||||
serde_with = "1.12.0"
|
||||
humantime-serde = "1.1.1"
|
||||
|
||||
pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }
|
||||
|
||||
toml_edit = { version = "0.14", features = ["easy"] }
|
||||
toml_edit = { version = "0.13", features = ["easy"] }
|
||||
scopeguard = "1.1.0"
|
||||
const_format = "0.2.21"
|
||||
tracing = "0.1.36"
|
||||
signal-hook = "0.3.10"
|
||||
url = "2"
|
||||
nix = "0.25"
|
||||
nix = "0.23"
|
||||
once_cell = "1.13.0"
|
||||
crossbeam-utils = "0.8.5"
|
||||
fail = "0.5.0"
|
||||
git-version = "0.3.5"
|
||||
rstar = "0.9.3"
|
||||
num-traits = "0.2.15"
|
||||
amplify_num = { git = "https://github.com/hlinnaka/rust-amplify.git", branch = "unsigned-int-perf" }
|
||||
amplify_num = "0.4.1"
|
||||
|
||||
pageserver_api = { path = "../libs/pageserver_api" }
|
||||
postgres_ffi = { path = "../libs/postgres_ffi" }
|
||||
etcd_broker = { path = "../libs/etcd_broker" }
|
||||
metrics = { path = "../libs/metrics" }
|
||||
@@ -67,13 +66,7 @@ remote_storage = { path = "../libs/remote_storage" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
close_fds = "0.3.2"
|
||||
walkdir = "2.3.2"
|
||||
svg_fmt = "0.4.1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.4"
|
||||
hex-literal = "0.3"
|
||||
tempfile = "3.2"
|
||||
|
||||
[[bench]]
|
||||
name = "bench_layer_map"
|
||||
harness = false
|
||||
|
||||
File diff suppressed because it is too large
@@ -22,8 +22,8 @@ use std::time::SystemTime;
|
||||
use tar::{Builder, EntryType, Header};
|
||||
use tracing::*;
|
||||
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::tenant::Timeline;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
|
||||
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
//! A tool for visualizing the arrangement of layerfiles within a timeline.
|
||||
//!
|
||||
//! It reads filenames from stdin and prints a svg on stdout. The image is a plot in
|
||||
//! page-lsn space, where every delta layer is a rectangle and every image layer is a
|
||||
//! thick line. Legend:
|
||||
//! - The x axis (left to right) represents page index.
|
||||
//! - The y axis represents LSN, growing upwards.
|
||||
//!
|
||||
//! Coordinates in both axis are compressed for better readability.
|
||||
//! (see https://medium.com/algorithms-digest/coordinate-compression-2fff95326fb)
|
||||
//!
|
||||
//! Example use:
|
||||
//! ```
|
||||
//! $ cd test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE
|
||||
//! $ ls | grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
|
||||
//! $ firefox out.svg
|
||||
//! ```
|
||||
//!
|
||||
//! This API was chosen so that we can easily work with filenames extracted from ssh,
|
||||
//! or from pageserver log files.
|
||||
//!
|
||||
//! TODO Consider shipping this as a grafana panel plugin:
|
||||
//! https://grafana.com/tutorials/build-a-panel-plugin/
|
||||
use anyhow::Result;
|
||||
use pageserver::repository::Key;
|
||||
use std::cmp::Ordering;
|
||||
use std::io::{self, BufRead};
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
ops::Range,
|
||||
};
|
||||
use svg_fmt::{rectangle, rgb, BeginSvg, EndSvg, Fill, Stroke};
|
||||
use utils::{lsn::Lsn, project_git_version};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
// Map values to their compressed coordinate - the index the value
|
||||
// would have in a sorted and deduplicated list of all values.
|
||||
fn build_coordinate_compression_map<T: Ord + Copy>(coords: Vec<T>) -> BTreeMap<T, usize> {
|
||||
let set: BTreeSet<T> = coords.into_iter().collect();
|
||||
|
||||
let mut map: BTreeMap<T, usize> = BTreeMap::new();
|
||||
for (i, e) in set.iter().enumerate() {
|
||||
map.insert(*e, i);
|
||||
}
|
||||
|
||||
map
|
||||
}
|
||||
|
||||
fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
    let split: Vec<&str> = name.split("__").collect();
    let keys: Vec<&str> = split[0].split('-').collect();
    let mut lsns: Vec<&str> = split[1].split('-').collect();
    if lsns.len() == 1 {
        lsns.push(lsns[0]);
    }

    let keys = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
    let lsns = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
    (keys, lsns)
}

fn main() -> Result<()> {
    // Parse layer filenames from stdin
    let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let range = parse_filename(&line.unwrap());
        ranges.push(range);
    }

    // Collect all coordinates
    let mut keys: Vec<Key> = vec![];
    let mut lsns: Vec<Lsn> = vec![];
    for (keyr, lsnr) in &ranges {
        keys.push(keyr.start);
        keys.push(keyr.end);
        lsns.push(lsnr.start);
        lsns.push(lsnr.end);
    }

    // Analyze
    let key_map = build_coordinate_compression_map(keys);
    let lsn_map = build_coordinate_compression_map(lsns);

    // Initialize stats
    let mut num_deltas = 0;
    let mut num_images = 0;

    // Draw
    let stretch = 3.0; // Stretch out vertically for better visibility
    println!(
        "{}",
        BeginSvg {
            w: key_map.len() as f32,
            h: stretch * lsn_map.len() as f32
        }
    );
    for (keyr, lsnr) in &ranges {
        let key_start = *key_map.get(&keyr.start).unwrap();
        let key_end = *key_map.get(&keyr.end).unwrap();
        let key_diff = key_end - key_start;
        let lsn_max = lsn_map.len();

        if key_start >= key_end {
            panic!("Invalid key range {}-{}", key_start, key_end);
        }

        let lsn_start = *lsn_map.get(&lsnr.start).unwrap();
        let lsn_end = *lsn_map.get(&lsnr.end).unwrap();

        let mut lsn_diff = (lsn_end - lsn_start) as f32;
        let mut fill = Fill::None;
        let mut margin = 0.05 * lsn_diff; // Height-dependent margin to disambiguate overlapping deltas
        let mut lsn_offset = 0.0;

        // Fill in and thicken rectangle if it's an
        // image layer so that we can see it.
        match lsn_start.cmp(&lsn_end) {
            Ordering::Less => num_deltas += 1,
            Ordering::Equal => {
                num_images += 1;
                lsn_diff = 0.3;
                lsn_offset = -lsn_diff / 2.0;
                margin = 0.05;
                fill = Fill::Color(rgb(0, 0, 0));
            }
            Ordering::Greater => panic!("Invalid lsn range {}-{}", lsn_start, lsn_end),
        }

        println!(
            " {}",
            rectangle(
                key_start as f32 + stretch * margin,
                stretch * (lsn_max as f32 - (lsn_end as f32 - margin - lsn_offset)),
                key_diff as f32 - stretch * 2.0 * margin,
                stretch * (lsn_diff - 2.0 * margin)
            )
            .fill(fill)
            .stroke(Stroke::Color(rgb(0, 0, 0), 0.1))
            .border_radius(0.4)
        );
    }
    println!("{}", EndSvg);

    eprintln!("num_images: {}", num_images);
    eprintln!("num_deltas: {}", num_deltas);

    Ok(())
}

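The tool draws delta layers as rectangles and image layers (whose LSN range collapses to a point) as thin filled bars. The sketch below, using a made-up layer file name, shows how the "__" and "-" separators split a name into its key and LSN parts; it mirrors parse_filename above but is not the pageserver's own parser, and the hex widths are illustrative only.

// Minimal sketch: split KEYSTART-KEYEND__LSNSTART[-LSNEND] into its two ranges.
fn split_layer_name(name: &str) -> ((&str, &str), (&str, &str)) {
    let (key_part, lsn_part) = name.split_once("__").expect("missing '__' separator");
    let (key_start, key_end) = key_part.split_once('-').expect("missing key '-'");
    // Image layers carry a single LSN; reuse it for both ends of the range.
    let (lsn_start, lsn_end) = lsn_part.split_once('-').unwrap_or((lsn_part, lsn_part));
    ((key_start, key_end), (lsn_start, lsn_end))
}

fn main() {
    let delta = split_layer_name("000000-FFFFFF__0000000A-000000FF");
    assert_eq!(delta.1, ("0000000A", "000000FF")); // delta layer: real LSN range
    let image = split_layer_name("000000-FFFFFF__0000000A");
    assert_eq!(image.1, ("0000000A", "0000000A")); // image layer: zero-height range
}
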
35
pageserver/src/bin/dump_layerfile.rs
Normal file
35
pageserver/src/bin/dump_layerfile.rs
Normal file
@@ -0,0 +1,35 @@
//! Main entry point for the dump_layerfile executable
//!
//! A handy tool for debugging, that's all.
use anyhow::Result;
use clap::{App, Arg};
use pageserver::page_cache;
use pageserver::tenant::dump_layerfile_from_path;
use pageserver::virtual_file;
use std::path::PathBuf;
use utils::project_git_version;

project_git_version!(GIT_VERSION);

fn main() -> Result<()> {
    let arg_matches = App::new("Neon dump_layerfile utility")
        .about("Dump contents of one layer file, for debugging")
        .version(GIT_VERSION)
        .arg(
            Arg::new("path")
                .help("Path to file to dump")
                .required(true)
                .index(1),
        )
        .get_matches();

    let path = PathBuf::from(arg_matches.value_of("path").unwrap());

    // Basic initialization of things that don't change after startup
    virtual_file::init(10);
    page_cache::init(100);

    dump_layerfile_from_path(&path, true)?;

    Ok(())
}

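The builder above uses clap's older App API with a single required positional argument. Below is a self-contained sketch of that wiring with a hypothetical program name; it only illustrates the argument handling and is not tied to the pageserver crates.

use clap::{App, Arg};

fn main() {
    // One required positional argument, read back by name after parsing.
    let matches = App::new("example-dump")
        .about("Illustrative only: one required positional argument")
        .arg(
            Arg::new("path")
                .help("Path to file to dump")
                .required(true)
                .index(1),
        )
        .get_matches();

    let path = matches.value_of("path").unwrap();
    println!("would dump {path}");
}
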
@@ -6,15 +6,13 @@ use tracing::*;

use anyhow::{anyhow, bail, Context, Result};

use clap::{Arg, ArgAction, Command};
use clap::{App, Arg};
use daemonize::Daemonize;

use fail::FailScenario;
use metrics::set_build_info_metric;

use pageserver::{
    config::{defaults::*, PageServerConf},
    http, page_cache, page_image_cache, page_service, profiling, task_mgr,
    http, page_cache, page_service, profiling, task_mgr,
    task_mgr::TaskKind,
    task_mgr::{
        BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
@@ -33,35 +31,72 @@ use utils::{

project_git_version!(GIT_VERSION);

const FEATURES: &[&str] = &[
    #[cfg(feature = "testing")]
    "testing",
    #[cfg(feature = "fail/failpoints")]
    "fail/failpoints",
    #[cfg(feature = "profiling")]
    "profiling",
];

fn version() -> String {
    format!(
        "{GIT_VERSION} failpoints: {}, features: {:?}",
        fail::has_failpoints(),
        FEATURES,
        "{GIT_VERSION} profiling:{} failpoints:{}",
        cfg!(feature = "profiling"),
        fail::has_failpoints()
    )
}

fn main() -> anyhow::Result<()> {
    let arg_matches = cli().get_matches();
    let arg_matches = App::new("Neon page server")
        .about("Materializes WAL stream to pages and serves them to the postgres")
        .version(&*version())
        .arg(

    if arg_matches.get_flag("enabled-features") {
        println!("{{\"features\": {FEATURES:?} }}");
            Arg::new("daemonize")
                .short('d')
                .long("daemonize")
                .takes_value(false)
                .help("Run in the background"),
        )
        .arg(
            Arg::new("init")
                .long("init")
                .takes_value(false)
                .help("Initialize pageserver with all given config overrides"),
        )
        .arg(
            Arg::new("workdir")
                .short('D')
                .long("workdir")
                .takes_value(true)
                .help("Working directory for the pageserver"),
        )
        // See `settings.md` for more details on the extra configuration patameters pageserver can process
        .arg(
            Arg::new("config-override")
                .short('c')
                .takes_value(true)
                .number_of_values(1)
                .multiple_occurrences(true)
                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
        )
        .arg(Arg::new("update-config").long("update-config").takes_value(false).help(
            "Update the config file when started",
        ))
        .arg(
            Arg::new("enabled-features")
                .long("enabled-features")
                .takes_value(false)
                .help("Show enabled compile time features"),
        )
        .get_matches();

    if arg_matches.is_present("enabled-features") {
        let features: &[&str] = &[
            #[cfg(feature = "testing")]
            "testing",
            #[cfg(feature = "profiling")]
            "profiling",
        ];
        println!("{{\"features\": {features:?} }}");
        return Ok(());
    }

    let workdir = arg_matches
        .get_one::<String>("workdir")
        .map(Path::new)
        .unwrap_or_else(|| Path::new(".neon"));
    let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
    let workdir = workdir
        .canonicalize()
        .with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
@@ -75,7 +110,7 @@ fn main() -> anyhow::Result<()> {
        )
    })?;

    let daemonize = arg_matches.get_flag("daemonize");
    let daemonize = arg_matches.is_present("daemonize");

    let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
        ControlFlow::Continue(conf) => conf,
@@ -87,7 +122,7 @@ fn main() -> anyhow::Result<()> {

    let tenants_path = conf.tenants_path();
    if !tenants_path.exists() {
        utils::crashsafe::create_dir_all(conf.tenants_path()).with_context(|| {
        utils::crashsafe_dir::create_dir_all(conf.tenants_path()).with_context(|| {
            format!(
                "Failed to create tenants root dir at '{}'",
                tenants_path.display()
@@ -101,7 +136,6 @@ fn main() -> anyhow::Result<()> {
    // Basic initialization of things that don't change after startup
    virtual_file::init(conf.max_file_descriptors);
    page_cache::init(conf.page_cache_size);
    page_image_cache::init(64 * conf.page_cache_size); // temporary hack for benchmarking

    start_pageserver(conf, daemonize).context("Failed to start pageserver")?;

@@ -114,8 +148,8 @@ fn initialize_config(
    arg_matches: clap::ArgMatches,
    workdir: &Path,
) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
    let init = arg_matches.get_flag("init");
    let update_config = init || arg_matches.get_flag("update-config");
    let init = arg_matches.is_present("init");
    let update_config = init || arg_matches.is_present("update-config");

    let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
        if init {
@@ -157,10 +191,13 @@ fn initialize_config(
        )
    };

    if let Some(values) = arg_matches.get_many::<String>("config-override") {
    if let Some(values) = arg_matches.values_of("config-override") {
        for option_line in values {
            let doc = toml_edit::Document::from_str(option_line).with_context(|| {
                format!("Option '{option_line}' could not be parsed as a toml document")
                format!(
                    "Option '{}' could not be parsed as a toml document",
                    option_line
                )
            })?;

            for (key, item) in doc.iter() {
@@ -200,9 +237,9 @@ fn initialize_config(

fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
    // Initialize logger
    let log_file = logging::init(LOG_FILE_NAME, daemonize, conf.log_format)?;
    let log_file = logging::init(LOG_FILE_NAME, daemonize)?;

    info!("version: {}", version());
    info!("version: {GIT_VERSION}");

    // TODO: Check that it looks like a valid repository before going further

@@ -319,8 +356,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
        },
    );

    set_build_info_metric(GIT_VERSION);

    // All started up! Now just sit and wait for shutdown signal.
    signals.handle(|signal| match signal {
        Signal::Quit => {
@@ -343,55 +378,3 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
        }
    })
}

fn cli() -> Command {
    Command::new("Neon page server")
        .about("Materializes WAL stream to pages and serves them to the postgres")
        .version(version())
        .arg(

            Arg::new("daemonize")
                .short('d')
                .long("daemonize")
                .action(ArgAction::SetTrue)
                .help("Run in the background"),
        )
        .arg(
            Arg::new("init")
                .long("init")
                .action(ArgAction::SetTrue)
                .help("Initialize pageserver with all given config overrides"),
        )
        .arg(
            Arg::new("workdir")
                .short('D')
                .long("workdir")
                .help("Working directory for the pageserver"),
        )
        // See `settings.md` for more details on the extra configuration patameters pageserver can process
        .arg(
            Arg::new("config-override")
                .short('c')
                .num_args(1)
                .action(ArgAction::Append)
                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there). \
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
        )
        .arg(
            Arg::new("update-config")
                .long("update-config")
                .action(ArgAction::SetTrue)
                .help("Update the config file when started"),
        )
        .arg(
            Arg::new("enabled-features")
                .long("enabled-features")
                .action(ArgAction::SetTrue)
                .help("Show enabled compile time features"),
        )
}

#[test]
fn verify_cli() {
    cli().debug_assert();
}

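The two interleaved versions above differ mainly in the clap style they use: the older builder declares flags with App plus takes_value(false) and reads them with is_present, the newer one uses Command with ArgAction::SetTrue and get_flag, and adds a debug_assert test. A minimal sketch of the newer style, independent of the pageserver's actual arguments:

use clap::{Arg, ArgAction, Command};

// Illustrative only: a boolean flag in the newer builder style. With
// ArgAction::SetTrue the flag takes no value and is read back with get_flag,
// replacing the older takes_value(false) / is_present() pair.
fn cli() -> Command {
    Command::new("example")
        .arg(
            Arg::new("daemonize")
                .short('d')
                .long("daemonize")
                .action(ArgAction::SetTrue)
                .help("Run in the background"),
        )
}

fn main() {
    let matches = cli().get_matches();
    println!("daemonize = {}", matches.get_flag("daemonize"));
}

#[test]
fn verify_cli() {
    // Catches builder misconfiguration (duplicate ids, bad short/long names) at test time.
    cli().debug_assert();
}
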
@@ -1,154 +0,0 @@
|
||||
//! A helper tool to manage pageserver binary files.
|
||||
//! Accepts a file as an argument, attempts to parse it with all ways possible
|
||||
//! and prints its interpreted context.
|
||||
//!
|
||||
//! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use clap::{value_parser, Arg, Command};
|
||||
|
||||
use pageserver::{
|
||||
page_cache,
|
||||
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
|
||||
virtual_file,
|
||||
};
|
||||
use postgres_ffi::ControlFileData;
|
||||
use utils::{lsn::Lsn, project_git_version};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
const METADATA_SUBCOMMAND: &str = "metadata";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let arg_matches = cli().get_matches();
|
||||
|
||||
match arg_matches.subcommand() {
|
||||
Some((subcommand_name, subcommand_matches)) => {
|
||||
let path = subcommand_matches
|
||||
.get_one::<PathBuf>("metadata_path")
|
||||
.context("'metadata_path' argument is missing")?
|
||||
.to_path_buf();
|
||||
anyhow::ensure!(
|
||||
subcommand_name == METADATA_SUBCOMMAND,
|
||||
"Unknown subcommand {subcommand_name}"
|
||||
);
|
||||
handle_metadata(&path, subcommand_matches)?;
|
||||
}
|
||||
None => {
|
||||
let path = arg_matches
|
||||
.get_one::<PathBuf>("path")
|
||||
.context("'path' argument is missing")?
|
||||
.to_path_buf();
|
||||
println!(
|
||||
"No subcommand specified, attempting to guess the format for file {}",
|
||||
path.display()
|
||||
);
|
||||
if let Err(e) = read_pg_control_file(&path) {
|
||||
println!(
|
||||
"Failed to read input file as a pg control one: {e:#}\n\
|
||||
Attempting to read it as layer file"
|
||||
);
|
||||
print_layerfile(&path)?;
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> {
|
||||
let control_file = ControlFileData::decode(&std::fs::read(&control_file_path)?)?;
|
||||
println!("{control_file:?}");
|
||||
let control_file_initdb = Lsn(control_file.checkPoint);
|
||||
println!(
|
||||
"pg_initdb_lsn: {}, aligned: {}",
|
||||
control_file_initdb,
|
||||
control_file_initdb.align()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_layerfile(path: &Path) -> anyhow::Result<()> {
|
||||
// Basic initialization of things that don't change after startup
|
||||
virtual_file::init(10);
|
||||
page_cache::init(100);
|
||||
dump_layerfile_from_path(path, true)
|
||||
}
|
||||
|
||||
fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), anyhow::Error> {
|
||||
let metadata_bytes = std::fs::read(&path)?;
|
||||
let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
|
||||
println!("Current metadata:\n{meta:?}");
|
||||
let mut update_meta = false;
|
||||
if let Some(disk_consistent_lsn) = arg_matches.get_one::<String>("disk_consistent_lsn") {
|
||||
meta = TimelineMetadata::new(
|
||||
Lsn::from_str(disk_consistent_lsn)?,
|
||||
meta.prev_record_lsn(),
|
||||
meta.ancestor_timeline(),
|
||||
meta.ancestor_lsn(),
|
||||
meta.latest_gc_cutoff_lsn(),
|
||||
meta.initdb_lsn(),
|
||||
meta.pg_version(),
|
||||
);
|
||||
update_meta = true;
|
||||
}
|
||||
if let Some(prev_record_lsn) = arg_matches.get_one::<String>("prev_record_lsn") {
|
||||
meta = TimelineMetadata::new(
|
||||
meta.disk_consistent_lsn(),
|
||||
Some(Lsn::from_str(prev_record_lsn)?),
|
||||
meta.ancestor_timeline(),
|
||||
meta.ancestor_lsn(),
|
||||
meta.latest_gc_cutoff_lsn(),
|
||||
meta.initdb_lsn(),
|
||||
meta.pg_version(),
|
||||
);
|
||||
update_meta = true;
|
||||
}
|
||||
|
||||
if update_meta {
|
||||
let metadata_bytes = meta.to_bytes()?;
|
||||
std::fs::write(&path, &metadata_bytes)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
Command::new("Neon Pageserver binutils")
|
||||
.about("Reads pageserver (and related) binary files management utility")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("path")
|
||||
.help("Input file path")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.required(false),
|
||||
)
|
||||
.subcommand(
|
||||
Command::new(METADATA_SUBCOMMAND)
|
||||
.about("Read and update pageserver metadata file")
|
||||
.arg(
|
||||
Arg::new("metadata_path")
|
||||
.help("Input metadata file path")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.required(false),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("disk_consistent_lsn")
|
||||
.long("disk_consistent_lsn")
|
||||
.help("Replace disk consistent Lsn"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("prev_record_lsn")
|
||||
.long("prev_record_lsn")
|
||||
.help("Replace previous record Lsn"),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
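The deleted helper above folds two tools into one binary: with no subcommand it tries to guess the input file's format, and a metadata subcommand reads and updates the metadata file. A small sketch of that dispatch shape using clap's subcommand API follows; the program and argument names are illustrative, not the pageserver's.

use clap::{Arg, Command};

fn cli() -> Command {
    Command::new("example-binutils")
        .arg(Arg::new("path").required(false))
        .subcommand(
            Command::new("metadata").arg(Arg::new("metadata_path").required(false)),
        )
}

fn main() {
    let matches = cli().get_matches();
    match matches.subcommand() {
        // A named subcommand gets its own ArgMatches to pull arguments from.
        Some((name, sub)) => {
            let path = sub.get_one::<String>("metadata_path");
            println!("subcommand {name}, path {path:?}");
        }
        // No subcommand: fall back to the top-level positional argument.
        None => {
            let path = matches.get_one::<String>("path");
            println!("guessing format of {path:?}");
        }
    }
}
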
75
pageserver/src/bin/update_metadata.rs
Normal file
75
pageserver/src/bin/update_metadata.rs
Normal file
@@ -0,0 +1,75 @@
//! Main entry point for the edit_metadata executable
//!
//! A handy tool for debugging, that's all.
use anyhow::Result;
use clap::{App, Arg};
use pageserver::tenant::metadata::TimelineMetadata;
use std::path::PathBuf;
use std::str::FromStr;
use utils::{lsn::Lsn, project_git_version};

project_git_version!(GIT_VERSION);

fn main() -> Result<()> {
    let arg_matches = App::new("Neon update metadata utility")
        .about("Dump or update metadata file")
        .version(GIT_VERSION)
        .arg(
            Arg::new("path")
                .help("Path to metadata file")
                .required(true),
        )
        .arg(
            Arg::new("disk_lsn")
                .short('d')
                .long("disk_lsn")
                .takes_value(true)
                .help("Replace disk constistent lsn"),
        )
        .arg(
            Arg::new("prev_lsn")
                .short('p')
                .long("prev_lsn")
                .takes_value(true)
                .help("Previous record LSN"),
        )
        .get_matches();

    let path = PathBuf::from(arg_matches.value_of("path").unwrap());
    let metadata_bytes = std::fs::read(&path)?;
    let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
    println!("Current metadata:\n{:?}", &meta);

    let mut update_meta = false;

    if let Some(disk_lsn) = arg_matches.value_of("disk_lsn") {
        meta = TimelineMetadata::new(
            Lsn::from_str(disk_lsn)?,
            meta.prev_record_lsn(),
            meta.ancestor_timeline(),
            meta.ancestor_lsn(),
            meta.latest_gc_cutoff_lsn(),
            meta.initdb_lsn(),
            meta.pg_version(),
        );
        update_meta = true;
    }

    if let Some(prev_lsn) = arg_matches.value_of("prev_lsn") {
        meta = TimelineMetadata::new(
            meta.disk_consistent_lsn(),
            Some(Lsn::from_str(prev_lsn)?),
            meta.ancestor_timeline(),
            meta.ancestor_lsn(),
            meta.latest_gc_cutoff_lsn(),
            meta.initdb_lsn(),
            meta.pg_version(),
        );
        update_meta = true;
    }
    if update_meta {
        let metadata_bytes = meta.to_bytes()?;
        std::fs::write(&path, &metadata_bytes)?;
    }
    Ok(())
}

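Both metadata tools in this compare follow the same read-modify-write flow: decode the file, rebuild the value with selected fields replaced, and only write it back if something changed. A generic sketch of that flow with a stand-in struct (not the pageserver's TimelineMetadata):

// `Meta` is a placeholder type used only to show the shape of the flow.
#[derive(Debug, Clone, PartialEq)]
struct Meta {
    disk_consistent_lsn: u64,
    prev_record_lsn: Option<u64>,
}

fn apply_overrides(mut meta: Meta, disk: Option<u64>, prev: Option<u64>) -> (Meta, bool) {
    let before = meta.clone();
    if let Some(lsn) = disk {
        meta.disk_consistent_lsn = lsn;
    }
    if let Some(lsn) = prev {
        meta.prev_record_lsn = Some(lsn);
    }
    // Only report a change (and hence only rewrite the file) if a field moved.
    let changed = meta != before;
    (meta, changed)
}

fn main() {
    let meta = Meta { disk_consistent_lsn: 0x16, prev_record_lsn: None };
    let (updated, changed) = apply_overrides(meta, Some(0x20), None);
    assert!(changed);
    println!("{updated:?}");
}
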
@@ -7,7 +7,6 @@
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use std::env;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
@@ -17,7 +16,6 @@ use toml_edit::{Document, Item};
|
||||
use url::Url;
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
logging::LogFormat,
|
||||
postgres_backend::AuthType,
|
||||
};
|
||||
|
||||
@@ -26,17 +24,16 @@ use crate::tenant_config::{TenantConf, TenantConfOpt};
|
||||
|
||||
/// The name of the metadata file pageserver creates per timeline.
|
||||
pub const METADATA_FILE_NAME: &str = "metadata";
|
||||
pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
|
||||
const TENANT_CONFIG_NAME: &str = "config";
|
||||
|
||||
pub mod defaults {
|
||||
use crate::tenant_config::defaults::*;
|
||||
use const_format::formatcp;
|
||||
|
||||
pub use pageserver_api::{
|
||||
DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_ADDR,
|
||||
DEFAULT_PG_LISTEN_PORT,
|
||||
};
|
||||
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
|
||||
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
|
||||
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
|
||||
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
|
||||
|
||||
pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
|
||||
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
|
||||
@@ -46,8 +43,6 @@ pub mod defaults {
|
||||
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
|
||||
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
|
||||
|
||||
pub const DEFAULT_LOG_FORMAT: &str = "plain";
|
||||
|
||||
///
|
||||
/// Default built-in configuration file.
|
||||
///
|
||||
@@ -66,7 +61,6 @@ pub mod defaults {
|
||||
# initial superuser role name to use when creating a new tenant
|
||||
#initial_superuser_name = '{DEFAULT_SUPERUSER}'
|
||||
|
||||
#log_format = '{DEFAULT_LOG_FORMAT}'
|
||||
# [tenant_config]
|
||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
||||
@@ -130,8 +124,6 @@ pub struct PageServerConf {
|
||||
|
||||
/// Etcd broker endpoints to connect to.
|
||||
pub broker_endpoints: Vec<Url>,
|
||||
|
||||
pub log_format: LogFormat,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -198,8 +190,6 @@ struct PageServerConfigBuilder {
|
||||
profiling: BuilderValue<ProfilingConfig>,
|
||||
broker_etcd_prefix: BuilderValue<String>,
|
||||
broker_endpoints: BuilderValue<Vec<Url>>,
|
||||
|
||||
log_format: BuilderValue<LogFormat>,
|
||||
}
|
||||
|
||||
impl Default for PageServerConfigBuilder {
|
||||
@@ -227,7 +217,6 @@ impl Default for PageServerConfigBuilder {
|
||||
profiling: Set(ProfilingConfig::Disabled),
|
||||
broker_etcd_prefix: Set(etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string()),
|
||||
broker_endpoints: Set(Vec::new()),
|
||||
log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -300,10 +289,6 @@ impl PageServerConfigBuilder {
|
||||
self.profiling = BuilderValue::Set(profiling)
|
||||
}
|
||||
|
||||
pub fn log_format(&mut self, log_format: LogFormat) {
|
||||
self.log_format = BuilderValue::Set(log_format)
|
||||
}
|
||||
|
||||
pub fn build(self) -> anyhow::Result<PageServerConf> {
|
||||
let broker_endpoints = self
|
||||
.broker_endpoints
|
||||
@@ -348,7 +333,6 @@ impl PageServerConfigBuilder {
|
||||
broker_etcd_prefix: self
|
||||
.broker_etcd_prefix
|
||||
.ok_or(anyhow!("missing broker_etcd_prefix"))?,
|
||||
log_format: self.log_format.ok_or(anyhow!("missing log_format"))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -380,17 +364,6 @@ impl PageServerConf {
|
||||
self.timelines_path(tenant_id).join(timeline_id.to_string())
|
||||
}
|
||||
|
||||
pub fn timeline_uninit_mark_file_path(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> PathBuf {
|
||||
path_with_suffix_extension(
|
||||
self.timeline_path(&timeline_id, &tenant_id),
|
||||
TIMELINE_UNINIT_MARK_SUFFIX,
|
||||
)
|
||||
}
|
||||
|
||||
/// Points to a place in pageserver's local directory,
|
||||
/// where certain timeline's metadata file should be located.
|
||||
pub fn metadata_path(&self, timeline_id: TimelineId, tenant_id: TenantId) -> PathBuf {
|
||||
@@ -401,28 +374,28 @@ impl PageServerConf {
|
||||
//
|
||||
// Postgres distribution paths
|
||||
//
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
match pg_version {
|
||||
14 => Ok(path.join(format!("v{pg_version}"))),
|
||||
15 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => path.join(format!("v{pg_version}")),
|
||||
15 => path.join(format!("v{pg_version}")),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -473,9 +446,6 @@ impl PageServerConf {
|
||||
})
|
||||
.collect::<anyhow::Result<_>>()?,
|
||||
),
|
||||
"log_format" => builder.log_format(
|
||||
LogFormat::from_config(&parse_toml_string(key, item)?)?
|
||||
),
|
||||
_ => bail!("unrecognized pageserver option '{key}'"),
|
||||
}
|
||||
}
|
||||
@@ -588,7 +558,6 @@ impl PageServerConf {
|
||||
default_tenant_conf: TenantConf::dummy_conf(),
|
||||
broker_endpoints: Vec::new(),
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -683,8 +652,6 @@ max_file_descriptors = 333
|
||||
initial_superuser_name = 'zzzz'
|
||||
id = 10
|
||||
|
||||
log_format = 'json'
|
||||
|
||||
"#;
|
||||
|
||||
#[test]
|
||||
@@ -724,7 +691,6 @@ log_format = 'json'
|
||||
.parse()
|
||||
.expect("Failed to parse a valid broker endpoint URL")],
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
|
||||
},
|
||||
"Correct defaults should be used when no config values are provided"
|
||||
);
|
||||
@@ -769,7 +735,6 @@ log_format = 'json'
|
||||
.parse()
|
||||
.expect("Failed to parse a valid broker endpoint URL")],
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::Json,
|
||||
},
|
||||
"Should be able to parse all basic config values correctly"
|
||||
);
|
||||
|
||||
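The config hunk above swaps fallible anyhow::Result accessors for panicking ones when resolving the versioned Postgres install directory. A small sketch of the fallible shape, with a made-up base path, shows why callers can add context instead of crashing the process on an unexpected version:

use anyhow::bail;
use std::path::{Path, PathBuf};

// Returns <base>/v14 or <base>/v15 and refuses anything else.
fn pg_distrib_dir(base: &Path, pg_version: u32) -> anyhow::Result<PathBuf> {
    match pg_version {
        14 | 15 => Ok(base.join(format!("v{pg_version}"))),
        _ => bail!("Unsupported postgres version: {pg_version}"),
    }
}

fn main() -> anyhow::Result<()> {
    let base = PathBuf::from("/usr/local/pg_install"); // illustrative path
    println!("{}", pg_distrib_dir(&base, 14)?.display());
    assert!(pg_distrib_dir(&base, 12).is_err());
    Ok(())
}
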
@@ -1,4 +1,3 @@
pub mod models;
pub mod routes;
pub use routes::make_router;

pub use pageserver_api::models;

182
pageserver/src/http/models.rs
Normal file
182
pageserver/src/http/models.rs
Normal file
@@ -0,0 +1,182 @@
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::tenant::TenantState;
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_timeline_id: Option<TimelineId>,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_start_lsn: Option<Lsn>,
|
||||
pub pg_version: Option<u32>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
pub struct TenantCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_tenant_id: Option<TenantId>,
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
pub walreceiver_connect_timeout: Option<String>,
|
||||
pub lagging_wal_timeout: Option<String>,
|
||||
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub TenantId);
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct StatusResponse {
|
||||
pub id: NodeId,
|
||||
}
|
||||
|
||||
impl TenantCreateRequest {
|
||||
pub fn new(new_tenant_id: Option<TenantId>) -> TenantCreateRequest {
|
||||
TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TenantConfigRequest {
|
||||
pub tenant_id: TenantId,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
pub walreceiver_connect_timeout: Option<String>,
|
||||
pub lagging_wal_timeout: Option<String>,
|
||||
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
||||
}
|
||||
|
||||
impl TenantConfigRequest {
|
||||
pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
|
||||
TenantConfigRequest {
|
||||
tenant_id,
|
||||
checkpoint_distance: None,
|
||||
checkpoint_timeout: None,
|
||||
compaction_target_size: None,
|
||||
compaction_period: None,
|
||||
compaction_threshold: None,
|
||||
gc_horizon: None,
|
||||
gc_period: None,
|
||||
image_creation_threshold: None,
|
||||
pitr_interval: None,
|
||||
walreceiver_connect_timeout: None,
|
||||
lagging_wal_timeout: None,
|
||||
max_lsn_wal_lag: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TenantInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: TenantId,
|
||||
pub state: TenantState,
|
||||
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
||||
pub has_in_progress_downloads: Option<bool>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct LocalTimelineInfo {
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub last_record_lsn: Lsn,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub prev_record_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub latest_gc_cutoff_lsn: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
pub current_logical_size_non_incremental: Option<u64>,
|
||||
pub current_physical_size_non_incremental: Option<u64>,
|
||||
|
||||
pub wal_source_connstr: Option<String>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub last_received_msg_lsn: Option<Lsn>,
|
||||
/// the timestamp (in microseconds) of the last received message
|
||||
pub last_received_msg_ts: Option<u128>,
|
||||
pub pg_version: u32,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct RemoteTimelineInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub remote_consistent_lsn: Lsn,
|
||||
pub awaits_download: bool,
|
||||
}
|
||||
|
||||
///
|
||||
/// This represents the output of the "timeline_detail" API call.
|
||||
///
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub timeline_id: TimelineId,
|
||||
pub local: Option<LocalTimelineInfo>,
|
||||
pub remote: Option<RemoteTimelineInfo>,
|
||||
}
|
||||
|
||||
pub type ConfigureFailpointsRequest = Vec<FailpointConfig>;
|
||||
|
||||
/// Information for configuring a single fail point
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct FailpointConfig {
|
||||
/// Name of the fail point
|
||||
pub name: String,
|
||||
/// List of actions to take, using the format described in `fail::cfg`
|
||||
///
|
||||
/// We also support `actions = "exit"` to cause the fail point to immediately exit.
|
||||
pub actions: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct TimelineGcRequest {
|
||||
pub gc_horizon: Option<u64>,
|
||||
}
|
||||
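The request and response structs above rely on serde_with's #[serde_as(as = "Option<DisplayFromStr>")] so IDs and LSNs travel as strings in JSON rather than as numbers or byte arrays. A standalone sketch of the mechanism with a plain u64 field (the type and field names here are placeholders, not the pageserver's):

use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};

#[serde_as]
#[derive(Serialize, Deserialize, Debug)]
struct CreateRequest {
    // Serialized through Display/FromStr, so the JSON value is a string.
    #[serde(default)]
    #[serde_as(as = "Option<DisplayFromStr>")]
    new_id: Option<u64>,
}

fn main() -> Result<(), serde_json::Error> {
    let req: CreateRequest = serde_json::from_str(r#"{"new_id": "42"}"#)?;
    assert_eq!(req.new_id, Some(42));
    // A missing field is fine thanks to #[serde(default)].
    let empty: CreateRequest = serde_json::from_str("{}")?;
    assert_eq!(empty.new_id, None);
    println!("{}", serde_json::to_string(&req)?); // {"new_id":"42"}
    Ok(())
}
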
@@ -1,11 +1,7 @@
|
||||
openapi: "3.0.2"
|
||||
info:
|
||||
title: Page Server API
|
||||
description: Neon Pageserver API
|
||||
version: "1.0"
|
||||
license:
|
||||
name: "Apache"
|
||||
url: https://github.com/neondatabase/neon/blob/main/LICENSE
|
||||
servers:
|
||||
- url: ""
|
||||
paths:
|
||||
@@ -211,61 +207,6 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
get:
|
||||
description: Get LSN by a timestamp
|
||||
parameters:
|
||||
- name: timestamp
|
||||
in: query
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: date-time
|
||||
description: A timestamp to get the LSN
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: string
|
||||
"400":
|
||||
description: Error when no tenant id found in path, no timeline id or invalid timestamp
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
/v1/tenant/{tenant_id}/attach:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
@@ -615,10 +556,6 @@ components:
|
||||
required:
|
||||
- timeline_id
|
||||
- tenant_id
|
||||
- last_record_lsn
|
||||
- disk_consistent_lsn
|
||||
- awaits_download
|
||||
- state
|
||||
properties:
|
||||
timeline_id:
|
||||
type: string
|
||||
@@ -626,15 +563,33 @@ components:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
local:
|
||||
$ref: "#/components/schemas/LocalTimelineInfo"
|
||||
remote:
|
||||
$ref: "#/components/schemas/RemoteTimelineInfo"
|
||||
RemoteTimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
- awaits_download
|
||||
- remote_consistent_lsn
|
||||
properties:
|
||||
awaits_download:
|
||||
type: boolean
|
||||
remote_consistent_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
LocalTimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
- last_record_lsn
|
||||
- disk_consistent_lsn
|
||||
properties:
|
||||
last_record_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
disk_consistent_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
remote_consistent_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
@@ -659,41 +614,7 @@ components:
|
||||
format: hex
|
||||
last_received_msg_ts:
|
||||
type: integer
|
||||
awaits_download:
|
||||
type: boolean
|
||||
state:
|
||||
type: string
|
||||
|
||||
# These 'local' and 'remote' fields just duplicate some of the fields
|
||||
# above. They are kept for backwards-compatibility. They can be removed,
|
||||
# when the control plane has been updated to look at the above fields
|
||||
# directly.
|
||||
local:
|
||||
$ref: "#/components/schemas/LocalTimelineInfo"
|
||||
remote:
|
||||
$ref: "#/components/schemas/RemoteTimelineInfo"
|
||||
|
||||
LocalTimelineInfo:
|
||||
type: object
|
||||
properties:
|
||||
ancestor_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
ancestor_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
current_logical_size:
|
||||
type: integer
|
||||
current_physical_size:
|
||||
type: integer
|
||||
RemoteTimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
- remote_consistent_lsn
|
||||
properties:
|
||||
remote_consistent_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
Error:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -12,7 +12,6 @@ use super::models::{
|
||||
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
|
||||
TimelineCreateRequest,
|
||||
};
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::storage_sync;
|
||||
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
|
||||
use crate::tenant::{TenantState, Timeline};
|
||||
@@ -79,13 +78,13 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
|
||||
get_state(request).conf
|
||||
}
|
||||
|
||||
// Helper function to construct a TimelineInfo struct for a timeline
|
||||
async fn build_timeline_info(
|
||||
state: &State,
|
||||
// Helper functions to construct a LocalTimelineInfo struct for a timeline
|
||||
|
||||
fn local_timeline_info_from_timeline(
|
||||
timeline: &Arc<Timeline>,
|
||||
include_non_incremental_logical_size: bool,
|
||||
include_non_incremental_physical_size: bool,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
) -> anyhow::Result<LocalTimelineInfo> {
|
||||
let last_record_lsn = timeline.get_last_record_lsn();
|
||||
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
|
||||
let guard = timeline.last_received_wal.lock().unwrap();
|
||||
@@ -100,48 +99,24 @@ async fn build_timeline_info(
|
||||
}
|
||||
};
|
||||
|
||||
let (remote_consistent_lsn, awaits_download) = if let Some(remote_entry) = state
|
||||
.remote_index
|
||||
.read()
|
||||
.await
|
||||
.timeline_entry(&TenantTimelineId {
|
||||
tenant_id: timeline.tenant_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
}) {
|
||||
(
|
||||
Some(remote_entry.metadata.disk_consistent_lsn()),
|
||||
remote_entry.awaits_download,
|
||||
)
|
||||
} else {
|
||||
(None, false)
|
||||
};
|
||||
|
||||
let ancestor_timeline_id = timeline.get_ancestor_timeline_id();
|
||||
let ancestor_lsn = match timeline.get_ancestor_lsn() {
|
||||
Lsn(0) => None,
|
||||
lsn @ Lsn(_) => Some(lsn),
|
||||
};
|
||||
let current_logical_size = match timeline.get_current_logical_size() {
|
||||
Ok(size) => Some(size),
|
||||
Err(err) => {
|
||||
error!("Timeline info creation failed to get current logical size: {err:?}");
|
||||
None
|
||||
}
|
||||
};
|
||||
let current_physical_size = Some(timeline.get_physical_size());
|
||||
let state = timeline.current_state();
|
||||
|
||||
let info = TimelineInfo {
|
||||
tenant_id: timeline.tenant_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
let info = LocalTimelineInfo {
|
||||
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
|
||||
ancestor_lsn: {
|
||||
match timeline.get_ancestor_lsn() {
|
||||
Lsn(0) => None,
|
||||
lsn @ Lsn(_) => Some(lsn),
|
||||
}
|
||||
},
|
||||
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
|
||||
last_record_lsn,
|
||||
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
|
||||
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
|
||||
current_logical_size,
|
||||
current_physical_size,
|
||||
current_logical_size: Some(
|
||||
timeline
|
||||
.get_current_logical_size()
|
||||
.context("Timeline info creation failed to get current logical size")?,
|
||||
),
|
||||
current_physical_size: Some(timeline.get_physical_size()),
|
||||
current_logical_size_non_incremental: if include_non_incremental_logical_size {
|
||||
Some(timeline.get_current_logical_size_non_incremental(last_record_lsn)?)
|
||||
} else {
|
||||
@@ -156,26 +131,32 @@ async fn build_timeline_info(
|
||||
last_received_msg_lsn,
|
||||
last_received_msg_ts,
|
||||
pg_version: timeline.pg_version,
|
||||
|
||||
remote_consistent_lsn,
|
||||
awaits_download,
|
||||
state,
|
||||
|
||||
// Duplicate some fields in 'local' and 'remote' fields, for backwards-compatility
|
||||
// with the control plane.
|
||||
local: LocalTimelineInfo {
|
||||
ancestor_timeline_id,
|
||||
ancestor_lsn,
|
||||
current_logical_size,
|
||||
current_physical_size,
|
||||
},
|
||||
remote: RemoteTimelineInfo {
|
||||
remote_consistent_lsn,
|
||||
},
|
||||
};
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
fn list_local_timelines(
|
||||
tenant_id: TenantId,
|
||||
include_non_incremental_logical_size: bool,
|
||||
include_non_incremental_physical_size: bool,
|
||||
) -> Result<Vec<(TimelineId, LocalTimelineInfo)>> {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)?;
|
||||
let timelines = tenant.list_timelines();
|
||||
|
||||
let mut local_timeline_info = Vec::with_capacity(timelines.len());
|
||||
for (timeline_id, repository_timeline) in timelines {
|
||||
local_timeline_info.push((
|
||||
timeline_id,
|
||||
local_timeline_info_from_timeline(
|
||||
&repository_timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)?,
|
||||
))
|
||||
}
|
||||
Ok(local_timeline_info)
|
||||
}
|
||||
|
||||
// healthcheck handler
|
||||
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let config = get_config(&request);
|
||||
@@ -187,8 +168,6 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
let request_data: TimelineCreateRequest = json_request(&mut request).await?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let new_timeline_info = async {
|
||||
match tenant.create_timeline(
|
||||
@@ -199,10 +178,14 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
).await {
|
||||
Ok(Some(new_timeline)) => {
|
||||
// Created. Construct a TimelineInfo for it.
|
||||
let timeline_info = build_timeline_info(state, &new_timeline, false, false)
|
||||
.await
|
||||
let local_info = local_timeline_info_from_timeline(&new_timeline, false, false)
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
Ok(Some(timeline_info))
|
||||
Ok(Some(TimelineInfo {
|
||||
tenant_id,
|
||||
timeline_id: new_timeline.timeline_id,
|
||||
local: Some(local_info),
|
||||
remote: None,
|
||||
}))
|
||||
}
|
||||
Ok(None) => Ok(None), // timeline already exists
|
||||
Err(err) => Err(ApiError::InternalServerError(err)),
|
||||
@@ -225,8 +208,6 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
query_param_present(&request, "include-non-incremental-physical-size");
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let timelines = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
@@ -236,18 +217,36 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
|
||||
|
||||
let mut response_data = Vec::with_capacity(timelines.len());
|
||||
for timeline in timelines {
|
||||
let timeline_info = build_timeline_info(
|
||||
state,
|
||||
for (timeline_id, timeline) in timelines {
|
||||
let local = match local_timeline_info_from_timeline(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
.await
|
||||
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
) {
|
||||
Ok(local) => Some(local),
|
||||
Err(e) => {
|
||||
error!("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}");
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
response_data.push(timeline_info);
|
||||
response_data.push(TimelineInfo {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
local,
|
||||
remote: get_state(&request)
|
||||
.remote_index
|
||||
.read()
|
||||
.await
|
||||
.timeline_entry(&TenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
.map(|remote_entry| RemoteTimelineInfo {
|
||||
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
|
||||
awaits_download: remote_entry.awaits_download,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
@@ -266,23 +265,6 @@ fn query_param_present(request: &Request<Body>, param: &str) -> bool {
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn get_query_param(request: &Request<Body>, param_name: &str) -> Result<String, ApiError> {
|
||||
request.uri().query().map_or(
|
||||
Err(ApiError::BadRequest(anyhow!("empty query in request"))),
|
||||
|v| {
|
||||
url::form_urlencoded::parse(v.as_bytes())
|
||||
.into_owned()
|
||||
.find(|(k, _)| k == param_name)
|
||||
.map_or(
|
||||
Err(ApiError::BadRequest(anyhow!(
|
||||
"no {param_name} specified in query parameters"
|
||||
))),
|
||||
|(_, v)| Ok(v),
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
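get_query_param above scans the raw query string with url::form_urlencoded rather than a routing helper. A reduced sketch of the same lookup, returning Option instead of the handler's ApiError:

// Reduced sketch of the query-string lookup used above; mapping errors to
// ApiError is omitted and a plain Option is returned instead.
fn get_query_param(query: &str, param_name: &str) -> Option<String> {
    url::form_urlencoded::parse(query.as_bytes())
        .into_owned()
        .find(|(k, _)| k == param_name)
        .map(|(_, v)| v)
}

fn main() {
    let query = "timestamp=2022-01-01T00%3A00%3A00Z&other=1";
    assert_eq!(
        get_query_param(query, "timestamp").as_deref(),
        Some("2022-01-01T00:00:00Z") // percent-decoding is handled by the parser
    );
    assert_eq!(get_query_param(query, "missing"), None);
}
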
async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
@@ -292,59 +274,59 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
query_param_present(&request, "include-non-incremental-physical-size");
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let timeline_info = async {
|
||||
let (local_timeline_info, remote_timeline_info) = async {
|
||||
let timeline = tokio::task::spawn_blocking(move || {
|
||||
tenant_mgr::get_tenant(tenant_id, true)?.get_timeline(timeline_id, false)
|
||||
tenant_mgr::get_tenant(tenant_id, true)?.get_timeline(timeline_id)
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
|
||||
|
||||
let timeline = timeline.map_err(ApiError::NotFound)?;
|
||||
let local_timeline_info = match timeline.and_then(|timeline| {
|
||||
local_timeline_info_from_timeline(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
}) {
|
||||
Ok(local_info) => Some(local_info),
|
||||
Err(e) => {
|
||||
error!("Failed to get local timeline info: {e:#}");
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let timeline_info = build_timeline_info(
|
||||
state,
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
.await
|
||||
.context("Failed to get local timeline info: {e:#}")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
Ok::<_, ApiError>(timeline_info)
|
||||
let remote_timeline_info = {
|
||||
let remote_index_read = get_state(&request).remote_index.read().await;
|
||||
remote_index_read
|
||||
.timeline_entry(&TenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
.map(|remote_entry| RemoteTimelineInfo {
|
||||
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
|
||||
awaits_download: remote_entry.awaits_download,
|
||||
})
|
||||
};
|
||||
Ok::<_, ApiError>((local_timeline_info, remote_timeline_info))
|
||||
}
|
||||
.instrument(info_span!("timeline_detail", tenant = %tenant_id, timeline = %timeline_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, timeline_info)
|
||||
}
|
||||
|
||||
async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let timestamp_raw = get_query_param(&request, "timestamp")?;
|
||||
let timestamp = humantime::parse_rfc3339(timestamp_raw.as_str())
|
||||
.with_context(|| format!("Invalid time: {:?}", timestamp_raw))
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
|
||||
|
||||
let timeline = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let result = match timeline
|
||||
.find_lsn_for_timestamp(timestamp_pg)
|
||||
.map_err(ApiError::InternalServerError)?
|
||||
{
|
||||
LsnForTimestamp::Present(lsn) => format!("{lsn}"),
|
||||
LsnForTimestamp::Future(_lsn) => "future".into(),
|
||||
LsnForTimestamp::Past(_lsn) => "past".into(),
|
||||
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
|
||||
};
|
||||
json_response(StatusCode::OK, result)
|
||||
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
|
||||
Err(ApiError::NotFound(anyhow!(
|
||||
"Timeline {tenant_id}/{timeline_id} is not found neither locally nor remotely"
|
||||
)))
|
||||
} else {
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
TimelineInfo {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
local: local_timeline_info,
|
||||
remote: remote_timeline_info,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
|
||||
@@ -355,16 +337,9 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
info!("Handling tenant attach {tenant_id}");
|
||||
|
||||
tokio::task::spawn_blocking(move || match tenant_mgr::get_tenant(tenant_id, false) {
|
||||
Ok(tenant) => {
|
||||
if tenant.list_timelines().is_empty() {
|
||||
info!("Attaching to tenant {tenant_id} with zero timelines");
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ApiError::Conflict(
|
||||
"Tenant is already present locally".to_owned(),
|
||||
))
|
||||
}
|
||||
}
|
||||
Ok(_) => Err(ApiError::Conflict(
|
||||
"Tenant is already present locally".to_owned(),
|
||||
)),
|
||||
Err(_) => Ok(()),
|
||||
})
|
||||
.await
|
||||
@@ -387,7 +362,7 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
}
|
||||
return json_response(StatusCode::ACCEPTED, ());
|
||||
}
|
||||
// no tenant in the index, release the lock to make the potentially lengthy download operation
|
||||
// no tenant in the index, release the lock to make the potentially lengthy download opetation
|
||||
drop(index_accessor);
|
||||
|
||||
// download index parts for every tenant timeline
|
||||
@@ -539,27 +514,36 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
false
|
||||
});
|
||||
|
||||
let (tenant_state, current_physical_size) = match tenant {
|
||||
Ok(tenant) => {
|
||||
let timelines = tenant.list_timelines();
|
||||
// Calculate total physical size of all timelines
|
||||
let mut current_physical_size = 0;
|
||||
for timeline in timelines {
|
||||
current_physical_size += timeline.get_physical_size();
|
||||
}
|
||||
|
||||
(tenant.current_state(), Some(current_physical_size))
|
||||
}
|
||||
let tenant_state = match tenant {
|
||||
Ok(tenant) => tenant.current_state(),
|
||||
Err(e) => {
|
||||
error!("Failed to get local tenant state: {e:#}");
|
||||
if has_in_progress_downloads {
|
||||
(TenantState::Paused, None)
|
||||
TenantState::Paused
|
||||
} else {
|
||||
(TenantState::Broken, None)
|
||||
TenantState::Broken
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let current_physical_size =
|
||||
match tokio::task::spawn_blocking(move || list_local_timelines(tenant_id, false, false))
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?
|
||||
{
|
||||
Err(err) => {
|
||||
// Getting local timelines can fail when no local tenant directory is on disk (e.g, when tenant data is being downloaded).
|
||||
// In that case, put a warning message into log and operate normally.
|
||||
warn!("Failed to get local timelines for tenant {tenant_id}: {err}");
|
||||
None
|
||||
}
|
||||
Ok(local_timeline_infos) => Some(
|
||||
local_timeline_infos
|
||||
.into_iter()
|
||||
.fold(0, |acc, x| acc + x.1.current_physical_size.unwrap()),
|
||||
),
|
||||
};
|
||||
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
TenantInfo {
|
||||
@@ -748,7 +732,7 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[cfg(any(feature = "testing", feature = "failpoints"))]
|
||||
async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
if !fail::has_failpoints() {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
@@ -782,6 +766,11 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
|
||||
}
|
||||
|
||||
// Run GC immediately on given timeline.
|
||||
// FIXME: This is just for tests. See test_runner/regress/test_gc.py.
|
||||
// This probably should require special authentication or a global flag to
|
||||
// enable, I don't think we want to or need to allow regular clients to invoke
|
||||
// GC.
|
||||
// @hllinnaka in commits ec44f4b29, 3aca717f3
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
@@ -789,16 +778,16 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
// FIXME: currently this will return a 500 error on bad tenant id; it should be 4XX
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false).map_err(ApiError::NotFound)?;
|
||||
let repo = tenant_mgr::get_tenant(tenant_id, false).map_err(ApiError::NotFound)?;
|
||||
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
|
||||
|
||||
let _span_guard =
|
||||
info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id).entered();
|
||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| repo.get_gc_horizon());
|
||||
|
||||
// Use tenant's pitr setting
|
||||
let pitr = tenant.get_pitr_interval();
|
||||
let result = tenant
|
||||
let pitr = repo.get_pitr_interval();
|
||||
let result = repo
|
||||
.gc_iteration(Some(timeline_id), gc_horizon, pitr, true)
|
||||
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
|
||||
// better once the types support it.
|
||||
@@ -807,15 +796,19 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
}
|
||||
|
||||
// Run compaction immediately on given timeline.
|
||||
// FIXME This is just for tests. Don't expect this to be exposed to
|
||||
// the users or the api.
|
||||
// @dhammika in commit a0781f229
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
let repo = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = repo
|
||||
.get_timeline(timeline_id)
|
||||
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
timeline.compact().map_err(ApiError::InternalServerError)?;
|
||||
|
||||
@@ -829,9 +822,10 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
let repo = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = repo
|
||||
.get_timeline(timeline_id)
|
||||
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
timeline
|
||||
.checkpoint(CheckpointConfig::Forced)
@@ -907,10 +901,6 @@ pub fn make_router(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_detail_handler,
)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
get_lsn_by_timestamp_handler,
)
.put(
"/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc",
testing_api!("run timeline GC", timeline_gc_handler),

@@ -12,10 +12,10 @@ use tracing::*;
use walkdir::WalkDir;

use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::tenant::Timeline;
use crate::walingest::WalIngest;
use crate::walrecord::DecodedWALRecord;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::*;
use postgres_ffi::waldecoder::WalStreamDecoder;
@@ -43,19 +43,19 @@ pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
/// The code that deals with the checkpoint would not work right if the
/// cluster was not shut down cleanly.
pub fn import_timeline_from_postgres_datadir(
path: &Path,
tline: &Timeline,
pgdata_path: &Path,
pgdata_lsn: Lsn,
lsn: Lsn,
) -> Result<()> {
let mut pg_control: Option<ControlFileData> = None;

// TODO this shoud be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
// Then fishing out pg_control would be unnecessary
let mut modification = tline.begin_modification(pgdata_lsn);
let mut modification = tline.begin_modification(lsn);
modification.init_empty()?;

// Import all but pg_wal
let all_but_wal = WalkDir::new(pgdata_path)
let all_but_wal = WalkDir::new(path)
.into_iter()
.filter_entry(|entry| !entry.path().ends_with("pg_wal"));
for entry in all_but_wal {
@@ -63,7 +63,7 @@ pub fn import_timeline_from_postgres_datadir(
let metadata = entry.metadata().expect("error getting dir entry metadata");
if metadata.is_file() {
let absolute_path = entry.path();
let relative_path = absolute_path.strip_prefix(pgdata_path)?;
let relative_path = absolute_path.strip_prefix(path)?;

let file = File::open(absolute_path)?;
let len = metadata.len() as usize;
@@ -84,7 +84,7 @@ pub fn import_timeline_from_postgres_datadir(
"Postgres cluster was not shut down cleanly"
);
ensure!(
pg_control.checkPointCopy.redo == pgdata_lsn.0,
pg_control.checkPointCopy.redo == lsn.0,
"unexpected checkpoint REDO pointer"
);

@@ -92,10 +92,10 @@ pub fn import_timeline_from_postgres_datadir(
// this reads the checkpoint record itself, advancing the tip of the timeline to
// *after* the checkpoint record. And crucially, it initializes the 'prev_lsn'.
import_wal(
&pgdata_path.join("pg_wal"),
&path.join("pg_wal"),
tline,
Lsn(pg_control.checkPointCopy.redo),
pgdata_lsn,
lsn,
)?;

Ok(())
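
The import above walks the whole data directory but skips WAL: `filter_entry` prunes the `pg_wal` subtree so none of its contents are visited, and the WAL is then ingested separately via `import_wal`. A small stand-alone sketch of that pruning with the `walkdir` crate (the function name and return shape are illustrative):

```rust
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

// Collect every regular file under a Postgres data directory, pruning pg_wal entirely.
// filter_entry() skips matching directories, so their children are never yielded.
fn non_wal_files(pgdata: &Path) -> Result<Vec<PathBuf>, walkdir::Error> {
    let mut files = Vec::new();
    for entry in WalkDir::new(pgdata)
        .into_iter()
        .filter_entry(|e| !e.path().ends_with("pg_wal"))
    {
        let entry = entry?;
        if entry.file_type().is_file() {
            files.push(entry.path().to_path_buf());
        }
    }
    Ok(files)
}
```
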

@@ -5,10 +5,10 @@ pub mod import_datadir;
pub mod keyspace;
pub mod metrics;
pub mod page_cache;
pub mod page_image_cache;
pub mod page_service;
pub mod pgdatadir_mapping;
pub mod profiling;
pub mod reltag;
pub mod repository;
pub mod storage_sync;
pub mod task_mgr;
@@ -46,8 +46,6 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61;

pub const LOG_FILE_NAME: &str = "pageserver.log";

static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);

/// Config for the Repository checkpointer
#[derive(Debug, Clone, Copy)]
pub enum CheckpointConfig {
@@ -121,6 +119,32 @@ impl<T> TenantTimelineValues<T> {
fn new() -> Self {
Self(HashMap::new())
}

fn with_capacity(capacity: usize) -> Self {
Self(HashMap::with_capacity(capacity))
}

/// A convenience method to map certain values and omit some of them, if needed.
/// Tenants that won't have any timeline entries due to the filtering, will still be preserved
/// in the structure.
fn filter_map<F, NewT>(self, map: F) -> TenantTimelineValues<NewT>
where
F: Fn(T) -> Option<NewT>,
{
let capacity = self.0.len();
self.0.into_iter().fold(
TenantTimelineValues::<NewT>::with_capacity(capacity),
|mut new_values, (tenant_id, old_values)| {
let new_timeline_values = new_values.0.entry(tenant_id).or_default();
for (timeline_id, old_value) in old_values {
if let Some(new_value) = map(old_value) {
new_timeline_values.insert(timeline_id, new_value);
}
}
new_values
},
)
}
}

/// A suffix to be used during file sync from the remote storage,
@@ -157,3 +181,35 @@ mod backoff_defaults_tests {
);
}
}

#[cfg(test)]
mod tests {
use crate::tenant::harness::TIMELINE_ID;

use super::*;

#[test]
fn tenant_timeline_value_mapping() {
let first_tenant = TenantId::generate();
let second_tenant = TenantId::generate();
assert_ne!(first_tenant, second_tenant);

let mut initial = TenantTimelineValues::new();
initial
.0
.entry(first_tenant)
.or_default()
.insert(TIMELINE_ID, "test_value");
let _ = initial.0.entry(second_tenant).or_default();
assert_eq!(initial.0.len(), 2, "Should have entries for both tenants");

let filtered = initial.filter_map(|_| None::<&str>).0;
assert_eq!(
filtered.len(),
2,
"Should have entries for both tenants even after filtering away all entries"
);
assert!(filtered.contains_key(&first_tenant));
assert!(filtered.contains_key(&second_tenant));
}
}

@@ -107,20 +107,18 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {

// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
// or in testing they estimate how much we would upload if we did.
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_created_persistent_files_total",
"Number of files created that are meant to be uploaded to cloud storage",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});

static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_written_persistent_bytes_total",
"Total bytes written that are meant to be uploaded to cloud storage",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
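
The hunk above turns the two upload counters from per-tenant/per-timeline `IntCounterVec`s into plain, process-wide `IntCounter`s, so there is no label set to resolve (or to clean up when a timeline goes away). A minimal sketch of the two shapes, assuming the `prometheus` and `once_cell` crates already used in this file; the metric names here are made up for the example:

```rust
use once_cell::sync::Lazy;
use prometheus::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};

// Labelled: one child counter per (tenant_id, timeline_id) pair.
static EXAMPLE_FILES_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "example_created_files_per_timeline_total",
        "Files created, broken down per tenant/timeline",
        &["tenant_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});

// Unlabelled: a single counter for the whole process, as in the new code above.
static EXAMPLE_FILES_TOTAL: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "example_created_files_total",
        "Files created, summed over all tenants and timelines"
    )
    .expect("failed to define a metric")
});

fn record_created_file(tenant_id: &str, timeline_id: &str) {
    EXAMPLE_FILES_PER_TIMELINE
        .with_label_values(&[tenant_id, timeline_id])
        .inc();
    EXAMPLE_FILES_TOTAL.inc();
}
```
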
@@ -277,15 +275,11 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
/// smallest redo processing times. These buckets allow us to measure down
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// Values up to 1s are recorded because metrics show that we have redo
/// durations and lock times larger than 0.250s.
macro_rules! redo_histogram_time_buckets {
() => {
vec![
0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
1.000_000,
0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000,
]
};
}
@@ -300,17 +294,6 @@ macro_rules! redo_histogram_count_buckets {
};
}

macro_rules! redo_bytes_histogram_count_buckets {
() => {
// powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
// rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
vec![
24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
]
};
}
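
The byte-size buckets above follow the rule spelled out in the macro's comment: successive powers of 2^0.5 from 2^4.5 up to 2^15, each rounded up to the next multiple of 8 so that any MAXALIGNed record size lands on a bucket boundary. A short sketch (not part of the diff) that reproduces the 22 values:

```rust
// Recompute the redo-bytes bucket boundaries: 2^(n/2) for n = 9..=30,
// rounded up to the next multiple of 8.
fn redo_bytes_buckets() -> Vec<f64> {
    (9..=30)
        .map(|n| {
            let raw = 2f64.powf(n as f64 / 2.0);
            (raw / 8.0).ceil() * 8.0
        })
        .collect()
}

fn main() {
    let buckets = redo_bytes_buckets();
    assert_eq!(buckets.len(), 22);
    assert_eq!(buckets.first(), Some(&24.0));
    assert_eq!(buckets.last(), Some(&32768.0));
    println!("{buckets:?}"); // 24, 32, 48, 64, 96, 128, 184, ..., 32768
}
```
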

pub static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_wal_redo_seconds",
@@ -338,15 +321,6 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
.expect("failed to define a metric")
});

pub static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_wal_redo_bytes_histogram",
"Histogram of number of records replayed per redo",
redo_bytes_histogram_count_buckets!(),
)
.expect("failed to define a metric")
});

pub static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_replayed_wal_records_total",
@@ -412,12 +386,8 @@ impl TimelineMetrics {
let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED.clone();
let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN.clone();

TimelineMetrics {
tenant_id,
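
With the labels gone, `TimelineMetrics` simply clones the global counters instead of resolving per-timeline children. Cloning a prometheus counter is cheap and the clone shares the underlying value, so every handle feeds the same series; a tiny sketch assuming the `prometheus` crate:

```rust
use prometheus::IntCounter;

fn main() {
    let counter = IntCounter::new("example_clone_total", "Counter clones share one value").unwrap();
    let handle = counter.clone();

    handle.inc();      // increments the shared value
    counter.inc_by(2); // so does the original handle

    assert_eq!(counter.get(), 3);
    assert_eq!(handle.get(), 3);
}
```
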
@@ -449,8 +419,6 @@ impl Drop for TimelineMetrics {
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
let _ = CURRENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);

for op in STORAGE_TIME_OPERATIONS {
let _ = STORAGE_TIME.remove_label_values(&[op, tenant_id, timeline_id]);

Some files were not shown because too many files have changed in this diff