mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-22 15:41:15 +00:00
Compare commits
2 Commits
nikitakaly
...
asher/neon
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a40e1edbf4 | ||
|
|
40a56a302a |
18
.github/actions/run-python-test-set/action.yml
vendored
18
.github/actions/run-python-test-set/action.yml
vendored
@@ -73,13 +73,6 @@ runs:
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Download compatibility snapshot for Postgres 14
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
|
||||
path: /tmp/compatibility_snapshot_pg14
|
||||
prefix: latest
|
||||
|
||||
- name: Run pytest
|
||||
env:
|
||||
NEON_BIN: /tmp/neon/bin
|
||||
@@ -87,8 +80,6 @@ runs:
|
||||
BUILD_TYPE: ${{ inputs.build_type }}
|
||||
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
|
||||
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
|
||||
ALLOW_BREAKING_CHANGES: contains(github.event.pull_request.labels.*.name, 'breaking changes')
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
# PLATFORM will be embedded in the perf test report
|
||||
@@ -163,15 +154,6 @@ runs:
|
||||
scripts/generate_and_push_perf_report.sh
|
||||
fi
|
||||
|
||||
- name: Upload compatibility snapshot for Postgres 14
|
||||
if: github.ref_name == 'release'
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
|
||||
# The path includes a test name (test_prepare_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
|
||||
path: /tmp/test_output/test_prepare_snapshot/compatibility_snapshot_pg14/
|
||||
prefix: latest
|
||||
|
||||
- name: Create Allure report
|
||||
if: always()
|
||||
uses: ./.github/actions/allure-report
|
||||
|
||||
6
.github/ansible/deploy.yaml
vendored
6
.github/ansible/deploy.yaml
vendored
@@ -1,7 +1,7 @@
|
||||
- name: Upload Neon binaries
|
||||
hosts: storage
|
||||
gather_facts: False
|
||||
remote_user: "{{ remote_user }}"
|
||||
remote_user: admin
|
||||
|
||||
tasks:
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
- name: Deploy pageserver
|
||||
hosts: pageservers
|
||||
gather_facts: False
|
||||
remote_user: "{{ remote_user }}"
|
||||
remote_user: admin
|
||||
|
||||
tasks:
|
||||
|
||||
@@ -124,7 +124,7 @@
|
||||
- name: Deploy safekeeper
|
||||
hosts: safekeepers
|
||||
gather_facts: False
|
||||
remote_user: "{{ remote_user }}"
|
||||
remote_user: admin
|
||||
|
||||
tasks:
|
||||
|
||||
|
||||
15
.github/ansible/neon-stress.hosts.yaml
vendored
15
.github/ansible/neon-stress.hosts.yaml
vendored
@@ -3,6 +3,7 @@ storage:
|
||||
bucket_name: neon-storage-ireland
|
||||
bucket_region: eu-west-1
|
||||
console_mgmt_base_url: http://neon-stress-console.local
|
||||
env_name: neon-stress
|
||||
etcd_endpoints: neon-stress-etcd.local:2379
|
||||
safekeeper_enable_s3_offload: 'false'
|
||||
pageserver_config_stub:
|
||||
@@ -11,21 +12,19 @@ storage:
|
||||
bucket_name: "{{ bucket_name }}"
|
||||
bucket_region: "{{ bucket_region }}"
|
||||
prefix_in_bucket: "{{ inventory_hostname }}"
|
||||
safekeeper_s3_prefix: neon-stress/wal
|
||||
hostname_suffix: ".local"
|
||||
remote_user: admin
|
||||
|
||||
children:
|
||||
pageservers:
|
||||
hosts:
|
||||
neon-stress-ps-1:
|
||||
console_region_id: aws-eu-west-1
|
||||
console_region_id: 1
|
||||
neon-stress-ps-2:
|
||||
console_region_id: aws-eu-west-1
|
||||
console_region_id: 1
|
||||
safekeepers:
|
||||
hosts:
|
||||
neon-stress-sk-1:
|
||||
console_region_id: aws-eu-west-1
|
||||
console_region_id: 1
|
||||
neon-stress-sk-2:
|
||||
console_region_id: aws-eu-west-1
|
||||
console_region_id: 1
|
||||
neon-stress-sk-3:
|
||||
console_region_id: aws-eu-west-1
|
||||
console_region_id: 1
|
||||
|
||||
14
.github/ansible/production.hosts.yaml
vendored
14
.github/ansible/production.hosts.yaml
vendored
@@ -1,6 +1,7 @@
|
||||
---
|
||||
storage:
|
||||
vars:
|
||||
env_name: prod-1
|
||||
console_mgmt_base_url: http://console-release.local
|
||||
bucket_name: zenith-storage-oregon
|
||||
bucket_region: us-west-2
|
||||
@@ -11,23 +12,20 @@ storage:
|
||||
bucket_name: "{{ bucket_name }}"
|
||||
bucket_region: "{{ bucket_region }}"
|
||||
prefix_in_bucket: "{{ inventory_hostname }}"
|
||||
safekeeper_s3_prefix: prod-1/wal
|
||||
hostname_suffix: ".local"
|
||||
remote_user: admin
|
||||
|
||||
children:
|
||||
pageservers:
|
||||
hosts:
|
||||
zenith-1-ps-2:
|
||||
console_region_id: aws-us-west-2
|
||||
console_region_id: 1
|
||||
zenith-1-ps-3:
|
||||
console_region_id: aws-us-west-2
|
||||
console_region_id: 1
|
||||
|
||||
safekeepers:
|
||||
hosts:
|
||||
zenith-1-sk-1:
|
||||
console_region_id: aws-us-west-2
|
||||
console_region_id: 1
|
||||
zenith-1-sk-2:
|
||||
console_region_id: aws-us-west-2
|
||||
console_region_id: 1
|
||||
zenith-1-sk-3:
|
||||
console_region_id: aws-us-west-2
|
||||
console_region_id: 1
|
||||
|
||||
9
.github/ansible/scripts/init_pageserver.sh
vendored
9
.github/ansible/scripts/init_pageserver.sh
vendored
@@ -12,19 +12,18 @@ cat <<EOF | tee /tmp/payload
|
||||
"version": 1,
|
||||
"host": "${HOST}",
|
||||
"port": 6400,
|
||||
"region_id": "{{ console_region_id }}",
|
||||
"region_id": {{ console_region_id }},
|
||||
"instance_id": "${INSTANCE_ID}",
|
||||
"http_host": "${HOST}",
|
||||
"http_port": 9898,
|
||||
"active": false
|
||||
"http_port": 9898
|
||||
}
|
||||
EOF
|
||||
|
||||
# check if pageserver already registered or not
|
||||
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
|
||||
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/pageservers/${INSTANCE_ID} -o /dev/null; then
|
||||
|
||||
# not registered, so register it now
|
||||
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
|
||||
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/pageservers -d@/tmp/payload | jq -r '.ID')
|
||||
|
||||
# init pageserver
|
||||
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
|
||||
|
||||
10
.github/ansible/scripts/init_safekeeper.sh
vendored
10
.github/ansible/scripts/init_safekeeper.sh
vendored
@@ -14,18 +14,18 @@ cat <<EOF | tee /tmp/payload
|
||||
"host": "${HOST}",
|
||||
"port": 6500,
|
||||
"http_port": 7676,
|
||||
"region_id": "{{ console_region_id }}",
|
||||
"region_id": {{ console_region_id }},
|
||||
"instance_id": "${INSTANCE_ID}",
|
||||
"availability_zone_id": "${AZ_ID}",
|
||||
"active": false
|
||||
"availability_zone_id": "${AZ_ID}"
|
||||
}
|
||||
EOF
|
||||
|
||||
# check if safekeeper already registered or not
|
||||
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
|
||||
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/safekeepers/${INSTANCE_ID} -o /dev/null; then
|
||||
|
||||
# not registered, so register it now
|
||||
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
|
||||
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/safekeepers -d@/tmp/payload | jq -r '.ID')
|
||||
|
||||
# init safekeeper
|
||||
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
|
||||
fi
|
||||
|
||||
3
.github/ansible/ssm_config
vendored
3
.github/ansible/ssm_config
vendored
@@ -1,3 +0,0 @@
|
||||
ansible_connection: aws_ssm
|
||||
ansible_aws_ssm_bucket_name: neon-dev-bucket
|
||||
ansible_python_interpreter: /usr/bin/python3
|
||||
24
.github/ansible/staging.hosts.yaml
vendored
24
.github/ansible/staging.hosts.yaml
vendored
@@ -3,6 +3,7 @@ storage:
|
||||
bucket_name: zenith-staging-storage-us-east-1
|
||||
bucket_region: us-east-1
|
||||
console_mgmt_base_url: http://console-staging.local
|
||||
env_name: us-stage
|
||||
etcd_endpoints: zenith-us-stage-etcd.local:2379
|
||||
pageserver_config_stub:
|
||||
pg_distrib_dir: /usr/local
|
||||
@@ -10,25 +11,30 @@ storage:
|
||||
bucket_name: "{{ bucket_name }}"
|
||||
bucket_region: "{{ bucket_region }}"
|
||||
prefix_in_bucket: "{{ inventory_hostname }}"
|
||||
safekeeper_s3_prefix: us-stage/wal
|
||||
hostname_suffix: ".local"
|
||||
remote_user: admin
|
||||
|
||||
children:
|
||||
pageservers:
|
||||
hosts:
|
||||
zenith-us-stage-ps-2:
|
||||
console_region_id: aws-us-east-1
|
||||
console_region_id: 27
|
||||
zenith-us-stage-ps-3:
|
||||
console_region_id: aws-us-east-1
|
||||
console_region_id: 27
|
||||
zenith-us-stage-ps-4:
|
||||
console_region_id: aws-us-east-1
|
||||
console_region_id: 27
|
||||
zenith-us-stage-test-ps-1:
|
||||
console_region_id: 28
|
||||
|
||||
safekeepers:
|
||||
hosts:
|
||||
zenith-us-stage-sk-4:
|
||||
console_region_id: aws-us-east-1
|
||||
console_region_id: 27
|
||||
zenith-us-stage-sk-5:
|
||||
console_region_id: aws-us-east-1
|
||||
console_region_id: 27
|
||||
zenith-us-stage-sk-6:
|
||||
console_region_id: aws-us-east-1
|
||||
console_region_id: 27
|
||||
zenith-us-stage-test-sk-1:
|
||||
console_region_id: 28
|
||||
zenith-us-stage-test-sk-2:
|
||||
console_region_id: 28
|
||||
zenith-us-stage-test-sk-3:
|
||||
console_region_id: 28
|
||||
|
||||
32
.github/ansible/staging.us-east-2.hosts.yaml
vendored
32
.github/ansible/staging.us-east-2.hosts.yaml
vendored
@@ -1,32 +0,0 @@
|
||||
storage:
|
||||
vars:
|
||||
bucket_name: neon-staging-storage-us-east-2
|
||||
bucket_region: us-east-2
|
||||
console_mgmt_base_url: http://console-staging.local
|
||||
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
|
||||
pageserver_config_stub:
|
||||
pg_distrib_dir: /usr/local
|
||||
remote_storage:
|
||||
bucket_name: "{{ bucket_name }}"
|
||||
bucket_region: "{{ bucket_region }}"
|
||||
prefix_in_bucket: "pageserver/v1"
|
||||
safekeeper_s3_prefix: safekeeper/v1/wal
|
||||
hostname_suffix: ""
|
||||
remote_user: ssm-user
|
||||
ansible_aws_ssm_region: us-east-2
|
||||
console_region_id: aws-us-east-2
|
||||
|
||||
children:
|
||||
pageservers:
|
||||
hosts:
|
||||
pageserver-0.us-east-2.aws.neon.build:
|
||||
ansible_host: i-0c3e70929edb5d691
|
||||
|
||||
safekeepers:
|
||||
hosts:
|
||||
safekeeper-0.us-east-2.aws.neon.build:
|
||||
ansible_host: i-027662bd552bf5db0
|
||||
safekeeper-1.us-east-2.aws.neon.build:
|
||||
ansible_host: i-0171efc3604a7b907
|
||||
safekeeper-2.us-east-2.aws.neon.build:
|
||||
ansible_host: i-0de0b03a51676a6ce
|
||||
2
.github/ansible/systemd/pageserver.service
vendored
2
.github/ansible/systemd/pageserver.service
vendored
@@ -1,5 +1,5 @@
|
||||
[Unit]
|
||||
Description=Neon pageserver
|
||||
Description=Zenith pageserver
|
||||
After=network.target auditd.service
|
||||
|
||||
[Service]
|
||||
|
||||
4
.github/ansible/systemd/safekeeper.service
vendored
4
.github/ansible/systemd/safekeeper.service
vendored
@@ -1,12 +1,12 @@
|
||||
[Unit]
|
||||
Description=Neon safekeeper
|
||||
Description=Zenith safekeeper
|
||||
After=network.target auditd.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=safekeeper
|
||||
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
|
||||
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
|
||||
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=mixed
|
||||
KillSignal=SIGINT
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
# Helm chart values for neon-proxy-scram.
|
||||
# This is a YAML-formatted file.
|
||||
|
||||
image:
|
||||
repository: neondatabase/neon
|
||||
|
||||
settings:
|
||||
authBackend: "console"
|
||||
authEndpoint: "http://console-staging.local/management/api/v2"
|
||||
domain: "*.us-east-2.aws.neon.build"
|
||||
|
||||
# -- Additional labels for neon-proxy pods
|
||||
podLabels:
|
||||
zenith_service: proxy-scram
|
||||
zenith_env: dev
|
||||
zenith_region: us-east-2
|
||||
zenith_region_slug: us-east-2
|
||||
|
||||
exposedService:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
|
||||
|
||||
#metrics:
|
||||
# enabled: true
|
||||
# serviceMonitor:
|
||||
# enabled: true
|
||||
# selector:
|
||||
# release: kube-prometheus-stack
|
||||
139
.github/workflows/build_and_test.yml
vendored
139
.github/workflows/build_and_test.yml
vendored
@@ -481,7 +481,6 @@ jobs:
|
||||
|
||||
neon-image:
|
||||
runs-on: dev
|
||||
needs: [ tag ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
|
||||
steps:
|
||||
@@ -495,11 +494,10 @@ jobs:
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Kaniko build neon
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
|
||||
|
||||
compute-tools-image:
|
||||
runs-on: dev
|
||||
needs: [ tag ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
|
||||
steps:
|
||||
@@ -510,12 +508,11 @@ jobs:
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Kaniko build compute tools
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
|
||||
|
||||
compute-node-image:
|
||||
runs-on: dev
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
needs: [ tag ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
@@ -530,12 +527,11 @@ jobs:
|
||||
# cloud repo depends on this image name, thus duplicating it
|
||||
# remove compute-node when cloud repo is updated
|
||||
- name: Kaniko build compute node with extensions v14 (compatibility)
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}}
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
|
||||
|
||||
compute-node-image-v14:
|
||||
runs-on: dev
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
needs: [ tag ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
@@ -547,13 +543,12 @@ jobs:
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Kaniko build compute node with extensions v14
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:$GITHUB_RUN_ID
|
||||
|
||||
|
||||
compute-node-image-v15:
|
||||
runs-on: dev
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
needs: [ tag ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
@@ -565,11 +560,11 @@ jobs:
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Kaniko build compute node with extensions v15
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}
|
||||
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:$GITHUB_RUN_ID
|
||||
|
||||
promote-images:
|
||||
runs-on: dev
|
||||
needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
|
||||
needs: [ neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
container: amazon/aws-cli
|
||||
strategy:
|
||||
@@ -582,9 +577,8 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Promote image to latest
|
||||
run: |
|
||||
export MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=${{needs.tag.outputs.build-tag}} --query 'images[].imageManifest' --output text)
|
||||
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
|
||||
run:
|
||||
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
|
||||
|
||||
push-docker-hub:
|
||||
runs-on: dev
|
||||
@@ -603,19 +597,19 @@ jobs:
|
||||
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
|
||||
|
||||
- name: Pull neon image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
|
||||
|
||||
- name: Pull compute tools image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
|
||||
|
||||
- name: Pull compute node image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} compute-node
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
|
||||
|
||||
- name: Pull compute node v14 image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest compute-node-v14
|
||||
|
||||
- name: Pull compute node v15 image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest compute-node-v15
|
||||
|
||||
- name: Pull rust image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust
|
||||
@@ -625,11 +619,11 @@ jobs:
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v14:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v15:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v14:latest
|
||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v15:latest
|
||||
|
||||
- name: Configure Docker Hub login
|
||||
run: |
|
||||
@@ -677,11 +671,11 @@ jobs:
|
||||
- id: set-matrix
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
|
||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
|
||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
|
||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
|
||||
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
|
||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
|
||||
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
@@ -741,47 +735,7 @@ jobs:
|
||||
ssh-add ssh-key
|
||||
rm -f ssh-key ssh-key-cert.pub
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-new:
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
|
||||
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Redeploy
|
||||
run: |
|
||||
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
cd "$(pwd)/.github/ansible"
|
||||
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
./get_binaries.sh
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
RELEASE=true ./get_binaries.sh
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i staging.us-east-2.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
||||
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-proxy:
|
||||
@@ -825,52 +779,3 @@ jobs:
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
deploy-proxy-new:
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure environment
|
||||
run: |
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
aws --region us-east-2 eks update-kubeconfig --name dev-us-east-2-beta --role-arn arn:aws:iam::369495373322:role/github-runner
|
||||
|
||||
- name: Re-deploy proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
promote-compatibility-test-snapshot:
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ deploy, deploy-proxy ]
|
||||
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
|
||||
steps:
|
||||
- name: Promote compatibility snapshot for the release
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
BUCKET: neon-github-public-dev
|
||||
PREFIX: artifacts/latest
|
||||
run: |
|
||||
for build_type in debug release; do
|
||||
OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
|
||||
NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst
|
||||
|
||||
time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
|
||||
done
|
||||
|
||||
2
.github/workflows/codestyle.yml
vendored
2
.github/workflows/codestyle.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 2
|
||||
|
||||
336
Cargo.lock
generated
336
Cargo.lock
generated
@@ -40,7 +40,8 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "amplify_num"
|
||||
version = "0.4.1"
|
||||
source = "git+https://github.com/hlinnaka/rust-amplify.git?branch=unsigned-int-perf#bd49b737c2e6e623ab8e9ba5ceaed5712d3a3940"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f27d3d00d3d115395a7a8a4dc045feb7aa82b641e485f7e15f4e67ac16f4f56d"
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
@@ -74,9 +75,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs"
|
||||
version = "0.5.1"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf6690c370453db30743b373a60ba498fc0d6d83b11f4abfd87a84a075db5dd4"
|
||||
checksum = "30ff05a702273012438132f449575dbc804e27b2f3cbe3069aa237d26c98fa33"
|
||||
dependencies = [
|
||||
"asn1-rs-derive",
|
||||
"asn1-rs-impl",
|
||||
@@ -90,9 +91,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs-derive"
|
||||
version = "0.4.0"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c"
|
||||
checksum = "db8b7511298d5b7784b40b092d9e9dcd3a627a5707e4b5e507931ab0d44eeebf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -261,13 +262,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.61.0"
|
||||
version = "0.60.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a022e58a142a46fea340d68012b9201c094e93ec3d033a944a24f8fd4a4f09a"
|
||||
checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"lazy_static",
|
||||
"lazycell",
|
||||
"log",
|
||||
@@ -277,7 +280,6 @@ dependencies = [
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn",
|
||||
"which",
|
||||
]
|
||||
|
||||
@@ -325,14 +327,13 @@ checksum = "5988cb1d626264ac94100be357308f29ff7cbdd3b36bda27f450a4ee3f713426"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "1.0.1"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fca0852af221f458706eb0725c03e4ed6c46af9ac98e6a689d5e634215d594dd"
|
||||
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"memchr",
|
||||
"once_cell",
|
||||
"regex-automata",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -449,23 +450,28 @@ version = "3.2.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
"clap_lex 0.2.4",
|
||||
"clap_derive",
|
||||
"clap_lex",
|
||||
"indexmap",
|
||||
"once_cell",
|
||||
"strsim",
|
||||
"termcolor",
|
||||
"textwrap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.0.15"
|
||||
name = "clap_derive"
|
||||
version = "3.2.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bf8832993da70a4c6d13c581f4463c2bdda27b9bf1c5498dc4365543abe6d6f"
|
||||
checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
"clap_lex 0.3.0",
|
||||
"strsim",
|
||||
"termcolor",
|
||||
"heck",
|
||||
"proc-macro-error",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -477,15 +483,6 @@ dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8"
|
||||
dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "close_fds"
|
||||
version = "0.3.2"
|
||||
@@ -543,7 +540,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"clap 4.0.15",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"futures",
|
||||
"hyper",
|
||||
@@ -585,7 +582,7 @@ name = "control_plane"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap 4.0.15",
|
||||
"clap",
|
||||
"comfy-table",
|
||||
"git-version",
|
||||
"nix 0.25.0",
|
||||
@@ -678,7 +675,7 @@ dependencies = [
|
||||
"atty",
|
||||
"cast",
|
||||
"ciborium",
|
||||
"clap 3.2.22",
|
||||
"clap",
|
||||
"criterion-plot",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
@@ -746,7 +743,7 @@ dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"memoffset 0.6.5",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
@@ -906,14 +903,14 @@ version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6ee87af31d84ef885378aebca32be3d682b0e0dc119d5b4860a2c5bb5046730"
|
||||
dependencies = [
|
||||
"uuid 0.8.2",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der-parser"
|
||||
version = "8.1.0"
|
||||
version = "7.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42d4bc9b0db0a0df9ae64634ac5bdefb7afcb534e182275ca0beadbe486701c1"
|
||||
checksum = "fe398ac75057914d7d07307bf67dc7f3f574a26783b4fc7805a20ffa9f506e82"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"displaydoc",
|
||||
@@ -1020,11 +1017,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fb8664f6ea68aba5503d42dd1be786b0f1bd9b7972e7f40208c83ef74db91bf"
|
||||
dependencies = [
|
||||
"http",
|
||||
"prost",
|
||||
"prost 0.10.4",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"tonic 0.7.2",
|
||||
"tonic-build 0.7.2",
|
||||
"tower",
|
||||
"tower-service",
|
||||
]
|
||||
@@ -1222,12 +1219,6 @@ version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6508c467c73851293f390476d4491cf4d227dbabcd4170f3bb6044959b294f1"
|
||||
|
||||
[[package]]
|
||||
name = "futures-timer"
|
||||
version = "3.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.24"
|
||||
@@ -1814,15 +1805,6 @@ dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics"
|
||||
version = "0.1.0"
|
||||
@@ -1905,6 +1887,22 @@ version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "546c37ac5d9e56f55e73b677106873d9d9f5190605e41a856503623648488cae"
|
||||
|
||||
[[package]]
|
||||
name = "neon_broker"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"clap",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"prost 0.11.0",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.8.2",
|
||||
"tonic-build 0.8.2",
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.23.1"
|
||||
@@ -1915,7 +1913,7 @@ dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"memoffset 0.6.5",
|
||||
"memoffset",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1928,7 +1926,7 @@ dependencies = [
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"memoffset 0.6.5",
|
||||
"memoffset",
|
||||
"pin-utils",
|
||||
]
|
||||
|
||||
@@ -2041,9 +2039,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "oid-registry"
|
||||
version = "0.6.0"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d4bda43fd1b844cbc6e6e54b5444e2b1bc7838bce59ad205902cccbb26d6761"
|
||||
checksum = "38e20717fa0541f39bd146692035c37bedfa532b3e5071b35761082407546b2a"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
]
|
||||
@@ -2134,11 +2132,10 @@ dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"clap 4.0.15",
|
||||
"clap",
|
||||
"close_fds",
|
||||
"const_format",
|
||||
"crc32c",
|
||||
"criterion",
|
||||
"crossbeam-utils",
|
||||
"daemonize",
|
||||
"etcd_broker",
|
||||
@@ -2170,7 +2167,6 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"svg_fmt",
|
||||
"tar",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
@@ -2189,10 +2185,7 @@ dependencies = [
|
||||
name = "pageserver_api"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"const_format",
|
||||
"postgres_ffi",
|
||||
"serde",
|
||||
"serde_with",
|
||||
"utils",
|
||||
@@ -2415,7 +2408,7 @@ dependencies = [
|
||||
"env_logger",
|
||||
"hex",
|
||||
"log",
|
||||
"memoffset 0.7.1",
|
||||
"memoffset",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"rand",
|
||||
@@ -2462,6 +2455,30 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
|
||||
dependencies = [
|
||||
"proc-macro-error-attr",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error-attr"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.19"
|
||||
@@ -2513,7 +2530,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71adf41db68aa0daaefc69bb30bcd68ded9b9abaad5d1fbb6304c4fb390e083e"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"prost-derive",
|
||||
"prost-derive 0.10.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "399c3c31cdec40583bb68f0b18403400d01ec4289c383aa047560439952c4dd7"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"prost-derive 0.11.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2531,8 +2558,28 @@ dependencies = [
|
||||
"log",
|
||||
"multimap",
|
||||
"petgraph",
|
||||
"prost",
|
||||
"prost-types",
|
||||
"prost 0.10.4",
|
||||
"prost-types 0.10.1",
|
||||
"regex",
|
||||
"tempfile",
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-build"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f835c582e6bd972ba8347313300219fed5bfa52caf175298d860b61ff6069bb"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"heck",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"multimap",
|
||||
"petgraph",
|
||||
"prost 0.11.0",
|
||||
"prost-types 0.11.1",
|
||||
"regex",
|
||||
"tempfile",
|
||||
"which",
|
||||
@@ -2551,6 +2598,19 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-derive"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7345d5f0e08c0536d7ac7229952590239e77abf0a0100a1b1d890add6ea96364"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-types"
|
||||
version = "0.10.1"
|
||||
@@ -2558,7 +2618,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d0a014229361011dc8e69c8a1ec6c2e8d0f2af7c91e3ea3f5b2170298461e68"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"prost",
|
||||
"prost 0.10.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-types"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dfaa718ad76a44b3415e6c4d53b17c8f99160dcb3a99b10470fce8ad43f6e3e"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"prost 0.11.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2571,7 +2641,7 @@ dependencies = [
|
||||
"base64",
|
||||
"bstr",
|
||||
"bytes",
|
||||
"clap 4.0.15",
|
||||
"clap",
|
||||
"futures",
|
||||
"git-version",
|
||||
"hashbrown",
|
||||
@@ -2605,7 +2675,7 @@ dependencies = [
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
"utils",
|
||||
"uuid 1.2.1",
|
||||
"uuid",
|
||||
"workspace_hack",
|
||||
"x509-parser",
|
||||
]
|
||||
@@ -2694,13 +2764,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rcgen"
|
||||
version = "0.10.0"
|
||||
version = "0.8.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffbe84efe2f38dea12e9bfc1f65377fdf03e53a18cb3b995faedf7934c7e785b"
|
||||
checksum = "5911d1403f4143c9d56a702069d593e8d0f3fab880a85e103604d0893ea31ba7"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"pem",
|
||||
"ring",
|
||||
"time 0.3.15",
|
||||
"yasna",
|
||||
]
|
||||
|
||||
@@ -2890,21 +2960,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rstest"
|
||||
version = "0.15.0"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9c9dc66cc29792b663ffb5269be669f1613664e69ad56441fdb895c2347b930"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"futures-timer",
|
||||
"rstest_macros",
|
||||
"rustc_version 0.4.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rstest_macros"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5015e68a0685a95ade3eee617ff7101ab6a3fc689203101ca16ebc16f2b89c66"
|
||||
checksum = "d912f35156a3f99a66ee3e11ac2e0b3f34ac85a07e05263d05a7e2c8810d616f"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"proc-macro2",
|
||||
@@ -3084,7 +3142,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"clap 4.0.15",
|
||||
"clap",
|
||||
"const_format",
|
||||
"crc32c",
|
||||
"daemonize",
|
||||
@@ -3465,12 +3523,6 @@ version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
|
||||
|
||||
[[package]]
|
||||
name = "svg_fmt"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2"
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-common"
|
||||
version = "8.8.0"
|
||||
@@ -3480,7 +3532,7 @@ dependencies = [
|
||||
"debugid",
|
||||
"memmap2",
|
||||
"stable_deref_trait",
|
||||
"uuid 0.8.2",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3811,8 +3863,40 @@ dependencies = [
|
||||
"hyper-timeout",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost",
|
||||
"prost-derive",
|
||||
"prost 0.10.4",
|
||||
"prost-derive 0.10.1",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tower",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"tracing-futures",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55b9af819e54b8f33d453655bef9b9acc171568fb49523078d0cc4e7484200ec"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum",
|
||||
"base64",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"hyper-timeout",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost 0.11.0",
|
||||
"prost-derive 0.11.0",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
@@ -3831,7 +3915,20 @@ checksum = "d9263bf4c9bfaae7317c1c2faf7f18491d2fe476f70c414b73bf5d445b00ffa1"
|
||||
dependencies = [
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"prost-build",
|
||||
"prost-build 0.10.4",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic-build"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c6fd7c2581e36d63388a9e04c350c21beb7a8b059580b2e93993c526899ddc"
|
||||
dependencies = [
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"prost-build 0.11.1",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
@@ -3942,16 +4039,6 @@ dependencies = [
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-serde"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.16"
|
||||
@@ -3962,15 +4049,12 @@ dependencies = [
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
"tracing-serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4065,8 +4149,6 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
@@ -4081,12 +4163,6 @@ name = "uuid"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"serde",
|
||||
@@ -4136,7 +4212,7 @@ name = "wal_craft"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap 4.0.15",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"log",
|
||||
"once_cell",
|
||||
@@ -4376,7 +4452,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"clap 4.0.15",
|
||||
"clap",
|
||||
"crossbeam-utils",
|
||||
"either",
|
||||
"fail",
|
||||
@@ -4384,12 +4460,11 @@ dependencies = [
|
||||
"indexmap",
|
||||
"libc",
|
||||
"log",
|
||||
"memchr",
|
||||
"nom",
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"prost",
|
||||
"prost 0.10.4",
|
||||
"rand",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
@@ -4402,13 +4477,14 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "x509-parser"
|
||||
version = "0.14.0"
|
||||
version = "0.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0ecbeb7b67ce215e40e3cc7f2ff902f94a223acf44995934763467e7b1febc8"
|
||||
checksum = "9fb9bace5b5589ffead1afb76e43e34cff39cd0f3ce7e170ae0c29e53b88eb1c"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"base64",
|
||||
@@ -4439,11 +4515,11 @@ checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3"
|
||||
|
||||
[[package]]
|
||||
name = "yasna"
|
||||
version = "0.5.0"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "346d34a236c9d3e5f3b9b74563f238f955bbd05fa0b8b4efa53c130c43982f4c"
|
||||
checksum = "e262a29d0e61ccf2b6190d7050d4b237535fc76ce4c1210d9caa316f71dffa75"
|
||||
dependencies = [
|
||||
"time 0.3.15",
|
||||
"chrono",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -11,6 +11,7 @@ cargo-features = ["named-profiles"]
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
"broker",
|
||||
"compute_tools",
|
||||
"control_plane",
|
||||
"pageserver",
|
||||
|
||||
@@ -44,7 +44,7 @@ COPY . .
|
||||
# Show build caching stats to check if it was used in the end.
|
||||
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
|
||||
RUN set -e \
|
||||
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin proxy --locked --release \
|
||||
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin safekeeper --bin proxy --locked --release \
|
||||
&& cachepot -s
|
||||
|
||||
# Build final image
|
||||
@@ -65,7 +65,6 @@ RUN set -e \
|
||||
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
|
||||
|
||||
|
||||
@@ -1,26 +1,24 @@
|
||||
#
|
||||
# This file is identical to the Dockerfile.compute-node-v15 file
|
||||
# except for the version of Postgres that is built.
|
||||
#
|
||||
|
||||
ARG TAG=pinned
|
||||
# apparently, ARGs don't get replaced in RUN commands in kaniko
|
||||
# ARG POSTGIS_VERSION=3.3.0
|
||||
# ARG PLV8_VERSION=3.1.4
|
||||
# ARG PG_VERSION=v14
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "build-deps"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:bullseye-slim AS build-deps
|
||||
RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
|
||||
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
|
||||
apt update
|
||||
RUN apt update && \
|
||||
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
|
||||
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
|
||||
libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-build"
|
||||
# Build Postgres from the neon postgres repository.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-build
|
||||
COPY vendor/postgres-v14 postgres
|
||||
RUN cd postgres && \
|
||||
@@ -31,20 +29,22 @@ RUN cd postgres && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "postgis-build"
|
||||
# Build PostGIS from the upstream PostGIS mirror.
|
||||
#
|
||||
#########################################################################################
|
||||
# PostGIS compiles against neon postgres sources without changes. Perhaps we
|
||||
# could even use the upstream binaries, compiled against vanilla Postgres, but
|
||||
# it would require some investigation to check that it works, and also keeps
|
||||
# working in the future. So for now, we compile our own binaries.
|
||||
FROM build-deps AS postgis-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
|
||||
|
||||
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
|
||||
tar xvzf postgis-3.3.1.tar.gz && \
|
||||
cd postgis-3.3.1 && \
|
||||
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
|
||||
tar xvzf postgis-3.3.0.tar.gz && \
|
||||
cd postgis-3.3.0 && \
|
||||
./autogen.sh && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
./configure && \
|
||||
@@ -57,55 +57,39 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "plv8-build"
|
||||
# Build plv8
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS plv8-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
|
||||
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5
|
||||
|
||||
# https://github.com/plv8/plv8/issues/475:
|
||||
# v8 uses gold for linking and sets `--thread-count=4` which breaks
|
||||
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
|
||||
# Install newer gold version manually as debian-testing binutils version updates
|
||||
# libc version, which in turn breaks other extension built against non-testing libc.
|
||||
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
|
||||
tar xvzf binutils-2.38.tar.gz && \
|
||||
cd binutils-2.38 && \
|
||||
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
cd ../bfd && ./configure && make bfdver.h && \
|
||||
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
|
||||
cp /usr/local/bin/ld.gold /usr/bin/gold
|
||||
# https://github.com/plv8/plv8/issues/475
|
||||
# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
|
||||
RUN apt update && \
|
||||
apt install -y --no-install-recommends -t testing binutils
|
||||
|
||||
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
|
||||
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
|
||||
tar xvzf v3.1.4.tar.gz && \
|
||||
cd plv8-3.1.4 && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
|
||||
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
rm -rf /plv8-* && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "h3-pg-build"
|
||||
# Build h3_pg
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS h3-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# packaged cmake is too old
|
||||
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
|
||||
-q -O /tmp/cmake-install.sh \
|
||||
&& chmod u+x /tmp/cmake-install.sh \
|
||||
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
|
||||
&& rm /tmp/cmake-install.sh
|
||||
RUN apt update && \
|
||||
apt install -y --no-install-recommends -t testing cmake
|
||||
|
||||
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
|
||||
tar xvzf h3.tgz && \
|
||||
@@ -124,18 +108,16 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-pg-ext-build"
|
||||
# compile neon extensions
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS neon-pg-ext-build
|
||||
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
# plv8 still sometimes crashes during the creation
|
||||
# COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=h3-pg-build /h3/usr /
|
||||
COPY pgxn/ pgxn/
|
||||
@@ -145,22 +127,16 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon \
|
||||
-s install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Compile and run the Neon-specific `compute_ctl` binary
|
||||
#
|
||||
#########################################################################################
|
||||
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
|
||||
USER nonroot
|
||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||
COPY --chown=nonroot . .
|
||||
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Clean up postgres folder before inclusion
|
||||
#
|
||||
#########################################################################################
|
||||
FROM neon-pg-ext-build AS postgres-cleanup-layer
|
||||
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
|
||||
|
||||
@@ -178,12 +154,10 @@ RUN rm -r /usr/local/pgsql/lib/pgxs/src
|
||||
# if they were to be used by other libraries.
|
||||
RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Final layer
|
||||
# Put it all together into the final image
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:bullseye-slim
|
||||
# Add user postgres
|
||||
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
@@ -200,6 +174,8 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
|
||||
# libreadline8 for psql
|
||||
# libossp-uuid16 for extension ossp-uuid
|
||||
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
|
||||
# GLIBC 2.34 for plv8.
|
||||
# Debian bullseye provides GLIBC 2.31, so we install the library from testing
|
||||
#
|
||||
# Lastly, link compute_ctl into zenith_ctl while we're at it,
|
||||
# so that we don't need to put this in another layer.
|
||||
@@ -212,6 +188,12 @@ RUN apt update && \
|
||||
libproj19 \
|
||||
libprotobuf-c1 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
echo "Installing GLIBC 2.34" && \
|
||||
echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
|
||||
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
|
||||
apt update && \
|
||||
apt install -y --no-install-recommends -t testing libc6 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
|
||||
|
||||
USER postgres
|
||||
|
||||
@@ -4,23 +4,26 @@
|
||||
#
|
||||
|
||||
ARG TAG=pinned
|
||||
# apparently, ARGs don't get replaced in RUN commands in kaniko
|
||||
# ARG POSTGIS_VERSION=3.3.1
|
||||
# ARG PLV8_VERSION=3.1.4
|
||||
# ARG PG_VERSION=v15
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "build-deps"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:bullseye-slim AS build-deps
|
||||
RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
|
||||
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
|
||||
apt update
|
||||
RUN apt update && \
|
||||
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
|
||||
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
|
||||
libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-build"
|
||||
# Build Postgres from the neon postgres repository.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-build
|
||||
COPY vendor/postgres-v15 postgres
|
||||
RUN cd postgres && \
|
||||
@@ -31,12 +34,14 @@ RUN cd postgres && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "postgis-build"
|
||||
# Build PostGIS from the upstream PostGIS mirror.
|
||||
#
|
||||
#########################################################################################
|
||||
# PostGIS compiles against neon postgres sources without changes. Perhaps we
|
||||
# could even use the upstream binaries, compiled against vanilla Postgres, but
|
||||
# it would require some investigation to check that it works, and also keeps
|
||||
# working in the future. So for now, we compile our own binaries.
|
||||
FROM build-deps AS postgis-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
@@ -57,55 +62,39 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "plv8-build"
|
||||
# Build plv8
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS plv8-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
|
||||
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5
|
||||
|
||||
# https://github.com/plv8/plv8/issues/475:
|
||||
# v8 uses gold for linking and sets `--thread-count=4` which breaks
|
||||
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
|
||||
# Install newer gold version manually as debian-testing binutils version updates
|
||||
# libc version, which in turn breaks other extension built against non-testing libc.
|
||||
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
|
||||
tar xvzf binutils-2.38.tar.gz && \
|
||||
cd binutils-2.38 && \
|
||||
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
cd ../bfd && ./configure && make bfdver.h && \
|
||||
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
|
||||
cp /usr/local/bin/ld.gold /usr/bin/gold
|
||||
# https://github.com/plv8/plv8/issues/475
|
||||
# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
|
||||
RUN apt update && \
|
||||
apt install -y --no-install-recommends -t testing binutils
|
||||
|
||||
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
|
||||
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
|
||||
tar xvzf v3.1.4.tar.gz && \
|
||||
cd plv8-3.1.4 && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
|
||||
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
rm -rf /plv8-* && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "h3-pg-build"
|
||||
# Build h3_pg
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS h3-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# packaged cmake is too old
|
||||
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
|
||||
-q -O /tmp/cmake-install.sh \
|
||||
&& chmod u+x /tmp/cmake-install.sh \
|
||||
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
|
||||
&& rm /tmp/cmake-install.sh
|
||||
RUN apt update && \
|
||||
apt install -y --no-install-recommends -t testing cmake
|
||||
|
||||
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
|
||||
tar xvzf h3.tgz && \
|
||||
@@ -124,18 +113,16 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-pg-ext-build"
|
||||
# compile neon extensions
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS neon-pg-ext-build
|
||||
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
# plv8 still sometimes crashes during the creation
|
||||
# COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=h3-pg-build /h3/usr /
|
||||
COPY pgxn/ pgxn/
|
||||
@@ -145,22 +132,16 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon \
|
||||
-s install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Compile and run the Neon-specific `compute_ctl` binary
|
||||
#
|
||||
#########################################################################################
|
||||
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
|
||||
USER nonroot
|
||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||
COPY --chown=nonroot . .
|
||||
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Clean up postgres folder before inclusion
|
||||
#
|
||||
#########################################################################################
|
||||
FROM neon-pg-ext-build AS postgres-cleanup-layer
|
||||
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
|
||||
|
||||
@@ -178,12 +159,10 @@ RUN rm -r /usr/local/pgsql/lib/pgxs/src
|
||||
# if they were to be used by other libraries.
|
||||
RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Final layer
|
||||
# Put it all together into the final image
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:bullseye-slim
|
||||
# Add user postgres
|
||||
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
@@ -200,6 +179,8 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
|
||||
# libreadline8 for psql
|
||||
# libossp-uuid16 for extension ossp-uuid
|
||||
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
|
||||
# GLIBC 2.34 for plv8.
|
||||
# Debian bullseye provides GLIBC 2.31, so we install the library from testing
|
||||
#
|
||||
# Lastly, link compute_ctl into zenith_ctl while we're at it,
|
||||
# so that we don't need to put this in another layer.
|
||||
@@ -212,6 +193,12 @@ RUN apt update && \
|
||||
libproj19 \
|
||||
libprotobuf-c1 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
echo "Installing GLIBC 2.34" && \
|
||||
echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
|
||||
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
|
||||
apt update && \
|
||||
apt install -y --no-install-recommends -t testing libc6 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
|
||||
|
||||
USER postgres
|
||||
|
||||
1120
broker/Cargo.lock
generated
Normal file
1120
broker/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
35
broker/Cargo.toml
Normal file
35
broker/Cargo.toml
Normal file
@@ -0,0 +1,35 @@
|
||||
[package]
|
||||
name = "neon_broker"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
bench = []
|
||||
|
||||
[[bin]]
|
||||
name = "neon_broker"
|
||||
path = "src/broker.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "neon_broker_bench"
|
||||
path = "src/bench.rs"
|
||||
# build benchmarking binary only if explicitly requested with '--feature bench'
|
||||
# required-features = ["bench"]
|
||||
|
||||
[dependencies]
|
||||
async-stream = "0.3"
|
||||
futures-core = "0.3"
|
||||
futures-util = "0.3"
|
||||
tonic = "0.8"
|
||||
prost = "0.11"
|
||||
tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] }
|
||||
# for exploring with tokio-console
|
||||
# tokio = { version = "1", features = ["full", "tracing"] }
|
||||
# console-subscriber = "0.1.8"
|
||||
tokio-stream = "0.1"
|
||||
clap = { version = "3.2.17", features = ["derive"] }
|
||||
|
||||
utils = { path = "../libs/utils" }
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build = "0.8"
|
||||
4
broker/build.rs
Normal file
4
broker/build.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
tonic_build::compile_protos("proto/broker.proto")?;
|
||||
Ok(())
|
||||
}
|
||||
38
broker/proto/broker.proto
Normal file
38
broker/proto/broker.proto
Normal file
@@ -0,0 +1,38 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package neon_broker;
|
||||
|
||||
service NeonBroker {
|
||||
// Subscribe to safekeeper updates.
|
||||
rpc SubscribeSafekeeperInfo(SubscribeSafekeeperInfoRequest) returns (stream SafekeeperTimelineInfo) {};
|
||||
|
||||
// Publish safekeeper updates.
|
||||
rpc PublishSafekeeperInfo(stream SafekeeperTimelineInfo) returns (Empty) {};
|
||||
}
|
||||
|
||||
message SubscribeSafekeeperInfoRequest {
|
||||
oneof subscription_key {
|
||||
Empty all = 1; // subscribe to everything
|
||||
TenantTimelineId tenant_timeline_id = 2; // subscribe to specific timeline
|
||||
}
|
||||
}
|
||||
|
||||
message SafekeeperTimelineInfo {
|
||||
uint64 safekeeper_id = 1;
|
||||
TenantTimelineId tenant_timeline_id = 2;
|
||||
uint64 last_log_term = 3;
|
||||
uint64 flush_lsn = 4;
|
||||
uint64 commit_lsn = 5;
|
||||
uint64 backup_lsn = 6;
|
||||
uint64 remote_consistent_lsn = 7;
|
||||
uint64 peer_horizon_lsn = 8;
|
||||
string safekeeper_connstr = 9;
|
||||
}
|
||||
|
||||
message TenantTimelineId {
|
||||
bytes tenant_id = 1;
|
||||
bytes timeline_id = 2;
|
||||
}
|
||||
|
||||
message Empty {
|
||||
}
|
||||
4
broker/readme.md
Normal file
4
broker/readme.md
Normal file
@@ -0,0 +1,4 @@
|
||||
```
|
||||
cargo build -r -p neon_broker --features bench && target/release/neon_broker
|
||||
target/release/neon_broker_bench -s 1 -p 1
|
||||
```
|
||||
179
broker/src/bench.rs
Normal file
179
broker/src/bench.rs
Normal file
@@ -0,0 +1,179 @@
|
||||
pub mod neon_broker {
|
||||
tonic::include_proto!("neon_broker");
|
||||
}
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use clap::Parser;
|
||||
use neon_broker::neon_broker_client::NeonBrokerClient;
|
||||
use neon_broker::subscribe_safekeeper_info_request::SubscriptionKey;
|
||||
use neon_broker::TenantTimelineId as ProtoTenantTimelineId;
|
||||
use neon_broker::{SafekeeperTimelineInfo, SubscribeSafekeeperInfoRequest};
|
||||
use tokio::time::{self, sleep};
|
||||
|
||||
use tonic::transport::Channel;
|
||||
use tonic::Request;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[clap(author, version, about, long_about = None)]
|
||||
struct Args {
|
||||
/// Number of publishers
|
||||
#[clap(short = 'p', long, value_parser, default_value_t = 1)]
|
||||
num_pubs: u64,
|
||||
/// Number of subscribers
|
||||
#[clap(short = 's', long, value_parser, default_value_t = 1)]
|
||||
num_subs: u64,
|
||||
}
|
||||
|
||||
async fn progress_reporter(counters: Vec<Arc<AtomicU64>>) {
|
||||
let mut interval = time::interval(Duration::from_millis(1000));
|
||||
let mut c_old = counters.iter().map(|c| c.load(Ordering::Relaxed)).sum();
|
||||
let mut c_min_old = counters
|
||||
.iter()
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.min()
|
||||
.unwrap_or(0);
|
||||
let mut started_at = None;
|
||||
let mut skipped: u64 = 0;
|
||||
loop {
|
||||
interval.tick().await;
|
||||
// print!(
|
||||
// "cnts are {:?}",
|
||||
// counters
|
||||
// .iter()
|
||||
// .map(|c| c.load(Ordering::Relaxed))
|
||||
// .collect::<Vec<_>>()
|
||||
// );
|
||||
let c_new = counters.iter().map(|c| c.load(Ordering::Relaxed)).sum();
|
||||
let c_min_new = counters
|
||||
.iter()
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.min()
|
||||
.unwrap_or(0);
|
||||
if c_new > 0 && started_at.is_none() {
|
||||
started_at = Some(Instant::now());
|
||||
skipped = c_new;
|
||||
}
|
||||
let avg_rps = started_at.map(|s| {
|
||||
let dur = s.elapsed();
|
||||
let dur_secs = dur.as_secs() as f64 + (dur.subsec_millis() as f64) / 1000.0;
|
||||
let avg_rps = (c_new - skipped) as f64 / dur_secs;
|
||||
(dur, avg_rps)
|
||||
});
|
||||
println!(
|
||||
"sum rps {}, min rps {} total {}, total min {}, duration, avg sum rps {:?}",
|
||||
c_new - c_old,
|
||||
c_min_new - c_min_old,
|
||||
c_new,
|
||||
c_min_new,
|
||||
avg_rps
|
||||
);
|
||||
c_old = c_new;
|
||||
c_min_old = c_min_new;
|
||||
}
|
||||
}
|
||||
|
||||
fn tli_from_u64(i: u64) -> Vec<u8> {
|
||||
let mut timeline_id = vec![0xFF; 8];
|
||||
timeline_id.extend_from_slice(&i.to_be_bytes());
|
||||
timeline_id
|
||||
}
|
||||
|
||||
async fn subscribe(client: Option<NeonBrokerClient<Channel>>, counter: Arc<AtomicU64>, i: u64) {
|
||||
let mut client = match client {
|
||||
Some(c) => c,
|
||||
None => NeonBrokerClient::connect("http://[::1]:50051")
|
||||
.await
|
||||
.unwrap(),
|
||||
};
|
||||
|
||||
// let key = SubscriptionKey::All(Empty {});
|
||||
let key = SubscriptionKey::TenantTimelineId(ProtoTenantTimelineId {
|
||||
tenant_id: vec![0xFF; 16],
|
||||
timeline_id: tli_from_u64(i),
|
||||
});
|
||||
let request = SubscribeSafekeeperInfoRequest {
|
||||
subscription_key: Some(key),
|
||||
};
|
||||
let mut stream = client
|
||||
.subscribe_safekeeper_info(request)
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
|
||||
while let Some(_feature) = stream.message().await.unwrap() {
|
||||
counter.fetch_add(1, Ordering::Relaxed);
|
||||
// println!("info = {:?}, client {}", _feature, i);
|
||||
}
|
||||
}
|
||||
|
||||
async fn publish(client: Option<NeonBrokerClient<Channel>>, n_keys: u64) {
|
||||
let mut client = match client {
|
||||
Some(c) => c,
|
||||
None => NeonBrokerClient::connect("http://[::1]:50051")
|
||||
.await
|
||||
.unwrap(),
|
||||
};
|
||||
let mut counter: u64 = 0;
|
||||
|
||||
// create stream producing new values
|
||||
let outbound = async_stream::stream! {
|
||||
loop {
|
||||
let info = SafekeeperTimelineInfo {
|
||||
safekeeper_id: 1,
|
||||
tenant_timeline_id: Some(ProtoTenantTimelineId {
|
||||
tenant_id: vec![0xFF; 16],
|
||||
timeline_id: tli_from_u64(counter % n_keys),
|
||||
}),
|
||||
last_log_term: 0,
|
||||
flush_lsn: counter,
|
||||
commit_lsn: 2,
|
||||
backup_lsn: 3,
|
||||
remote_consistent_lsn: 4,
|
||||
peer_horizon_lsn: 5,
|
||||
safekeeper_connstr: "zenith-1-sk-1.local:7676".to_owned(),
|
||||
};
|
||||
counter += 1;
|
||||
// println!("sending info = {:?}", info);
|
||||
// if counter >= 1000 {
|
||||
// break;
|
||||
// }
|
||||
yield info;
|
||||
// sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
};
|
||||
let _response = client
|
||||
.publish_safekeeper_info(Request::new(outbound))
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let args = Args::parse();
|
||||
|
||||
let mut counters = Vec::with_capacity(args.num_subs as usize);
|
||||
for _ in 0..args.num_subs {
|
||||
counters.push(Arc::new(AtomicU64::new(0)));
|
||||
}
|
||||
let h = tokio::spawn(progress_reporter(counters.clone()));
|
||||
|
||||
let c = NeonBrokerClient::connect("http://[::1]:50051")
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for i in 0..args.num_subs {
|
||||
let c = Some(c.clone());
|
||||
// let c = None;
|
||||
tokio::spawn(subscribe(c, counters[i as usize].clone(), i));
|
||||
}
|
||||
for _i in 0..args.num_pubs {
|
||||
// let c = Some(c.clone());
|
||||
tokio::spawn(publish(None, args.num_subs as u64));
|
||||
}
|
||||
|
||||
h.await?;
|
||||
Ok(())
|
||||
}
|
||||
526
broker/src/broker.rs
Normal file
526
broker/src/broker.rs
Normal file
@@ -0,0 +1,526 @@
|
||||
//! Simple pub-sub based on grpc (tonic) and Tokio mpsc for storage nodes
|
||||
//! messaging. The main design goal is to avoid central synchronization during
|
||||
//! normal flow, resorting to it only when pub/sub change happens. Each
|
||||
//! subscriber holds mpsc for messages it sits on; tx end is sent to existing
|
||||
//! publishers and saved in shared state for new ones. Publishers maintain
|
||||
//! locally set of subscribers they stream messages to.
|
||||
//!
|
||||
//! Subscriptions to 1) single timeline 2) everything are possible. We could add
|
||||
//! subscription to set of timelines to save grpc streams, but testing shows
|
||||
//! many individual streams is also ok.
|
||||
//!
|
||||
//! Message is dropped if subscriber can't consume it, not affecting other
|
||||
//! subscribers.
|
||||
//!
|
||||
//! Only safekeeper message is supported, but it is not hard to add something
|
||||
//! else with templating.
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::pin::Pin;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Duration;
|
||||
|
||||
use futures_core::Stream;
|
||||
use futures_util::StreamExt;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio::sync::mpsc::{self, Receiver, Sender};
|
||||
use tokio::{select, time};
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tonic::Code;
|
||||
use tonic::{transport::Server, Request, Response, Status};
|
||||
|
||||
use neon_broker_proto::neon_broker_server::{NeonBroker, NeonBrokerServer};
|
||||
use neon_broker_proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey;
|
||||
use neon_broker_proto::TenantTimelineId as ProtoTenantTimelineId;
|
||||
use neon_broker_proto::{Empty, SafekeeperTimelineInfo, SubscribeSafekeeperInfoRequest};
|
||||
use utils::id::{TenantId, TenantTimelineId, TimelineId};
|
||||
|
||||
pub mod neon_broker_proto {
|
||||
// The string specified here must match the proto package name.
|
||||
// If you want to have a look at the generated code, it is at path similar to
|
||||
// target/debug/build/neon_broker-0fde81d03bedc3b2/out/neon_broker.rs
|
||||
tonic::include_proto!("neon_broker");
|
||||
}
|
||||
|
||||
// Max size of the queue to the subscriber.
|
||||
const CHAN_SIZE: usize = 256;
|
||||
|
||||
type PubId = u64; // id of publisher for registering in maps
|
||||
type SubId = u64; // id of subscriber for registering in maps
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum SubscriptionKey {
|
||||
All,
|
||||
Timeline(TenantTimelineId),
|
||||
}
|
||||
|
||||
impl SubscriptionKey {
|
||||
// Parse protobuf subkey (protobuf doesn't have fixed size bytes, we get vectors).
|
||||
pub fn from_proto_subscription_key(key: ProtoSubscriptionKey) -> Result<Self, Status> {
|
||||
match key {
|
||||
ProtoSubscriptionKey::All(_) => Ok(SubscriptionKey::All),
|
||||
ProtoSubscriptionKey::TenantTimelineId(proto_ttid) => {
|
||||
Ok(SubscriptionKey::Timeline(parse_proto_ttid(&proto_ttid)?))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Subscriber id + tx end of the channel for messages to it.
|
||||
#[derive(Clone)]
|
||||
struct SubSender(SubId, Sender<SafekeeperTimelineInfo>);
|
||||
|
||||
impl fmt::Debug for SubSender {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "Subscription id {}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
// Announcements subscriber sends to publisher(s) asking it to stream to the
|
||||
// provided channel, or forget about it, releasing memory.
|
||||
#[derive(Clone)]
|
||||
enum SubAnnounce {
|
||||
AddAll(Sender<SafekeeperTimelineInfo>), // add subscription to all timelines
|
||||
AddTimeline(TenantTimelineId, SubSender), // add subsciption to the specific timeline
|
||||
RemoveTimeline(TenantTimelineId, SubId), // remove subscription to the specific timeline
|
||||
// RemoveAll is not needed as publisher will notice closed channel while
|
||||
// trying to send the next message.
|
||||
}
|
||||
|
||||
struct SharedState {
|
||||
// Registered publishers. They sit on the rx end of these channels and
|
||||
// receive through it tx handles of chans to subscribers.
|
||||
//
|
||||
// Note: publishers don't identify which keys they publish, so each
|
||||
// publisher will receive channels to all subs and filter them before sending.
|
||||
pub_txs: HashMap<PubId, Sender<SubAnnounce>>,
|
||||
next_pub_id: PubId,
|
||||
// Registered subscribers -- when publisher joins it walks over them,
|
||||
// collecting txs to send messages.
|
||||
subs_to_all: HashMap<SubId, Sender<SafekeeperTimelineInfo>>,
|
||||
subs_to_timelines: HashMap<TenantTimelineId, Vec<SubSender>>,
|
||||
next_sub_id: SubId,
|
||||
}
|
||||
|
||||
// Utility func to remove subscription from the map
|
||||
fn remove_sub(
|
||||
subs_to_timelines: &mut HashMap<TenantTimelineId, Vec<SubSender>>,
|
||||
ttid: &TenantTimelineId,
|
||||
sub_id: SubId,
|
||||
) {
|
||||
if let Some(subsenders) = subs_to_timelines.get_mut(&ttid) {
|
||||
subsenders.retain(|ss| ss.0 != sub_id);
|
||||
if subsenders.len() == 0 {
|
||||
subs_to_timelines.remove(&ttid);
|
||||
}
|
||||
}
|
||||
// Note that subscription might be not here if subscriber task was aborted
|
||||
// earlier than it managed to notify publisher about itself.
|
||||
}
|
||||
|
||||
impl SharedState {
|
||||
// Register new publisher.
|
||||
pub fn register_publisher(&mut self, announce_tx: Sender<SubAnnounce>) -> PubId {
|
||||
let pub_id = self.next_pub_id;
|
||||
self.next_pub_id += 1;
|
||||
assert!(!self.pub_txs.contains_key(&pub_id));
|
||||
self.pub_txs.insert(pub_id, announce_tx);
|
||||
pub_id
|
||||
}
|
||||
|
||||
pub fn unregister_publisher(&mut self, pub_id: PubId) {
|
||||
assert!(self.pub_txs.contains_key(&pub_id));
|
||||
self.pub_txs.remove(&pub_id);
|
||||
}
|
||||
|
||||
// Register new subscriber.
|
||||
// Returns list of channels through which existing publishers must be notified
|
||||
// about new subscriber; we can't do it here due to risk of deadlock.
|
||||
pub fn register_subscriber(
|
||||
&mut self,
|
||||
sub_key: SubscriptionKey,
|
||||
sub_tx: Sender<SafekeeperTimelineInfo>,
|
||||
) -> (SubId, Vec<Sender<SubAnnounce>>, SubAnnounce) {
|
||||
let sub_id = self.next_sub_id;
|
||||
self.next_sub_id += 1;
|
||||
let announce = match sub_key {
|
||||
SubscriptionKey::All => {
|
||||
assert!(!self.subs_to_all.contains_key(&sub_id));
|
||||
self.subs_to_all.insert(sub_id, sub_tx.clone());
|
||||
SubAnnounce::AddAll(sub_tx)
|
||||
}
|
||||
SubscriptionKey::Timeline(ttid) => {
|
||||
match self.subs_to_timelines.entry(ttid) {
|
||||
Entry::Occupied(mut o) => {
|
||||
let subsenders = o.get_mut();
|
||||
subsenders.push(SubSender(sub_id, sub_tx.clone()));
|
||||
}
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(vec![SubSender(sub_id, sub_tx.clone())]);
|
||||
}
|
||||
}
|
||||
SubAnnounce::AddTimeline(ttid, SubSender(sub_id, sub_tx))
|
||||
}
|
||||
};
|
||||
// Collect existing publishers to notify them after lock is released;
|
||||
// TODO: the probability of channels being full here is tiny (publisher
|
||||
// always blocks listening chan), we can try sending first and resort to
|
||||
// cloning if needed.
|
||||
//
|
||||
// Deadlock is possible only if publisher tries to access shared state
|
||||
// during its lifetime, i.e. we add maintenance of set of published
|
||||
// tlis. Otherwise we can just await here (but lock must be replaced
|
||||
// with Tokio one).
|
||||
//
|
||||
// We could also just error out if some chan is full, but that needs
|
||||
// cleanup of incompleted job, and notifying publishers when unregistering
|
||||
// is mandatory anyway.
|
||||
(sub_id, self.pub_txs.values().cloned().collect(), announce)
|
||||
}
|
||||
|
||||
// Unregister the subscriber. Similar to register_subscriber, returns list
|
||||
// of channels through which publishers must be notified about the removal.
|
||||
pub fn unregister_subscriber(
|
||||
&mut self,
|
||||
sub_id: SubId,
|
||||
sub_key: SubscriptionKey,
|
||||
) -> Option<(Vec<Sender<SubAnnounce>>, SubAnnounce)> {
|
||||
// We need to notify existing publishers only about per timeline
|
||||
// subscriptions, 'all' kind is detected on its own through closed
|
||||
// channels.
|
||||
let announce = match sub_key {
|
||||
SubscriptionKey::All => {
|
||||
assert!(self.subs_to_all.contains_key(&sub_id));
|
||||
self.subs_to_all.remove(&sub_id);
|
||||
None
|
||||
}
|
||||
SubscriptionKey::Timeline(ref ttid) => {
|
||||
remove_sub(&mut self.subs_to_timelines, ttid, sub_id);
|
||||
Some(SubAnnounce::RemoveTimeline(*ttid, sub_id))
|
||||
}
|
||||
};
|
||||
announce.map(|a| (self.pub_txs.values().cloned().collect(), a))
|
||||
}
|
||||
|
||||
pub fn report(&mut self) {
|
||||
println!(
|
||||
"registered {} publishers, {} subs to all, {} subs to timelines",
|
||||
self.pub_txs.len(),
|
||||
self.subs_to_all.len(),
|
||||
self.subs_to_timelines.len(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// SharedState wrapper for post-locking operations (sending to pub_tx chans).
|
||||
#[derive(Clone)]
|
||||
struct Registry {
|
||||
shared_state: Arc<Mutex<SharedState>>,
|
||||
}
|
||||
|
||||
impl Registry {
|
||||
// Register new publisher in shared state.
|
||||
pub fn register_publisher(&self) -> Publisher {
|
||||
let (announce_tx, announce_rx) = mpsc::channel(128);
|
||||
let mut ss = self.shared_state.lock().unwrap();
|
||||
let id = ss.register_publisher(announce_tx);
|
||||
let (subs_to_all, subs_to_timelines) = (
|
||||
ss.subs_to_all.values().cloned().collect(),
|
||||
ss.subs_to_timelines.clone(),
|
||||
);
|
||||
drop(ss);
|
||||
// println!("registered publisher {}", id);
|
||||
Publisher {
|
||||
id,
|
||||
announce_rx: announce_rx.into(),
|
||||
subs_to_all,
|
||||
subs_to_timelines,
|
||||
registry: self.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unregister_publisher(&self, publisher: &Publisher) {
|
||||
self.shared_state
|
||||
.lock()
|
||||
.unwrap()
|
||||
.unregister_publisher(publisher.id);
|
||||
// println!("unregistered publisher {}", publisher.id);
|
||||
}
|
||||
|
||||
// Register new subscriber in shared state.
|
||||
pub async fn register_subscriber(&self, sub_key: SubscriptionKey) -> Subscriber {
|
||||
let (tx, rx) = mpsc::channel(CHAN_SIZE);
|
||||
let id;
|
||||
let mut pub_txs;
|
||||
let announce;
|
||||
{
|
||||
let mut ss = self.shared_state.lock().unwrap();
|
||||
(id, pub_txs, announce) = ss.register_subscriber(sub_key, tx);
|
||||
}
|
||||
// Note: it is important to create Subscriber before .await. If client
|
||||
// disconnects during await, which would terminate the Future we still
|
||||
// need to run Subscriber's drop() which will unregister it from the
|
||||
// shared state.
|
||||
let subscriber = Subscriber {
|
||||
id,
|
||||
key: sub_key,
|
||||
sub_rx: rx,
|
||||
registry: self.clone(),
|
||||
};
|
||||
// Notify existing publishers about new subscriber.
|
||||
for pub_tx in pub_txs.iter_mut() {
|
||||
// Closed channel is fine; it means publisher has gone.
|
||||
pub_tx.send(announce.clone()).await.ok();
|
||||
}
|
||||
// println!("registered subscriber {}", id);
|
||||
subscriber
|
||||
}
|
||||
|
||||
// Unregister the subscriber
|
||||
pub fn unregister_subscriber(&self, sub: &Subscriber) {
|
||||
let mut ss = self.shared_state.lock().unwrap();
|
||||
let announce_pack = ss.unregister_subscriber(sub.id, sub.key);
|
||||
drop(ss);
|
||||
// Notify publishers about the removal. Apart from wanting to do it
|
||||
// outside lock, here we also spin a task as Drop impl can't be async.
|
||||
if let Some((mut pub_txs, announce)) = announce_pack {
|
||||
tokio::spawn(async move {
|
||||
for pub_tx in pub_txs.iter_mut() {
|
||||
// Closed channel is fine; it means publisher has gone.
|
||||
pub_tx.send(announce.clone()).await.ok();
|
||||
}
|
||||
});
|
||||
}
|
||||
// println!("unregistered subscriber {}", sub.id);
|
||||
}
|
||||
|
||||
pub async fn report(&self) {
|
||||
let mut interval = time::interval(Duration::from_millis(1000));
|
||||
loop {
|
||||
interval.tick().await;
|
||||
self.shared_state.lock().unwrap().report();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Private subscriber state.
|
||||
struct Subscriber {
|
||||
id: SubId,
|
||||
key: SubscriptionKey,
|
||||
// Subscriber receives messages from publishers here.
|
||||
sub_rx: Receiver<SafekeeperTimelineInfo>,
|
||||
// to unregister itself from shared state in Drop
|
||||
registry: Registry,
|
||||
}
|
||||
|
||||
impl Drop for Subscriber {
|
||||
fn drop(&mut self) {
|
||||
self.registry.unregister_subscriber(self);
|
||||
}
|
||||
}
|
||||
|
||||
// Private publisher state
|
||||
struct Publisher {
|
||||
id: PubId,
|
||||
// new subscribers request to send (or stop sending) msgs them here.
|
||||
// It could be just Receiver, but weirdly it doesn't implement futures_core Stream directly.
|
||||
announce_rx: ReceiverStream<SubAnnounce>,
|
||||
subs_to_all: Vec<Sender<SafekeeperTimelineInfo>>,
|
||||
subs_to_timelines: HashMap<TenantTimelineId, Vec<SubSender>>,
|
||||
// to unregister itself from shared state in Drop
|
||||
registry: Registry,
|
||||
}
|
||||
|
||||
impl Publisher {
|
||||
// Send msg to relevant subscribers.
|
||||
pub fn send_msg(&mut self, msg: &SafekeeperTimelineInfo) -> Result<(), Status> {
|
||||
// send message to subscribers for everything
|
||||
let mut cleanup_subs_to_all = false;
|
||||
for sub in self.subs_to_all.iter() {
|
||||
match sub.try_send(msg.clone()) {
|
||||
Err(TrySendError::Full(_)) => {
|
||||
// println!("dropping message, channel is full");
|
||||
}
|
||||
Err(TrySendError::Closed(_)) => {
|
||||
cleanup_subs_to_all = true;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
// some channels got closed (subscriber gone), remove them
|
||||
if cleanup_subs_to_all {
|
||||
self.subs_to_all.retain(|tx| !tx.is_closed());
|
||||
}
|
||||
|
||||
// send message to per timeline subscribers
|
||||
let ttid = parse_proto_ttid(msg.tenant_timeline_id.as_ref().ok_or(Status::new(
|
||||
Code::InvalidArgument,
|
||||
"missing tenant_timeline_id",
|
||||
))?)?;
|
||||
if let Some(subs) = self.subs_to_timelines.get(&ttid) {
|
||||
for tx in subs.iter().map(|sub_sender| &sub_sender.1) {
|
||||
if let Err(TrySendError::Full(_)) = tx.try_send(msg.clone()) {
|
||||
// println!("dropping message, channel is full");
|
||||
}
|
||||
// closed channel is ignored here; we will be notified and remove it soon
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Add/remove subscriber according to sub_announce.
|
||||
pub fn update_sub(&mut self, sub_announce: SubAnnounce) {
|
||||
match sub_announce {
|
||||
SubAnnounce::AddAll(tx) => self.subs_to_all.push(tx),
|
||||
SubAnnounce::AddTimeline(ttid, sub_sender) => {
|
||||
match self.subs_to_timelines.entry(ttid) {
|
||||
Entry::Occupied(mut o) => {
|
||||
let subsenders = o.get_mut();
|
||||
subsenders.push(sub_sender);
|
||||
}
|
||||
Entry::Vacant(v) => {
|
||||
v.insert(vec![sub_sender]);
|
||||
}
|
||||
}
|
||||
}
|
||||
SubAnnounce::RemoveTimeline(ref ttid, sub_id) => {
|
||||
remove_sub(&mut self.subs_to_timelines, ttid, sub_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Publisher {
|
||||
fn drop(&mut self) {
|
||||
self.registry.unregister_publisher(self);
|
||||
}
|
||||
}
|
||||
|
||||
struct NeonBrokerService {
|
||||
registry: Registry,
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl NeonBroker for NeonBrokerService {
|
||||
async fn publish_safekeeper_info(
|
||||
&self,
|
||||
request: Request<tonic::Streaming<SafekeeperTimelineInfo>>,
|
||||
) -> Result<Response<Empty>, Status> {
|
||||
let mut publisher = self.registry.register_publisher();
|
||||
|
||||
let mut stream = request.into_inner();
|
||||
|
||||
loop {
|
||||
select! {
|
||||
msg = stream.next() => {
|
||||
match msg {
|
||||
Some(Ok(msg)) => {publisher.send_msg(&msg)?;},
|
||||
Some(Err(e)) => {return Err(e);}, // grpc error from the stream
|
||||
None => {break;} // closed stream
|
||||
}
|
||||
}
|
||||
Some(announce) = publisher.announce_rx.next() => {
|
||||
publisher.update_sub(announce);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Response::new(Empty {}))
|
||||
}
|
||||
|
||||
type SubscribeSafekeeperInfoStream =
|
||||
Pin<Box<dyn Stream<Item = Result<SafekeeperTimelineInfo, Status>> + Send + 'static>>;
|
||||
|
||||
async fn subscribe_safekeeper_info(
|
||||
&self,
|
||||
request: Request<SubscribeSafekeeperInfoRequest>,
|
||||
) -> Result<Response<Self::SubscribeSafekeeperInfoStream>, Status> {
|
||||
let proto_key = request.into_inner().subscription_key.ok_or(Status::new(
|
||||
Code::InvalidArgument,
|
||||
"missing subscription key",
|
||||
))?;
|
||||
let sub_key = SubscriptionKey::from_proto_subscription_key(proto_key)?;
|
||||
let mut subscriber = self.registry.register_subscriber(sub_key).await;
|
||||
|
||||
// transform rx into stream with item = Result, as method result demands
|
||||
let output = async_stream::try_stream! {
|
||||
while let Some(info) = subscriber.sub_rx.recv().await {
|
||||
yield info
|
||||
}
|
||||
|
||||
// internal generator
|
||||
// let _ = subscriber.sub_rx.try_recv().ok();
|
||||
// let mut counter = 0;
|
||||
// loop {
|
||||
// let info = SafekeeperTimelineInfo {
|
||||
// safekeeper_id: 1,
|
||||
// tenant_timeline_id: Some(ProtoTenantTimelineId {
|
||||
// tenant_id: vec![0xFF; 16],
|
||||
// timeline_id: vec![0xFF; 16],
|
||||
// // timeline_id: tli_from_u64(counter),
|
||||
// }),
|
||||
// last_log_term: 0,
|
||||
// flush_lsn: counter,
|
||||
// commit_lsn: 2,
|
||||
// backup_lsn: 3,
|
||||
// remote_consistent_lsn: 4,
|
||||
// peer_horizon_lsn: 5,
|
||||
// safekeeper_connstr: "zenith-1-sk-1.local:7676".to_owned(),
|
||||
// };
|
||||
// counter += 1;
|
||||
// yield info;
|
||||
// }
|
||||
};
|
||||
|
||||
Ok(Response::new(
|
||||
Box::pin(output) as Self::SubscribeSafekeeperInfoStream
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// console_subscriber::init();
|
||||
|
||||
let addr = "[::1]:50051".parse()?;
|
||||
let registry = Registry {
|
||||
shared_state: Arc::new(Mutex::new(SharedState {
|
||||
pub_txs: HashMap::new(),
|
||||
next_pub_id: 0,
|
||||
subs_to_all: HashMap::new(),
|
||||
subs_to_timelines: HashMap::new(),
|
||||
next_sub_id: 0,
|
||||
})),
|
||||
};
|
||||
let neon_broker_service = NeonBrokerService {
|
||||
registry: registry.clone(),
|
||||
};
|
||||
|
||||
tokio::spawn(async move { registry.report().await });
|
||||
|
||||
Server::builder()
|
||||
.http2_keepalive_interval(Some(Duration::from_millis(5000)))
|
||||
.add_service(NeonBrokerServer::new(neon_broker_service))
|
||||
.serve(addr)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// parse variable length bytes from protobuf
|
||||
fn parse_proto_ttid(proto_ttid: &ProtoTenantTimelineId) -> Result<TenantTimelineId, Status> {
|
||||
let tenant_id = TenantId::from_vec(&proto_ttid.tenant_id)
|
||||
.map_err(|e| Status::new(Code::InvalidArgument, format!("malformed tenant_id: {}", e)))?;
|
||||
let timeline_id = TimelineId::from_vec(&proto_ttid.timeline_id).map_err(|e| {
|
||||
Status::new(
|
||||
Code::InvalidArgument,
|
||||
format!("malformed timeline_id: {}", e),
|
||||
)
|
||||
})?;
|
||||
Ok(TenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
}
|
||||
@@ -6,7 +6,7 @@ edition = "2021"
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
chrono = "0.4"
|
||||
clap = "4.0"
|
||||
clap = "3.0"
|
||||
env_logger = "0.9"
|
||||
futures = "0.3.13"
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
|
||||
@@ -51,19 +51,53 @@ fn main() -> Result<()> {
|
||||
// TODO: re-use `utils::logging` later
|
||||
init_logger(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
let matches = cli().get_matches();
|
||||
// Env variable is set by `cargo`
|
||||
let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
|
||||
let matches = clap::App::new("compute_ctl")
|
||||
.version(version.unwrap_or("unknown"))
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let pgdata = matches
|
||||
.get_one::<String>("pgdata")
|
||||
.expect("PGDATA path is required");
|
||||
let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
|
||||
let connstr = matches
|
||||
.get_one::<String>("connstr")
|
||||
.value_of("connstr")
|
||||
.expect("Postgres connection string is required");
|
||||
let spec = matches.get_one::<String>("spec");
|
||||
let spec_path = matches.get_one::<String>("spec-path");
|
||||
let spec = matches.value_of("spec");
|
||||
let spec_path = matches.value_of("spec-path");
|
||||
|
||||
// Try to use just 'postgres' if no path is provided
|
||||
let pgbin = matches.get_one::<String>("pgbin").unwrap();
|
||||
let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
|
||||
|
||||
let spec: ComputeSpec = match spec {
|
||||
// First, try to get cluster spec from the cli argument
|
||||
@@ -139,48 +173,3 @@ fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cli() -> clap::Command {
|
||||
// Env variable is set by `cargo`
|
||||
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
|
||||
clap::Command::new("compute_ctl")
|
||||
.version(version)
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.default_value("postgres")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert()
|
||||
}
|
||||
|
||||
@@ -8,10 +8,11 @@ use std::process::Child;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use notify::{RecursiveMode, Watcher};
|
||||
use postgres::{Client, Transaction};
|
||||
use serde::Deserialize;
|
||||
|
||||
use notify::{RecursiveMode, Watcher};
|
||||
|
||||
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
||||
|
||||
/// Rust representation of Postgres role info with only those fields
|
||||
@@ -168,7 +169,7 @@ impl Database {
|
||||
/// it may require a proper quoting too.
|
||||
pub fn to_pg_options(&self) -> String {
|
||||
let mut params: String = self.options.as_pg_options();
|
||||
write!(params, " OWNER {}", &self.owner.pg_quote())
|
||||
write!(params, " OWNER {}", &self.owner.quote())
|
||||
.expect("String is documented to not to error during write operations");
|
||||
|
||||
params
|
||||
@@ -179,17 +180,18 @@ impl Database {
|
||||
/// intended to be used for DB / role names.
|
||||
pub type PgIdent = String;
|
||||
|
||||
/// Generic trait used to provide quoting / encoding for strings used in the
|
||||
/// Postgres SQL queries and DATABASE_URL.
|
||||
pub trait Escaping {
|
||||
fn pg_quote(&self) -> String;
|
||||
/// Generic trait used to provide quoting for strings used in the
|
||||
/// Postgres SQL queries. Currently used only to implement quoting
|
||||
/// of identifiers, but could be used for literals in the future.
|
||||
pub trait PgQuote {
|
||||
fn quote(&self) -> String;
|
||||
}
|
||||
|
||||
impl Escaping for PgIdent {
|
||||
impl PgQuote for PgIdent {
|
||||
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
|
||||
/// always quotes provided string with `""` and escapes every `"`.
|
||||
/// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
|
||||
fn pg_quote(&self) -> String {
|
||||
/// always quotes provided string with `""` and escapes every `"`. Not idempotent,
|
||||
/// i.e. if string is already escaped it will be escaped again.
|
||||
fn quote(&self) -> String {
|
||||
let result = format!("\"{}\"", self.replace('"', "\"\""));
|
||||
result
|
||||
}
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::Result;
|
||||
use log::{info, log_enabled, warn, Level};
|
||||
use postgres::config::Config;
|
||||
use postgres::{Client, NoTls};
|
||||
use serde::Deserialize;
|
||||
|
||||
@@ -117,8 +115,8 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
if existing_roles.iter().any(|r| r.name == op.name) {
|
||||
let query: String = format!(
|
||||
"ALTER ROLE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote()
|
||||
op.name.quote(),
|
||||
new_name.quote()
|
||||
);
|
||||
|
||||
warn!("renaming role '{}' to '{}'", op.name, new_name);
|
||||
@@ -164,7 +162,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
}
|
||||
|
||||
if update_role {
|
||||
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
|
||||
let mut query: String = format!("ALTER ROLE {} ", name.quote());
|
||||
info_print!(" -> update");
|
||||
|
||||
query.push_str(&role.to_pg_options());
|
||||
@@ -172,7 +170,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
}
|
||||
} else {
|
||||
info!("role name: '{}'", &name);
|
||||
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
|
||||
let mut query: String = format!("CREATE ROLE {} ", name.quote());
|
||||
info!("role create query: '{}'", &query);
|
||||
info_print!(" -> create");
|
||||
|
||||
@@ -181,7 +179,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
|
||||
let grant_query = format!(
|
||||
"GRANT pg_read_all_data, pg_write_all_data TO {}",
|
||||
name.pg_quote()
|
||||
name.quote()
|
||||
);
|
||||
xact.execute(grant_query.as_str(), &[])?;
|
||||
info!("role grant query: '{}'", &grant_query);
|
||||
@@ -217,7 +215,7 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
|
||||
// We do not check either role exists or not,
|
||||
// Postgres will take care of it for us
|
||||
if op.action == "delete_role" {
|
||||
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
|
||||
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
|
||||
|
||||
warn!("deleting role '{}'", &op.name);
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
@@ -232,16 +230,17 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
|
||||
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
|
||||
for db in &node.spec.cluster.databases {
|
||||
if db.owner != *role_name {
|
||||
let mut conf = Config::from_str(node.connstr.as_str())?;
|
||||
conf.dbname(&db.name);
|
||||
let mut connstr = node.connstr.clone();
|
||||
// database name is always the last and the only component of the path
|
||||
connstr.set_path(&db.name);
|
||||
|
||||
let mut client = conf.connect(NoTls)?;
|
||||
let mut client = Client::connect(connstr.as_str(), NoTls)?;
|
||||
|
||||
// This will reassign all dependent objects to the db owner
|
||||
let reassign_query = format!(
|
||||
"REASSIGN OWNED BY {} TO {}",
|
||||
role_name.pg_quote(),
|
||||
db.owner.pg_quote()
|
||||
role_name.quote(),
|
||||
db.owner.quote()
|
||||
);
|
||||
info!(
|
||||
"reassigning objects owned by '{}' in db '{}' to '{}'",
|
||||
@@ -250,7 +249,7 @@ fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()>
|
||||
client.simple_query(&reassign_query)?;
|
||||
|
||||
// This now will only drop privileges of the role
|
||||
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
|
||||
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
|
||||
client.simple_query(&drop_query)?;
|
||||
}
|
||||
}
|
||||
@@ -280,7 +279,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
// We do not check either DB exists or not,
|
||||
// Postgres will take care of it for us
|
||||
"delete_db" => {
|
||||
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());
|
||||
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.quote());
|
||||
|
||||
warn!("deleting database '{}'", &op.name);
|
||||
client.execute(query.as_str(), &[])?;
|
||||
@@ -292,8 +291,8 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
if existing_dbs.iter().any(|r| r.name == op.name) {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote()
|
||||
op.name.quote(),
|
||||
new_name.quote()
|
||||
);
|
||||
|
||||
warn!("renaming database '{}' to '{}'", op.name, new_name);
|
||||
@@ -321,7 +320,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
// XXX: db owner name is returned as quoted string from Postgres,
|
||||
// when quoting is needed.
|
||||
let new_owner = if r.owner.starts_with('"') {
|
||||
db.owner.pg_quote()
|
||||
db.owner.quote()
|
||||
} else {
|
||||
db.owner.clone()
|
||||
};
|
||||
@@ -329,15 +328,15 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
if new_owner != r.owner {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} OWNER TO {}",
|
||||
name.pg_quote(),
|
||||
db.owner.pg_quote()
|
||||
name.quote(),
|
||||
db.owner.quote()
|
||||
);
|
||||
info_print!(" -> update");
|
||||
|
||||
client.execute(query.as_str(), &[])?;
|
||||
}
|
||||
} else {
|
||||
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
|
||||
let mut query: String = format!("CREATE DATABASE {} ", name.quote());
|
||||
info_print!(" -> create");
|
||||
|
||||
query.push_str(&db.to_pg_options());
|
||||
@@ -367,7 +366,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
.cluster
|
||||
.roles
|
||||
.iter()
|
||||
.map(|r| r.name.pg_quote())
|
||||
.map(|r| r.name.quote())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for db in &spec.cluster.databases {
|
||||
@@ -375,7 +374,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
|
||||
let query: String = format!(
|
||||
"GRANT CREATE ON DATABASE {} TO {}",
|
||||
dbname.pg_quote(),
|
||||
dbname.quote(),
|
||||
roles.join(", ")
|
||||
);
|
||||
info!("grant query {}", &query);
|
||||
@@ -386,11 +385,12 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
// Do some per-database access adjustments. We'd better do this at db creation time,
|
||||
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
||||
// atomically.
|
||||
let mut db_connstr = node.connstr.clone();
|
||||
for db in &node.spec.cluster.databases {
|
||||
let mut conf = Config::from_str(node.connstr.as_str())?;
|
||||
conf.dbname(&db.name);
|
||||
// database name is always the last and the only component of the path
|
||||
db_connstr.set_path(&db.name);
|
||||
|
||||
let mut db_client = conf.connect(NoTls)?;
|
||||
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
|
||||
|
||||
// This will only change ownership on the schema itself, not the objects
|
||||
// inside it. Without it owner of the `public` schema will be `cloud_admin`
|
||||
@@ -419,36 +419,9 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
END IF;\n\
|
||||
END\n\
|
||||
$$;",
|
||||
db.owner.pg_quote()
|
||||
db.owner.quote()
|
||||
);
|
||||
db_client.simple_query(&alter_query)?;
|
||||
|
||||
// Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
|
||||
// This is needed because since postgres 15 this privilege is removed by default.
|
||||
let grant_query = "DO $$\n\
|
||||
BEGIN\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT nspname\n\
|
||||
FROM pg_catalog.pg_namespace\n\
|
||||
WHERE nspname = 'public'\n\
|
||||
) AND\n\
|
||||
current_setting('server_version_num')::int/10000 >= 15\n\
|
||||
THEN\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT rolname\n\
|
||||
FROM pg_catalog.pg_roles\n\
|
||||
WHERE rolname = 'web_access'\n\
|
||||
)\n\
|
||||
THEN\n\
|
||||
GRANT CREATE ON SCHEMA public TO web_access;\n\
|
||||
END IF;\n\
|
||||
END IF;\n\
|
||||
END\n\
|
||||
$$;"
|
||||
.to_string();
|
||||
|
||||
info!("grant query for db {} : {}", &db.name, &grant_query);
|
||||
db_client.simple_query(&grant_query)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -33,9 +33,9 @@ mod pg_helpers_tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ident_pg_quote() {
|
||||
fn quote_ident() {
|
||||
let ident: PgIdent = PgIdent::from("\"name\";\\n select 1;");
|
||||
|
||||
assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
|
||||
assert_eq!(ident.quote(), "\"\"\"name\"\";\\n select 1;\"");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
clap = "4.0"
|
||||
clap = "3.0"
|
||||
comfy-table = "6.1"
|
||||
git-version = "0.3.5"
|
||||
tar = "0.4.38"
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
//! rely on `neon_local` to set up the environment for each test.
|
||||
//!
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
|
||||
use clap::{App, AppSettings, Arg, ArgMatches};
|
||||
use control_plane::compute::ComputeControlPlane;
|
||||
use control_plane::local_env::{EtcdBroker, LocalEnv};
|
||||
use control_plane::safekeeper::SafekeeperNode;
|
||||
@@ -85,7 +85,212 @@ struct TimelineTreeEl {
|
||||
// * Providing CLI api to the pageserver
|
||||
// * TODO: export/import to/from usual postgres
|
||||
fn main() -> Result<()> {
|
||||
let matches = cli().get_matches();
|
||||
let branch_name_arg = Arg::new("branch-name")
|
||||
.long("branch-name")
|
||||
.takes_value(true)
|
||||
.help("Name of the branch to be created or used as an alias for other services")
|
||||
.required(false);
|
||||
|
||||
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
|
||||
|
||||
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
|
||||
|
||||
let tenant_id_arg = Arg::new("tenant-id")
|
||||
.long("tenant-id")
|
||||
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let timeline_id_arg = Arg::new("timeline-id")
|
||||
.long("timeline-id")
|
||||
.help("Timeline id. Represented as a hexadecimal string 32 symbols length")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let pg_version_arg = Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.required(false)
|
||||
.takes_value(true)
|
||||
.default_value(DEFAULT_PG_VERSION);
|
||||
|
||||
let port_arg = Arg::new("port")
|
||||
.long("port")
|
||||
.required(false)
|
||||
.value_name("port");
|
||||
|
||||
let stop_mode_arg = Arg::new("stop-mode")
|
||||
.short('m')
|
||||
.takes_value(true)
|
||||
.possible_values(&["fast", "immediate"])
|
||||
.help("If 'immediate', don't flush repository data at shutdown")
|
||||
.required(false)
|
||||
.value_name("stop-mode");
|
||||
|
||||
let pageserver_config_args = Arg::new("pageserver-config-override")
|
||||
.long("pageserver-config-override")
|
||||
.takes_value(true)
|
||||
.number_of_values(1)
|
||||
.multiple_occurrences(true)
|
||||
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
|
||||
.required(false);
|
||||
|
||||
let lsn_arg = Arg::new("lsn")
|
||||
.long("lsn")
|
||||
.help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
|
||||
.takes_value(true)
|
||||
.required(false);
|
||||
|
||||
let matches = App::new("Neon CLI")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.version(GIT_VERSION)
|
||||
.subcommand(
|
||||
App::new("init")
|
||||
.about("Initialize a new Neon repository")
|
||||
.arg(pageserver_config_args.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(
|
||||
Arg::new("config")
|
||||
.long("config")
|
||||
.required(false)
|
||||
.value_name("config"),
|
||||
)
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(
|
||||
App::new("timeline")
|
||||
.about("Manage timelines")
|
||||
.subcommand(App::new("list")
|
||||
.about("List all timelines, available to this pageserver")
|
||||
.arg(tenant_id_arg.clone()))
|
||||
.subcommand(App::new("branch")
|
||||
.about("Create a new timeline, using another timeline as a base, copying its data")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name").takes_value(true)
|
||||
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
|
||||
.arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn").takes_value(true)
|
||||
.help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
|
||||
.subcommand(App::new("create")
|
||||
.about("Create a new blank timeline")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("import")
|
||||
.about("Import timeline from basebackup directory")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(Arg::new("node-name").long("node-name").takes_value(true)
|
||||
.help("Name to assign to the imported timeline"))
|
||||
.arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
|
||||
.help("Basebackup tarfile to import"))
|
||||
.arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
|
||||
.help("Lsn the basebackup starts at"))
|
||||
.arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
|
||||
.help("Wal to add after base"))
|
||||
.arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
|
||||
.help("Lsn the basebackup ends at"))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
).subcommand(
|
||||
App::new("tenant")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage tenants")
|
||||
.subcommand(App::new("list"))
|
||||
.subcommand(App::new("create")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("config")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("pageserver")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage pageserver")
|
||||
.subcommand(App::new("status"))
|
||||
.subcommand(App::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
|
||||
.subcommand(App::new("stop").about("Stop local pageserver")
|
||||
.arg(stop_mode_arg.clone()))
|
||||
.subcommand(App::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
|
||||
)
|
||||
.subcommand(
|
||||
App::new("safekeeper")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage safekeepers")
|
||||
.subcommand(App::new("start")
|
||||
.about("Start local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("stop")
|
||||
.about("Stop local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("restart")
|
||||
.about("Restart local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("pg")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.about("Manage postgres instances")
|
||||
.subcommand(App::new("list").arg(tenant_id_arg.clone()))
|
||||
.subcommand(App::new("create")
|
||||
.about("Create a postgres compute node")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(
|
||||
Arg::new("config-only")
|
||||
.help("Don't do basebackup, create compute node with only config files")
|
||||
.long("config-only")
|
||||
.required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(App::new("start")
|
||||
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(
|
||||
App::new("stop")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(
|
||||
Arg::new("destroy")
|
||||
.help("Also delete data directory (now optional, should be default in future)")
|
||||
.long("destroy")
|
||||
.required(false)
|
||||
)
|
||||
)
|
||||
|
||||
)
|
||||
.subcommand(
|
||||
App::new("start")
|
||||
.about("Start page server and safekeepers")
|
||||
.arg(pageserver_config_args)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("stop")
|
||||
.about("Stop page server and safekeepers")
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let (sub_name, sub_args) = match matches.subcommand() {
|
||||
Some(subcommand_data) => subcommand_data,
|
||||
@@ -270,16 +475,16 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R
|
||||
|
||||
fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
|
||||
sub_match
|
||||
.get_one::<String>("tenant-id")
|
||||
.map(|tenant_id| TenantId::from_str(tenant_id))
|
||||
.value_of("tenant-id")
|
||||
.map(TenantId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse tenant id from the argument string")
|
||||
}
|
||||
|
||||
fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId>> {
|
||||
sub_match
|
||||
.get_one::<String>("timeline-id")
|
||||
.map(|timeline_id| TimelineId::from_str(timeline_id))
|
||||
.value_of("timeline-id")
|
||||
.map(TimelineId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse timeline id from the argument string")
|
||||
}
|
||||
@@ -288,22 +493,19 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
let initial_timeline_id_arg = parse_timeline_id(init_match)?;
|
||||
|
||||
// Create config file
|
||||
let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
|
||||
let toml_file: String = if let Some(config_path) = init_match.value_of("config") {
|
||||
// load and parse the file
|
||||
std::fs::read_to_string(config_path).with_context(|| {
|
||||
format!(
|
||||
"Could not read configuration file '{}'",
|
||||
config_path.display()
|
||||
)
|
||||
})?
|
||||
std::fs::read_to_string(std::path::Path::new(config_path))
|
||||
.with_context(|| format!("Could not read configuration file '{config_path}'"))?
|
||||
} else {
|
||||
// Built-in default config
|
||||
default_conf(&EtcdBroker::locate_etcd()?)
|
||||
};
|
||||
|
||||
let pg_version = init_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let mut env =
|
||||
@@ -339,10 +541,9 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
|
||||
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
|
||||
init_match
|
||||
.get_many::<String>("pageserver-config-override")
|
||||
.values_of("pageserver-config-override")
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|s| s.as_str())
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -357,7 +558,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
Some(("create", create_match)) => {
|
||||
let initial_tenant_id = parse_tenant_id(create_match)?;
|
||||
let tenant_conf: HashMap<_, _> = create_match
|
||||
.get_many::<String>("config")
|
||||
.values_of("config")
|
||||
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
|
||||
.unwrap_or_default();
|
||||
let new_tenant_id = pageserver.tenant_create(initial_tenant_id, tenant_conf)?;
|
||||
@@ -366,8 +567,9 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
// Create an initial timeline for the new tenant
|
||||
let new_timeline_id = parse_timeline_id(create_match)?;
|
||||
let pg_version = create_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let timeline_info = pageserver.timeline_create(
|
||||
@@ -393,7 +595,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
Some(("config", create_match)) => {
|
||||
let tenant_id = get_tenant_id(create_match, env)?;
|
||||
let tenant_conf: HashMap<_, _> = create_match
|
||||
.get_many::<String>("config")
|
||||
.values_of("config")
|
||||
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
@@ -420,12 +622,13 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
Some(("create", create_match)) => {
|
||||
let tenant_id = get_tenant_id(create_match, env)?;
|
||||
let new_branch_name = create_match
|
||||
.get_one::<String>("branch-name")
|
||||
.value_of("branch-name")
|
||||
.ok_or_else(|| anyhow!("No branch name provided"))?;
|
||||
|
||||
let pg_version = create_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let timeline_info =
|
||||
@@ -444,32 +647,35 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
let tenant_id = get_tenant_id(import_match, env)?;
|
||||
let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
|
||||
let name = import_match
|
||||
.get_one::<String>("node-name")
|
||||
.value_of("node-name")
|
||||
.ok_or_else(|| anyhow!("No node name provided"))?;
|
||||
|
||||
// Parse base inputs
|
||||
let base_tarfile = import_match
|
||||
.get_one::<PathBuf>("base-tarfile")
|
||||
.ok_or_else(|| anyhow!("No base-tarfile provided"))?
|
||||
.to_owned();
|
||||
.value_of("base-tarfile")
|
||||
.map(|s| PathBuf::from_str(s).unwrap())
|
||||
.ok_or_else(|| anyhow!("No base-tarfile provided"))?;
|
||||
let base_lsn = Lsn::from_str(
|
||||
import_match
|
||||
.get_one::<String>("base-lsn")
|
||||
.value_of("base-lsn")
|
||||
.ok_or_else(|| anyhow!("No base-lsn provided"))?,
|
||||
)?;
|
||||
let base = (base_lsn, base_tarfile);
|
||||
|
||||
// Parse pg_wal inputs
|
||||
let wal_tarfile = import_match.get_one::<PathBuf>("wal-tarfile").cloned();
|
||||
let wal_tarfile = import_match
|
||||
.value_of("wal-tarfile")
|
||||
.map(|s| PathBuf::from_str(s).unwrap());
|
||||
let end_lsn = import_match
|
||||
.get_one::<String>("end-lsn")
|
||||
.value_of("end-lsn")
|
||||
.map(|s| Lsn::from_str(s).unwrap());
|
||||
// TODO validate both or none are provided
|
||||
let pg_wal = end_lsn.zip(wal_tarfile);
|
||||
|
||||
let pg_version = import_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let mut cplane = ComputeControlPlane::load(env.clone())?;
|
||||
@@ -484,11 +690,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
Some(("branch", branch_match)) => {
|
||||
let tenant_id = get_tenant_id(branch_match, env)?;
|
||||
let new_branch_name = branch_match
|
||||
.get_one::<String>("branch-name")
|
||||
.value_of("branch-name")
|
||||
.ok_or_else(|| anyhow!("No branch name provided"))?;
|
||||
let ancestor_branch_name = branch_match
|
||||
.get_one::<String>("ancestor-branch-name")
|
||||
.map(|s| s.as_str())
|
||||
.value_of("ancestor-branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let ancestor_timeline_id = env
|
||||
.get_branch_timeline_id(ancestor_branch_name, tenant_id)
|
||||
@@ -497,8 +702,8 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
})?;
|
||||
|
||||
let start_lsn = branch_match
|
||||
.get_one::<String>("ancestor-start-lsn")
|
||||
.map(|lsn_str| Lsn::from_str(lsn_str))
|
||||
.value_of("ancestor-start-lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse ancestor start Lsn from the request")?;
|
||||
let timeline_info = pageserver.timeline_create(
|
||||
@@ -599,39 +804,45 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
}
|
||||
"create" => {
|
||||
let branch_name = sub_args
|
||||
.get_one::<String>("branch-name")
|
||||
.map(|s| s.as_str())
|
||||
.value_of("branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let node_name = sub_args
|
||||
.get_one::<String>("node")
|
||||
.map(|node_name| node_name.to_string())
|
||||
.unwrap_or_else(|| format!("{branch_name}_node"));
|
||||
.value_of("node")
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or_else(|| format!("{}_node", branch_name));
|
||||
|
||||
let lsn = sub_args
|
||||
.get_one::<String>("lsn")
|
||||
.map(|lsn_str| Lsn::from_str(lsn_str))
|
||||
.value_of("lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse Lsn from the request")?;
|
||||
let timeline_id = env
|
||||
.get_branch_timeline_id(branch_name, tenant_id)
|
||||
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;
|
||||
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{}'", branch_name))?;
|
||||
|
||||
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let pg_version = sub_args
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
|
||||
}
|
||||
"start" => {
|
||||
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
|
||||
let port: Option<u16> = match sub_args.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
let node_name = sub_args
|
||||
.get_one::<String>("node")
|
||||
.value_of("node")
|
||||
.ok_or_else(|| anyhow!("No node name was provided to start"))?;
|
||||
|
||||
let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));
|
||||
let node = cplane.nodes.get(&(tenant_id, node_name.to_owned()));
|
||||
|
||||
let auth_token = if matches!(env.pageserver.auth_type, AuthType::NeonJWT) {
|
||||
let claims = Claims::new(Some(tenant_id), Scope::Tenant);
|
||||
@@ -642,33 +853,36 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
};
|
||||
|
||||
if let Some(node) = node {
|
||||
println!("Starting existing postgres {node_name}...");
|
||||
println!("Starting existing postgres {}...", node_name);
|
||||
node.start(&auth_token)?;
|
||||
} else {
|
||||
let branch_name = sub_args
|
||||
.get_one::<String>("branch-name")
|
||||
.map(|s| s.as_str())
|
||||
.value_of("branch-name")
|
||||
.unwrap_or(DEFAULT_BRANCH_NAME);
|
||||
let timeline_id = env
|
||||
.get_branch_timeline_id(branch_name, tenant_id)
|
||||
.ok_or_else(|| {
|
||||
anyhow!("Found no timeline id for branch name '{branch_name}'")
|
||||
anyhow!("Found no timeline id for branch name '{}'", branch_name)
|
||||
})?;
|
||||
let lsn = sub_args
|
||||
.get_one::<String>("lsn")
|
||||
.map(|lsn_str| Lsn::from_str(lsn_str))
|
||||
.value_of("lsn")
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse Lsn from the request")?;
|
||||
let pg_version = sub_args
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.context("Failed to `pg-version` from the argument string")?;
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
// when used with custom port this results in non obvious behaviour
|
||||
// port is remembered from first start command, i e
|
||||
// start --port X
|
||||
// stop
|
||||
// start <-- will also use port X even without explicit port argument
|
||||
println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");
|
||||
println!(
|
||||
"Starting new postgres (v{}) {} on timeline {} ...",
|
||||
pg_version, node_name, timeline_id
|
||||
);
|
||||
|
||||
let node =
|
||||
cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
|
||||
@@ -677,18 +891,18 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
}
|
||||
"stop" => {
|
||||
let node_name = sub_args
|
||||
.get_one::<String>("node")
|
||||
.value_of("node")
|
||||
.ok_or_else(|| anyhow!("No node name was provided to stop"))?;
|
||||
let destroy = sub_args.get_flag("destroy");
|
||||
let destroy = sub_args.is_present("destroy");
|
||||
|
||||
let node = cplane
|
||||
.nodes
|
||||
.get(&(tenant_id, node_name.to_string()))
|
||||
.with_context(|| format!("postgres {node_name} is not found"))?;
|
||||
.get(&(tenant_id, node_name.to_owned()))
|
||||
.with_context(|| format!("postgres {} is not found", node_name))?;
|
||||
node.stop(destroy)?;
|
||||
}
|
||||
|
||||
_ => bail!("Unexpected pg subcommand '{sub_name}'"),
|
||||
_ => bail!("Unexpected pg subcommand '{}'", sub_name),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -706,10 +920,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
Some(("stop", stop_match)) => {
|
||||
let immediate = stop_match
|
||||
.get_one::<String>("stop-mode")
|
||||
.map(|s| s.as_str())
|
||||
== Some("immediate");
|
||||
let immediate = stop_match.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = pageserver.stop(immediate) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
@@ -759,7 +970,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
};
|
||||
|
||||
// All the commands take an optional safekeeper name argument
|
||||
let sk_id = if let Some(id_str) = sub_args.get_one::<String>("id") {
|
||||
let sk_id = if let Some(id_str) = sub_args.value_of("id") {
|
||||
NodeId(id_str.parse().context("while parsing safekeeper id")?)
|
||||
} else {
|
||||
DEFAULT_SAFEKEEPER_ID
|
||||
@@ -775,8 +986,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
"stop" => {
|
||||
let immediate =
|
||||
sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
|
||||
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
@@ -785,8 +995,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
"restart" => {
|
||||
let immediate =
|
||||
sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
|
||||
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
@@ -830,8 +1039,7 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
|
||||
}
|
||||
|
||||
fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let immediate =
|
||||
sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
|
||||
let immediate = sub_match.value_of("stop-mode") == Some("immediate");
|
||||
|
||||
let pageserver = PageServerNode::from_env(env);
|
||||
|
||||
@@ -864,219 +1072,3 @@ fn try_stop_etcd_process(env: &local_env::LocalEnv) {
|
||||
eprintln!("etcd stop failed: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
let branch_name_arg = Arg::new("branch-name")
|
||||
.long("branch-name")
|
||||
.help("Name of the branch to be created or used as an alias for other services")
|
||||
.required(false);
|
||||
|
||||
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
|
||||
|
||||
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
|
||||
|
||||
let tenant_id_arg = Arg::new("tenant-id")
|
||||
.long("tenant-id")
|
||||
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
|
||||
.required(false);
|
||||
|
||||
let timeline_id_arg = Arg::new("timeline-id")
|
||||
.long("timeline-id")
|
||||
.help("Timeline id. Represented as a hexadecimal string 32 symbols length")
|
||||
.required(false);
|
||||
|
||||
let pg_version_arg = Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.required(false)
|
||||
.value_parser(value_parser!(u32))
|
||||
.default_value(DEFAULT_PG_VERSION);
|
||||
|
||||
let port_arg = Arg::new("port")
|
||||
.long("port")
|
||||
.required(false)
|
||||
.value_parser(value_parser!(u16))
|
||||
.value_name("port");
|
||||
|
||||
let stop_mode_arg = Arg::new("stop-mode")
|
||||
.short('m')
|
||||
.value_parser(["fast", "immediate"])
|
||||
.help("If 'immediate', don't flush repository data at shutdown")
|
||||
.required(false)
|
||||
.value_name("stop-mode");
|
||||
|
||||
let pageserver_config_args = Arg::new("pageserver-config-override")
|
||||
.long("pageserver-config-override")
|
||||
.num_args(1)
|
||||
.action(ArgAction::Append)
|
||||
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
|
||||
.required(false);
|
||||
|
||||
let lsn_arg = Arg::new("lsn")
|
||||
.long("lsn")
|
||||
.help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
|
||||
.required(false);
|
||||
|
||||
Command::new("Neon CLI")
|
||||
.arg_required_else_help(true)
|
||||
.version(GIT_VERSION)
|
||||
.subcommand(
|
||||
Command::new("init")
|
||||
.about("Initialize a new Neon repository")
|
||||
.arg(pageserver_config_args.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(
|
||||
Arg::new("config")
|
||||
.long("config")
|
||||
.required(false)
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.value_name("config"),
|
||||
)
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("timeline")
|
||||
.about("Manage timelines")
|
||||
.subcommand(Command::new("list")
|
||||
.about("List all timelines, available to this pageserver")
|
||||
.arg(tenant_id_arg.clone()))
|
||||
.subcommand(Command::new("branch")
|
||||
.about("Create a new timeline, using another timeline as a base, copying its data")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
|
||||
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
|
||||
.arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn")
|
||||
.help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
|
||||
.subcommand(Command::new("create")
|
||||
.about("Create a new blank timeline")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("import")
|
||||
.about("Import timeline from basebackup directory")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(Arg::new("node-name").long("node-name")
|
||||
.help("Name to assign to the imported timeline"))
|
||||
.arg(Arg::new("base-tarfile")
|
||||
.long("base-tarfile")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.help("Basebackup tarfile to import")
|
||||
)
|
||||
.arg(Arg::new("base-lsn").long("base-lsn")
|
||||
.help("Lsn the basebackup starts at"))
|
||||
.arg(Arg::new("wal-tarfile")
|
||||
.long("wal-tarfile")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.help("Wal to add after base")
|
||||
)
|
||||
.arg(Arg::new("end-lsn").long("end-lsn")
|
||||
.help("Lsn the basebackup ends at"))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
).subcommand(
|
||||
Command::new("tenant")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage tenants")
|
||||
.subcommand(Command::new("list"))
|
||||
.subcommand(Command::new("create")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
|
||||
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("config")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("pageserver")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage pageserver")
|
||||
.subcommand(Command::new("status"))
|
||||
.subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
|
||||
.subcommand(Command::new("stop").about("Stop local pageserver")
|
||||
.arg(stop_mode_arg.clone()))
|
||||
.subcommand(Command::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("safekeeper")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage safekeepers")
|
||||
.subcommand(Command::new("start")
|
||||
.about("Start local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("stop")
|
||||
.about("Stop local safekeeper")
|
||||
.arg(safekeeper_id_arg.clone())
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("restart")
|
||||
.about("Restart local safekeeper")
|
||||
.arg(safekeeper_id_arg)
|
||||
.arg(stop_mode_arg.clone())
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("pg")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage postgres instances")
|
||||
.subcommand(Command::new("list").arg(tenant_id_arg.clone()))
|
||||
.subcommand(Command::new("create")
|
||||
.about("Create a postgres compute node")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(lsn_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(
|
||||
Arg::new("config-only")
|
||||
.help("Don't do basebackup, create compute node with only config files")
|
||||
.long("config-only")
|
||||
.required(false))
|
||||
.arg(pg_version_arg.clone())
|
||||
)
|
||||
.subcommand(Command::new("start")
|
||||
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
|
||||
.arg(pg_node_arg.clone())
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(branch_name_arg)
|
||||
.arg(timeline_id_arg)
|
||||
.arg(lsn_arg)
|
||||
.arg(port_arg)
|
||||
.arg(pg_version_arg)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("stop")
|
||||
.arg(pg_node_arg)
|
||||
.arg(tenant_id_arg)
|
||||
.arg(
|
||||
Arg::new("destroy")
|
||||
.help("Also delete data directory (now optional, should be default in future)")
|
||||
.long("destroy")
|
||||
.action(ArgAction::SetTrue)
|
||||
.required(false)
|
||||
)
|
||||
)
|
||||
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("start")
|
||||
.about("Start page server and safekeepers")
|
||||
.arg(pageserver_config_args)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("stop")
|
||||
.about("Stop page server and safekeepers")
|
||||
.arg(stop_mode_arg)
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -183,18 +183,18 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
|
||||
let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
|
||||
let pg_path = self.env.pg_bin_dir(pg_version).join("postgres");
|
||||
let mut cmd = Command::new(&pg_path);
|
||||
|
||||
cmd.arg("--sync-safekeepers")
|
||||
.env_clear()
|
||||
.env(
|
||||
"LD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(pg_version).to_str().unwrap(),
|
||||
)
|
||||
.env(
|
||||
"DYLD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(pg_version).to_str().unwrap(),
|
||||
)
|
||||
.env("PGDATA", self.pgdata().to_str().unwrap())
|
||||
.stdout(Stdio::piped())
|
||||
@@ -282,7 +282,9 @@ impl PostgresNode {
|
||||
fn setup_pg_conf(&self, auth_type: AuthType) -> Result<()> {
|
||||
let mut conf = PostgresConf::new();
|
||||
conf.append("max_wal_senders", "10");
|
||||
conf.append("wal_log_hints", "off");
|
||||
// wal_log_hints is mandatory when running against pageserver (see gh issue#192)
|
||||
// TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE?
|
||||
conf.append("wal_log_hints", "on");
|
||||
conf.append("max_replication_slots", "10");
|
||||
conf.append("hot_standby", "on");
|
||||
conf.append("shared_buffers", "1MB");
|
||||
@@ -420,7 +422,7 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
|
||||
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
|
||||
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version).join("pg_ctl");
|
||||
let mut cmd = Command::new(pg_ctl_path);
|
||||
cmd.args(
|
||||
[
|
||||
@@ -438,11 +440,11 @@ impl PostgresNode {
|
||||
.env_clear()
|
||||
.env(
|
||||
"LD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(self.pg_version).to_str().unwrap(),
|
||||
)
|
||||
.env(
|
||||
"DYLD_LIBRARY_PATH",
|
||||
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
|
||||
self.env.pg_lib_dir(self.pg_version).to_str().unwrap(),
|
||||
);
|
||||
if let Some(token) = auth_token {
|
||||
cmd.env("ZENITH_AUTH_TOKEN", token);
|
||||
|
||||
@@ -201,28 +201,28 @@ impl LocalEnv {
|
||||
self.pg_distrib_dir.clone()
|
||||
}
|
||||
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
match pg_version {
|
||||
14 => Ok(path.join(format!("v{pg_version}"))),
|
||||
15 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => path.join(format!("v{pg_version}")),
|
||||
15 => path.join(format!("v{pg_version}")),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -422,10 +422,10 @@ impl LocalEnv {
|
||||
"directory '{}' already exists. Perhaps already initialized?",
|
||||
base_path.display()
|
||||
);
|
||||
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
||||
if !self.pg_bin_dir(pg_version).join("postgres").exists() {
|
||||
bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
self.pg_bin_dir(pg_version)?.display()
|
||||
self.pg_bin_dir(pg_version).display()
|
||||
);
|
||||
}
|
||||
for binary in ["pageserver", "safekeeper"] {
|
||||
|
||||
@@ -123,6 +123,7 @@ impl SafekeeperNode {
|
||||
.args(&["--id", self.id.to_string().as_ref()])
|
||||
.args(&["--listen-pg", &listen_pg])
|
||||
.args(&["--listen-http", &listen_http])
|
||||
.args(&["--recall", "1 second"])
|
||||
.arg("--daemonize"),
|
||||
);
|
||||
if !self.conf.sync {
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
|
||||
PG_VERSION=${PG_VERSION:-14}
|
||||
|
||||
SPEC_FILE_ORG=/var/db/postgres/specs/spec.json
|
||||
SPEC_FILE=/tmp/spec.json
|
||||
|
||||
echo "Waiting pageserver become ready."
|
||||
while ! nc -z pageserver 6400; do
|
||||
sleep 1;
|
||||
done
|
||||
echo "Page server is ready."
|
||||
|
||||
echo "Create a tenant and timeline"
|
||||
PARAMS=(
|
||||
-sb
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{}"
|
||||
http://pageserver:9898/v1/tenant/
|
||||
)
|
||||
tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g')
|
||||
|
||||
PARAMS=(
|
||||
-sb
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
|
||||
echo "Overwrite tenant id and timeline id in spec file"
|
||||
tenant_id=$(echo ${result} | jq -r .tenant_id)
|
||||
timeline_id=$(echo ${result} | jq -r .timeline_id)
|
||||
|
||||
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
|
||||
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
|
||||
|
||||
cat ${SPEC_FILE}
|
||||
|
||||
echo "Start compute node"
|
||||
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
|
||||
-C "postgresql://cloud_admin@localhost:55433/postgres" \
|
||||
-b /usr/local/bin/postgres \
|
||||
-S ${SPEC_FILE}
|
||||
@@ -1,141 +0,0 @@
|
||||
{
|
||||
"format_version": 1.0,
|
||||
|
||||
"timestamp": "2022-10-12T18:00:00.000Z",
|
||||
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c",
|
||||
|
||||
"cluster": {
|
||||
"cluster_id": "docker_compose",
|
||||
"name": "docker_compose_test",
|
||||
"state": "restarted",
|
||||
"roles": [
|
||||
{
|
||||
"name": "cloud_admin",
|
||||
"encrypted_password": "b093c0d3b281ba6da1eacc608620abd8",
|
||||
"options": null
|
||||
}
|
||||
],
|
||||
"databases": [
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"name": "fsync",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_level",
|
||||
"value": "replica",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "hot_standby",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_log_hints",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "log_connections",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "port",
|
||||
"value": "55433",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "shared_buffers",
|
||||
"value": "1MB",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_connections",
|
||||
"value": "100",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "listen_addresses",
|
||||
"value": "0.0.0.0",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_wal_senders",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_slots",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "wal_sender_timeout",
|
||||
"value": "5s",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_keep_size",
|
||||
"value": "0",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "password_encryption",
|
||||
"value": "md5",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "restart_after_crash",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "synchronous_standby_names",
|
||||
"value": "walproposer",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "neon",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.safekeepers",
|
||||
"value": "safekeeper1:5454,safekeeper2:5454,safekeeper3:5454",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.timeline_id",
|
||||
"value": "TIMELINE_ID",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.tenant_id",
|
||||
"value": "TENANT_ID",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.pageserver_connstring",
|
||||
"value": "host=pageserver port=6400",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_write_lag",
|
||||
"value": "500MB",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_flush_lag",
|
||||
"value": "10GB",
|
||||
"vartype": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"delta_operations": [
|
||||
]
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
version: '3'
|
||||
|
||||
services:
|
||||
etcd:
|
||||
image: quay.io/coreos/etcd:v3.5.4
|
||||
ports:
|
||||
- 2379:2379
|
||||
- 2380:2380
|
||||
environment:
|
||||
# This signifficantly speeds up etcd and we anyway don't data persistency there.
|
||||
ETCD_UNSAFE_NO_FSYNC: "1"
|
||||
command:
|
||||
- "etcd"
|
||||
- "--auto-compaction-mode=revision"
|
||||
- "--auto-compaction-retention=1"
|
||||
- "--name=etcd-cluster"
|
||||
- "--initial-cluster-state=new"
|
||||
- "--initial-cluster-token=etcd-cluster-1"
|
||||
- "--initial-cluster=etcd-cluster=http://etcd:2380"
|
||||
- "--initial-advertise-peer-urls=http://etcd:2380"
|
||||
- "--advertise-client-urls=http://etcd:2379"
|
||||
- "--listen-client-urls=http://0.0.0.0:2379"
|
||||
- "--listen-peer-urls=http://0.0.0.0:2380"
|
||||
- "--quota-backend-bytes=134217728" # 128 MB
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
environment:
|
||||
- MINIO_ROOT_USER=minio
|
||||
- MINIO_ROOT_PASSWORD=password
|
||||
command: server /data --address :9000 --console-address ":9001"
|
||||
|
||||
minio_create_buckets:
|
||||
image: minio/mc
|
||||
environment:
|
||||
- MINIO_ROOT_USER=minio
|
||||
- MINIO_ROOT_PASSWORD=password
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do
|
||||
echo 'Waiting to start minio...' && sleep 1;
|
||||
done;
|
||||
/usr/bin/mc mb minio/neon --region=eu-north-1;
|
||||
exit 0;"
|
||||
depends_on:
|
||||
- minio
|
||||
|
||||
pageserver:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- BROKER_ENDPOINT='http://etcd:2379'
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 6400:6400 # pg protocol handler
|
||||
- 9898:9898 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "/usr/local/bin/pageserver -D /data/.neon/
|
||||
-c \"broker_endpoints=[$$BROKER_ENDPOINT]\"
|
||||
-c \"listen_pg_addr='0.0.0.0:6400'\"
|
||||
-c \"listen_http_addr='0.0.0.0:9898'\"
|
||||
-c \"remote_storage={endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/pageserver/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper1:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
|
||||
- SAFEKEEPER_ID=1
|
||||
- BROKER_ENDPOINT=http://etcd:2379
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 5454:5454 # pg protocol handler
|
||||
- 7676:7676 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
|
||||
--listen-http='0.0.0.0:7676'
|
||||
--id=$$SAFEKEEPER_ID
|
||||
--broker-endpoints=$$BROKER_ENDPOINT
|
||||
-D /data
|
||||
--remote-storage=\"{endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/safekeeper/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper2:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
|
||||
- SAFEKEEPER_ID=2
|
||||
- BROKER_ENDPOINT=http://etcd:2379
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 5454:5454 # pg protocol handler
|
||||
- 7677:7676 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
|
||||
--listen-http='0.0.0.0:7676'
|
||||
--id=$$SAFEKEEPER_ID
|
||||
--broker-endpoints=$$BROKER_ENDPOINT
|
||||
-D /data
|
||||
--remote-storage=\"{endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/safekeeper/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
safekeeper3:
|
||||
image: neondatabase/neon:${TAG:-latest}
|
||||
environment:
|
||||
- SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
|
||||
- SAFEKEEPER_ID=3
|
||||
- BROKER_ENDPOINT=http://etcd:2379
|
||||
- AWS_ACCESS_KEY_ID=minio
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
#- RUST_BACKTRACE=1
|
||||
ports:
|
||||
#- 5454:5454 # pg protocol handler
|
||||
- 7678:7676 # http endpoints
|
||||
entrypoint:
|
||||
- "/bin/sh"
|
||||
- "-c"
|
||||
command:
|
||||
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
|
||||
--listen-http='0.0.0.0:7676'
|
||||
--id=$$SAFEKEEPER_ID
|
||||
--broker-endpoints=$$BROKER_ENDPOINT
|
||||
-D /data
|
||||
--remote-storage=\"{endpoint='http://minio:9000',
|
||||
bucket_name='neon',
|
||||
bucket_region='eu-north-1',
|
||||
prefix_in_bucket='/safekeeper/'}\""
|
||||
depends_on:
|
||||
- etcd
|
||||
- minio_create_buckets
|
||||
|
||||
compute:
|
||||
build:
|
||||
context: ./image/compute
|
||||
args:
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}:${TAG:-latest}
|
||||
- http_proxy=$http_proxy
|
||||
- https_proxy=$https_proxy
|
||||
environment:
|
||||
- PG_VERSION=${PG_VERSION:-14}
|
||||
#- RUST_BACKTRACE=1
|
||||
volumes:
|
||||
- ./compute/var/db/postgres/specs/:/var/db/postgres/specs/
|
||||
- ./compute/shell/:/shell/
|
||||
ports:
|
||||
- 55433:55433 # pg protocol handler
|
||||
- 3080:3080 # http endpoints
|
||||
entrypoint:
|
||||
- "/shell/compute.sh"
|
||||
depends_on:
|
||||
- safekeeper1
|
||||
- safekeeper2
|
||||
- safekeeper3
|
||||
- pageserver
|
||||
|
||||
compute_is_ready:
|
||||
image: postgres:latest
|
||||
entrypoint:
|
||||
- "/bin/bash"
|
||||
- "-c"
|
||||
command:
|
||||
- "until pg_isready -h compute -p 55433 ; do
|
||||
echo 'Waiting to start compute...' && sleep 1;
|
||||
done"
|
||||
depends_on:
|
||||
- compute
|
||||
@@ -1,10 +0,0 @@
|
||||
ARG COMPUTE_IMAGE=compute-node-v14:latest
|
||||
FROM neondatabase/${COMPUTE_IMAGE}
|
||||
|
||||
USER root
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
netcat
|
||||
|
||||
USER postgres
|
||||
@@ -80,6 +80,4 @@
|
||||
- [015-storage-messaging](rfcs/015-storage-messaging.md)
|
||||
- [016-connection-routing](rfcs/016-connection-routing.md)
|
||||
- [017-timeline-data-management](rfcs/017-timeline-data-management.md)
|
||||
- [018-storage-messaging-2](rfcs/018-storage-messaging-2.md)
|
||||
- [019-tenant-timeline-lifecycles](rfcs/019-tenant-timeline-lifecycles.md)
|
||||
- [cluster-size-limits](rfcs/cluster-size-limits.md)
|
||||
|
||||
@@ -18,67 +18,3 @@ We build all images after a successful `release` tests run and push automaticall
|
||||
1. `neondatabase/compute-tools` and `neondatabase/compute-node`
|
||||
|
||||
2. `neondatabase/neon`
|
||||
|
||||
## Docker Compose example
|
||||
|
||||
You can see a [docker compose](https://docs.docker.com/compose/) example to create a neon cluster in [/docker-compose/docker-compose.yml](/docker-compose/docker-compose.yml). It creates the following conatainers.
|
||||
|
||||
- etcd x 1
|
||||
- pageserver x 1
|
||||
- safekeeper x 3
|
||||
- compute x 1
|
||||
- MinIO x 1 # This is Amazon S3 compatible object storage
|
||||
|
||||
### How to use
|
||||
|
||||
1. create containers
|
||||
|
||||
You can specify version of neon cluster using following environment values.
|
||||
- PG_VERSION: postgres version for compute (default is 14)
|
||||
- TAG: the tag version of [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags) (default is latest), which is tagged in [CI test](/.github/workflows/build_and_test.yml)
|
||||
```
|
||||
$ cd docker-compose/docker-compose.yml
|
||||
$ docker-compose down # remove the conainers if exists
|
||||
$ PG_VERSION=15 TAG=2221 docker-compose up --build -d # You can specify the postgres and image version
|
||||
Creating network "dockercompose_default" with the default driver
|
||||
Creating dockercompose_etcd3_1 ...
|
||||
(...omit...)
|
||||
```
|
||||
|
||||
2. connect compute node
|
||||
```
|
||||
$ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
|
||||
$ psql -h localhost -p 55433 -U cloud_admin
|
||||
postgres=# CREATE TABLE t(key int primary key, value text);
|
||||
CREATE TABLE
|
||||
postgres=# insert into t values(1,1);
|
||||
INSERT 0 1
|
||||
postgres=# select * from t;
|
||||
key | value
|
||||
-----+-------
|
||||
1 | 1
|
||||
(1 row)
|
||||
```
|
||||
|
||||
3. If you want to see the log, you can use `docker-compose logs` command.
|
||||
```
|
||||
# check the container name you want to see
|
||||
$ docker ps
|
||||
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
|
||||
d6968a5ae912 dockercompose_compute "/shell/compute.sh" 5 minutes ago Up 5 minutes 0.0.0.0:3080->3080/tcp, 0.0.0.0:55433->55433/tcp dockercompose_compute_1
|
||||
(...omit...)
|
||||
|
||||
$ docker logs -f dockercompose_compute_1
|
||||
2022-10-21 06:15:48.757 GMT [56] LOG: connection authorized: user=cloud_admin database=postgres application_name=psql
|
||||
2022-10-21 06:17:00.307 GMT [56] LOG: [NEON_SMGR] libpagestore: connected to 'host=pageserver port=6400'
|
||||
(...omit...)
|
||||
```
|
||||
|
||||
4. If you want to see durable data in MinIO which is s3 compatible storage
|
||||
|
||||
Access http://localhost:9001 and sign in.
|
||||
|
||||
- Username: `minio`
|
||||
- Password: `password`
|
||||
|
||||
You can see durable pages and WAL data in `neon` bucket.
|
||||
@@ -1,91 +0,0 @@
|
||||
# Managing Tenant and Timeline lifecycles
|
||||
|
||||
## Summary
|
||||
|
||||
The pageserver has a Tenant object in memory for each tenant it manages, and a
|
||||
Timeline for each timeline. There are a lot of tasks that operate on the tenants
|
||||
and timelines with references to those objects. We have some mechanisms to track
|
||||
which tasks are operating on each Tenant and Timeline, and to request them to
|
||||
shutdown when a tenant or timeline is deleted, but it does not cover all uses,
|
||||
and as a result we have many race conditions around tenant/timeline shutdown.
|
||||
|
||||
## Motivation
|
||||
|
||||
We have a bunch of race conditions that can produce weird errors and can be hard
|
||||
to track down.
|
||||
|
||||
## Non Goals
|
||||
|
||||
This RFC only covers the problem of ensuring that a task/thread isn't operating
|
||||
on a Tenant or Timeline. It does not cover what states, aside from Active and
|
||||
non-Active, each Tenant and Timeline should have, or when exactly the transitions
|
||||
should happen.
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
Pageserver. Although I wonder if the safekeeper should have a similar mechanism.
|
||||
|
||||
## Current situation
|
||||
|
||||
Most pageserver tasks of are managed by task_mgr.rs:
|
||||
|
||||
- LibpqEndpointListener
|
||||
- HttpEndPointListener
|
||||
- WalReceiverManager and -Connection
|
||||
- GarbageCollector and Compaction
|
||||
- InitialLogicalSizeCalculation
|
||||
|
||||
In addition to those tasks, the walreceiver performs some direct tokio::spawn
|
||||
calls to spawn tasks that are not registered with 'task_mgr'. And all of these
|
||||
tasks can spawn extra operations with tokio spawn_blocking.
|
||||
|
||||
Whenever a tenant or timeline is removed from the system, by pageserver
|
||||
shutdown, delete_timeline or tenant-detach operation, we rely on the task
|
||||
registry in 'task_mgr.rs' to wait until there are no tasks operating on the
|
||||
tenant or timeline, before its Tenant/Timeline object is removed. That relies on
|
||||
each task to register itself with the tenant/timeline ID in
|
||||
'task_mgr.rs'. However, there are many gaps in that. For example,
|
||||
GarbageCollection and Compaction tasks are registered with the tenant, but when
|
||||
they proceed to operate on a particular timeline of the tenant, they don't
|
||||
register with timeline ID. Because of that, the timeline can be deleted while GC
|
||||
or compaction is running on it, causing failures in the GC or compaction (see
|
||||
https://github.com/neondatabase/neon/issues/2442).
|
||||
|
||||
Another problem is that the task registry only works for tokio Tasks. There is
|
||||
no way to register a piece of code that runs inside spawn_blocking(), for
|
||||
example.
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
This "voluntary" registration of tasks is fragile. Let's use Rust language features
|
||||
to enforce that a tenant/timeline cannot be removed from the system when there is
|
||||
still some code operating on it.
|
||||
|
||||
Let's introduce new Guard objects for Tenant and Timeline, and do all actions through
|
||||
the Guard object. Something like:
|
||||
|
||||
TenantActiveGuard: Guard object over Arc<Tenant>. When you acquire the guard,
|
||||
the code checks that the tenant is in Active state. If it's not, you get an
|
||||
error. You can change the state of the tenant to Stopping while there are
|
||||
ActiveTenantGuard objects still on it, to prevent new ActiveTenantGuards from
|
||||
being acquired, but the Tenant cannot be removed until all the guards are gone.
|
||||
|
||||
TenantMaintenanceGuard: Like ActiveTenantGuard, but can be held even when the
|
||||
tenant is not in Active state. Used for operations like attach/detach. Perhaps
|
||||
allow only one such guard on a Tenant at a time.
|
||||
|
||||
Similarly for Timelines. We don't currentl have a "state" on Timeline, but I think
|
||||
we need at least two states: Active and Stopping. The Stopping state is used at
|
||||
deletion, to prevent new TimelineActiveGuards from appearing, while you wait for
|
||||
existing TimelineActiveGuards to die out.
|
||||
|
||||
The shutdown-signaling, using shutdown_watcher() and is_shutdown_requested(),
|
||||
probably also needs changes to deal with the new Guards. The rule is that if you
|
||||
have a TenantActiveGuard, and the tenant's state changes from Active to
|
||||
Stopping, the is_shutdown_requested() function should return true, and
|
||||
shutdown_watcher() future should return.
|
||||
|
||||
This signaling doesn't neessarily need to cover all cases. For example, if you
|
||||
have a block of code in spawn_blocking(), it might be acceptable if
|
||||
is_shutdown_requested() doesn't return true even though the tenant is in
|
||||
Stopping state, as long as the code finishes reasonably fast.
|
||||
@@ -7,9 +7,6 @@ edition = "2021"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "2.0"
|
||||
const_format = "0.2.21"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
bytes = "1.0.1"
|
||||
|
||||
utils = { path = "../utils" }
|
||||
postgres_ffi = { path = "../postgres_ffi" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -2,7 +2,6 @@ use const_format::formatcp;
|
||||
|
||||
/// Public API types
|
||||
pub mod models;
|
||||
pub mod reltag;
|
||||
|
||||
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
|
||||
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
|
||||
|
||||
@@ -7,10 +7,6 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::reltag::RelTag;
|
||||
use anyhow::bail;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
|
||||
/// A state of a tenant in pageserver's memory.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum TenantState {
|
||||
@@ -23,22 +19,6 @@ pub enum TenantState {
|
||||
Broken,
|
||||
}
|
||||
|
||||
/// A state of a timeline in pageserver's memory.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub enum TimelineState {
|
||||
/// Timeline is fully operational, its background jobs are running.
|
||||
Active,
|
||||
/// A timeline is recognized by pageserver, but not yet ready to operate.
|
||||
/// The status indicates, that the timeline could eventually go back to Active automatically:
|
||||
/// for example, if the owning tenant goes back to Active again.
|
||||
Suspended,
|
||||
/// A timeline is recognized by pageserver, but not yet ready to operate and not allowed to
|
||||
/// automatically become Active after certain events: only a management call can change this status.
|
||||
Paused,
|
||||
/// A timeline is recognized by the pageserver, but no longer used for any operations, as failed to get activated.
|
||||
Broken,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
@@ -180,8 +160,6 @@ pub struct TimelineInfo {
|
||||
pub remote_consistent_lsn: Option<Lsn>,
|
||||
pub awaits_download: bool,
|
||||
|
||||
pub state: TimelineState,
|
||||
|
||||
// Some of the above fields are duplicated in 'local' and 'remote', for backwards-
|
||||
// compatility with older clients.
|
||||
pub local: LocalTimelineInfo,
|
||||
@@ -223,160 +201,3 @@ pub struct FailpointConfig {
|
||||
pub struct TimelineGcRequest {
|
||||
pub gc_horizon: Option<u64>,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
pub enum PagestreamFeMessage {
|
||||
Exists(PagestreamExistsRequest),
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub latest: bool,
|
||||
pub lsn: Lsn,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
pub fn parse(mut body: Bytes) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// TODO these gets can fail
|
||||
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.get_u8();
|
||||
match msg_tag {
|
||||
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
})),
|
||||
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
})),
|
||||
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
blkno: body.get_u32(),
|
||||
})),
|
||||
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
dbnode: body.get_u32(),
|
||||
})),
|
||||
_ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(100); /* tag from pagestore_client.h */
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(101); /* tag from pagestore_client.h */
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(102); /* tag from pagestore_client.h */
|
||||
bytes.put(&resp.page[..]);
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(103); /* tag from pagestore_client.h */
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(104); /* tag from pagestore_client.h */
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ crc32c = "0.6.0"
|
||||
hex = "0.4.3"
|
||||
once_cell = "1.13.0"
|
||||
log = "0.4.14"
|
||||
memoffset = "0.7"
|
||||
memoffset = "0.6.2"
|
||||
thiserror = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
utils = { path = "../utils" }
|
||||
@@ -26,4 +26,4 @@ wal_craft = { path = "wal_craft" }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0"
|
||||
bindgen = "0.61"
|
||||
bindgen = "0.60.1"
|
||||
|
||||
@@ -7,7 +7,7 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
clap = "4.0"
|
||||
clap = "3.0"
|
||||
env_logger = "0.9"
|
||||
log = "0.4"
|
||||
once_cell = "1.13.0"
|
||||
|
||||
@@ -1,19 +1,68 @@
|
||||
use anyhow::*;
|
||||
use clap::{value_parser, Arg, ArgMatches, Command};
|
||||
use std::{path::PathBuf, str::FromStr};
|
||||
use clap::{App, Arg, ArgMatches};
|
||||
use std::str::FromStr;
|
||||
use wal_craft::*;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
|
||||
.init();
|
||||
let arg_matches = cli().get_matches();
|
||||
let type_arg = &Arg::new("type")
|
||||
.takes_value(true)
|
||||
.help("Type of WAL to craft")
|
||||
.possible_values([
|
||||
Simple::NAME,
|
||||
LastWalRecordXlogSwitch::NAME,
|
||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
|
||||
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
|
||||
LastWalRecordCrossingSegment::NAME,
|
||||
])
|
||||
.required(true);
|
||||
let arg_matches = App::new("Postgres WAL crafter")
|
||||
.about("Crafts Postgres databases with specific WAL properties")
|
||||
.subcommand(
|
||||
App::new("print-postgres-config")
|
||||
.about("Print the configuration required for PostgreSQL server before running this script")
|
||||
)
|
||||
.subcommand(
|
||||
App::new("with-initdb")
|
||||
.about("Craft WAL in a new data directory first initialized with initdb")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.takes_value(true)
|
||||
.help("Data directory for the Postgres server")
|
||||
.required(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-distrib-dir")
|
||||
.long("pg-distrib-dir")
|
||||
.takes_value(true)
|
||||
.help("Directory with Postgres distributions (bin and lib directories, e.g. pg_install containing subpath `v14/bin/postgresql`)")
|
||||
.default_value("/usr/local")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
App::new("in-existing")
|
||||
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("connection")
|
||||
.takes_value(true)
|
||||
.help("Connection string to the Postgres database to populate")
|
||||
.required(true)
|
||||
)
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let wal_craft = |arg_matches: &ArgMatches, client| {
|
||||
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches
|
||||
.get_one::<String>("type")
|
||||
.map(|s| s.as_str())
|
||||
.context("'type' is required")?
|
||||
{
|
||||
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches.value_of("type").unwrap() {
|
||||
Simple::NAME => Simple::craft(client)?,
|
||||
LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
|
||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
|
||||
@@ -23,12 +72,12 @@ fn main() -> Result<()> {
|
||||
WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
|
||||
}
|
||||
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
|
||||
a => panic!("Unknown --type argument: {a}"),
|
||||
a => panic!("Unknown --type argument: {}", a),
|
||||
};
|
||||
for lsn in intermediate_lsns {
|
||||
println!("intermediate_lsn = {lsn}");
|
||||
println!("intermediate_lsn = {}", lsn);
|
||||
}
|
||||
println!("end_of_wal = {end_of_wal_lsn}");
|
||||
println!("end_of_wal = {}", end_of_wal_lsn);
|
||||
Ok(())
|
||||
};
|
||||
|
||||
@@ -36,24 +85,20 @@ fn main() -> Result<()> {
|
||||
None => panic!("No subcommand provided"),
|
||||
Some(("print-postgres-config", _)) => {
|
||||
for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
|
||||
println!("{cfg}");
|
||||
println!("{}", cfg);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Some(("with-initdb", arg_matches)) => {
|
||||
let cfg = Conf {
|
||||
pg_version: *arg_matches
|
||||
.get_one::<u32>("pg-version")
|
||||
.context("'pg-version' is required")?,
|
||||
pg_distrib_dir: arg_matches
|
||||
.get_one::<PathBuf>("pg-distrib-dir")
|
||||
.context("'pg-distrib-dir' is required")?
|
||||
.to_owned(),
|
||||
datadir: arg_matches
|
||||
.get_one::<PathBuf>("datadir")
|
||||
.context("'datadir' is required")?
|
||||
.to_owned(),
|
||||
pg_version: arg_matches
|
||||
.value_of("pg-version")
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.context("Failed to parse postgres version from the argument string")?,
|
||||
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
|
||||
datadir: arg_matches.value_of("datadir").unwrap().into(),
|
||||
};
|
||||
cfg.initdb()?;
|
||||
let srv = cfg.start_server()?;
|
||||
@@ -63,77 +108,9 @@ fn main() -> Result<()> {
|
||||
}
|
||||
Some(("in-existing", arg_matches)) => wal_craft(
|
||||
arg_matches,
|
||||
&mut postgres::Config::from_str(
|
||||
arg_matches
|
||||
.get_one::<String>("connection")
|
||||
.context("'connection' is required")?,
|
||||
)
|
||||
.context(
|
||||
"'connection' argument value could not be parsed as a postgres connection string",
|
||||
)?
|
||||
.connect(postgres::NoTls)?,
|
||||
&mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
|
||||
.connect(postgres::NoTls)?,
|
||||
),
|
||||
Some(_) => panic!("Unknown subcommand"),
|
||||
}
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
let type_arg = &Arg::new("type")
|
||||
.help("Type of WAL to craft")
|
||||
.value_parser([
|
||||
Simple::NAME,
|
||||
LastWalRecordXlogSwitch::NAME,
|
||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
|
||||
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
|
||||
LastWalRecordCrossingSegment::NAME,
|
||||
])
|
||||
.required(true);
|
||||
|
||||
Command::new("Postgres WAL crafter")
|
||||
.about("Crafts Postgres databases with specific WAL properties")
|
||||
.subcommand(
|
||||
Command::new("print-postgres-config")
|
||||
.about("Print the configuration required for PostgreSQL server before running this script")
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("with-initdb")
|
||||
.about("Craft WAL in a new data directory first initialized with initdb")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.help("Data directory for the Postgres server")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.required(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-distrib-dir")
|
||||
.long("pg-distrib-dir")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.help("Directory with Postgres distributions (bin and lib directories, e.g. pg_install containing subpath `v14/bin/postgresql`)")
|
||||
.default_value("/usr/local")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-version")
|
||||
.long("pg-version")
|
||||
.help("Postgres version to use for the initial tenant")
|
||||
.value_parser(value_parser!(u32))
|
||||
.required(true)
|
||||
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("in-existing")
|
||||
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
|
||||
.arg(type_arg)
|
||||
.arg(
|
||||
Arg::new("connection")
|
||||
.help("Connection string to the Postgres database to populate")
|
||||
.required(true)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -37,22 +37,22 @@ pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
|
||||
});
|
||||
|
||||
impl Conf {
|
||||
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_distrib_dir(&self) -> PathBuf {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
match self.pg_version {
|
||||
14 => Ok(path.join(format!("v{}", self.pg_version))),
|
||||
15 => Ok(path.join(format!("v{}", self.pg_version))),
|
||||
_ => bail!("Unsupported postgres version: {}", self.pg_version),
|
||||
14 => path.join(format!("v{}", self.pg_version)),
|
||||
15 => path.join(format!("v{}", self.pg_version)),
|
||||
_ => panic!("Unsupported postgres version: {}", self.pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
fn pg_bin_dir(&self) -> anyhow::Result<PathBuf> {
|
||||
Ok(self.pg_distrib_dir()?.join("bin"))
|
||||
fn pg_bin_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir().join("bin")
|
||||
}
|
||||
|
||||
fn pg_lib_dir(&self) -> anyhow::Result<PathBuf> {
|
||||
Ok(self.pg_distrib_dir()?.join("lib"))
|
||||
fn pg_lib_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir().join("lib")
|
||||
}
|
||||
|
||||
pub fn wal_dir(&self) -> PathBuf {
|
||||
@@ -60,12 +60,12 @@ impl Conf {
|
||||
}
|
||||
|
||||
fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
|
||||
let path = self.pg_bin_dir()?.join(command);
|
||||
let path = self.pg_bin_dir().join(command);
|
||||
ensure!(path.exists(), "Command {:?} does not exist", path);
|
||||
let mut cmd = Command::new(path);
|
||||
cmd.env_clear()
|
||||
.env("LD_LIBRARY_PATH", self.pg_lib_dir()?)
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?);
|
||||
.env("LD_LIBRARY_PATH", self.pg_lib_dir())
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir());
|
||||
Ok(cmd)
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ use tokio::{
|
||||
io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
|
||||
};
|
||||
use tracing::*;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::crashsafe_dir::path_with_suffix_extension;
|
||||
|
||||
use crate::{Download, DownloadError, RemoteObjectId};
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ thiserror = "1.0"
|
||||
tokio = { version = "1.17", features = ["macros"]}
|
||||
tokio-rustls = "0.23"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
nix = "0.25"
|
||||
signal-hook = "0.3.10"
|
||||
rand = "0.8.3"
|
||||
@@ -30,8 +30,6 @@ rustls-split = "0.3.0"
|
||||
git-version = "0.3.5"
|
||||
serde_with = "2.0"
|
||||
once_cell = "1.13.0"
|
||||
strum = "0.24"
|
||||
strum_macros = "0.24"
|
||||
|
||||
|
||||
metrics = { path = "../metrics" }
|
||||
|
||||
@@ -12,8 +12,16 @@ pub fn create_dir(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
let path = path.as_ref();
|
||||
|
||||
fs::create_dir(path)?;
|
||||
fsync_file_and_parent(path)?;
|
||||
Ok(())
|
||||
File::open(path)?.sync_all()?;
|
||||
|
||||
if let Some(parent) = path.parent() {
|
||||
File::open(parent)?.sync_all()
|
||||
} else {
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"can't find parent",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Similar to [`std::fs::create_dir_all`], except we fsync all
|
||||
@@ -57,12 +65,12 @@ pub fn create_dir_all(path: impl AsRef<Path>) -> io::Result<()> {
|
||||
|
||||
// Fsync the created directories from child to parent.
|
||||
for &path in dirs_to_create.iter() {
|
||||
fsync(path)?;
|
||||
File::open(path)?.sync_all()?;
|
||||
}
|
||||
|
||||
// If we created any new directories, fsync the parent.
|
||||
if !dirs_to_create.is_empty() {
|
||||
fsync(path)?;
|
||||
File::open(path)?.sync_all()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -84,33 +92,6 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
|
||||
.with_extension(new_extension.as_ref())
|
||||
}
|
||||
|
||||
pub fn fsync_file_and_parent(file_path: &Path) -> io::Result<()> {
|
||||
let parent = file_path.parent().ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("File {file_path:?} has no parent"),
|
||||
)
|
||||
})?;
|
||||
|
||||
fsync(file_path)?;
|
||||
fsync(parent)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn fsync(path: &Path) -> io::Result<()> {
|
||||
File::open(path)
|
||||
.map_err(|e| io::Error::new(e.kind(), format!("Failed to open the file {path:?}: {e}")))
|
||||
.and_then(|file| {
|
||||
file.sync_all().map_err(|e| {
|
||||
io::Error::new(
|
||||
e.kind(),
|
||||
format!("Failed to sync file {path:?} data and metadata: {e}"),
|
||||
)
|
||||
})
|
||||
})
|
||||
.map_err(|e| io::Error::new(e.kind(), format!("Failed to fsync file {path:?}: {e}")))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::tempdir;
|
||||
@@ -3,6 +3,13 @@ use std::{fmt, str::FromStr};
|
||||
use hex::FromHex;
|
||||
use rand::Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum IdError {
|
||||
#[error("invalid id length {0}")]
|
||||
VecParseError(usize),
|
||||
}
|
||||
|
||||
/// Neon ID is a 128-bit random ID.
|
||||
/// Used to represent various identifiers. Provides handy utility methods and impls.
|
||||
@@ -22,6 +29,15 @@ impl Id {
|
||||
Id::from(arr)
|
||||
}
|
||||
|
||||
pub fn from_vec(src: &Vec<u8>) -> Result<Id, IdError> {
|
||||
if src.len() != 16 {
|
||||
return Err(IdError::VecParseError(src.len()));
|
||||
}
|
||||
let mut zid_slice = [0u8; 16];
|
||||
zid_slice.copy_from_slice(&src);
|
||||
Ok(zid_slice.into())
|
||||
}
|
||||
|
||||
pub fn as_arr(&self) -> [u8; 16] {
|
||||
self.0
|
||||
}
|
||||
@@ -100,6 +116,10 @@ macro_rules! id_newtype {
|
||||
$t(Id::get_from_buf(buf))
|
||||
}
|
||||
|
||||
pub fn from_vec(src: &Vec<u8>) -> Result<$t, IdError> {
|
||||
Ok($t(Id::from_vec(src)?))
|
||||
}
|
||||
|
||||
pub fn as_arr(&self) -> [u8; 16] {
|
||||
self.0.as_arr()
|
||||
}
|
||||
|
||||
@@ -22,8 +22,8 @@ pub mod pq_proto;
|
||||
// dealing with connstring parsing and handy access to it's parts
|
||||
pub mod connstring;
|
||||
|
||||
// helper functions for creating and fsyncing
|
||||
pub mod crashsafe;
|
||||
// helper functions for creating and fsyncing directories/trees
|
||||
pub mod crashsafe_dir;
|
||||
|
||||
// common authentication routines
|
||||
pub mod auth;
|
||||
|
||||
@@ -1,35 +1,11 @@
|
||||
use std::{
|
||||
fs::{File, OpenOptions},
|
||||
path::Path,
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use strum_macros::{EnumString, EnumVariantNames};
|
||||
|
||||
#[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum LogFormat {
|
||||
Plain,
|
||||
Json,
|
||||
}
|
||||
|
||||
impl LogFormat {
|
||||
pub fn from_config(s: &str) -> anyhow::Result<LogFormat> {
|
||||
use strum::VariantNames;
|
||||
LogFormat::from_str(s).with_context(|| {
|
||||
format!(
|
||||
"Unrecognized log format. Please specify one of: {:?}",
|
||||
LogFormat::VARIANTS
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
pub fn init(
|
||||
log_filename: impl AsRef<Path>,
|
||||
daemonize: bool,
|
||||
log_format: LogFormat,
|
||||
) -> Result<File> {
|
||||
pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
|
||||
// Don't open the same file for output multiple times;
|
||||
// the different fds could overwrite each other's output.
|
||||
let log_file = OpenOptions::new()
|
||||
@@ -45,50 +21,22 @@ pub fn init(
|
||||
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
|
||||
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str));
|
||||
|
||||
let x: File = log_file.try_clone().unwrap();
|
||||
let base_logger = tracing_subscriber::fmt()
|
||||
.with_env_filter(env_filter)
|
||||
.with_target(false)
|
||||
.with_ansi(false)
|
||||
.with_writer(move || -> Box<dyn std::io::Write> {
|
||||
// we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
|
||||
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
|
||||
// for example to be in line with docker log command which expects logs comimg from stdout
|
||||
if daemonize {
|
||||
Box::new(x.try_clone().unwrap())
|
||||
} else {
|
||||
Box::new(std::io::stdout())
|
||||
}
|
||||
});
|
||||
.with_target(false) // don't include event targets
|
||||
.with_ansi(false); // don't use colors in log file;
|
||||
|
||||
match log_format {
|
||||
LogFormat::Json => base_logger.json().init(),
|
||||
LogFormat::Plain => base_logger.init(),
|
||||
// we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
|
||||
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
|
||||
// for example to be in line with docker log command which expects logs comimg from stdout
|
||||
if daemonize {
|
||||
let x = log_file.try_clone().unwrap();
|
||||
base_logger
|
||||
.with_writer(move || x.try_clone().unwrap())
|
||||
.init();
|
||||
} else {
|
||||
base_logger.init();
|
||||
}
|
||||
|
||||
Ok(log_file)
|
||||
}
|
||||
|
||||
// #[cfg(test)]
|
||||
// Due to global logger, can't run tests in same process.
|
||||
// So until there's a non-global one, the tests are in ../tests/ as separate files.
|
||||
#[macro_export(local_inner_macros)]
|
||||
macro_rules! test_init_file_logger {
|
||||
($log_level:expr, $log_format:expr) => {{
|
||||
use std::str::FromStr;
|
||||
std::env::set_var("RUST_LOG", $log_level);
|
||||
|
||||
let tmp_dir = tempfile::TempDir::new().unwrap();
|
||||
let log_file_path = tmp_dir.path().join("logfile");
|
||||
|
||||
let log_format = $crate::logging::LogFormat::from_str($log_format).unwrap();
|
||||
let _log_file = $crate::logging::init(&log_file_path, true, log_format).unwrap();
|
||||
|
||||
let log_file = std::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&log_file_path)
|
||||
.unwrap();
|
||||
|
||||
log_file
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ use std::sync::Arc;
|
||||
use std::task::Poll;
|
||||
use tracing::{debug, error, trace};
|
||||
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
|
||||
use tokio_rustls::TlsAcceptor;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -66,8 +66,8 @@ pub enum ProcessMsgResult {
|
||||
/// Always-writeable sock_split stream.
|
||||
/// May not be readable. See [`PostgresBackend::take_stream_in`]
|
||||
pub enum Stream {
|
||||
Unencrypted(BufReader<tokio::net::TcpStream>),
|
||||
Tls(Box<tokio_rustls::server::TlsStream<BufReader<tokio::net::TcpStream>>>),
|
||||
Unencrypted(tokio::net::TcpStream),
|
||||
Tls(Box<tokio_rustls::server::TlsStream<tokio::net::TcpStream>>),
|
||||
Broken,
|
||||
}
|
||||
|
||||
@@ -157,7 +157,7 @@ impl PostgresBackend {
|
||||
let peer_addr = socket.peer_addr()?;
|
||||
|
||||
Ok(Self {
|
||||
stream: Stream::Unencrypted(BufReader::new(socket)),
|
||||
stream: Stream::Unencrypted(socket),
|
||||
buf_out: BytesMut::with_capacity(10 * 1024),
|
||||
state: ProtoState::Initialization,
|
||||
md5_salt: [0u8; 4],
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
// This could be in ../src/logging.rs but since the logger is global, these
|
||||
// can't be run in threads of the same process
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Lines};
|
||||
use tracing::*;
|
||||
use utils::test_init_file_logger;
|
||||
|
||||
fn read_lines(file: File) -> Lines<BufReader<File>> {
|
||||
BufReader::new(file).lines()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_format_has_message_and_custom_field() {
|
||||
std::env::set_var("RUST_LOG", "info");
|
||||
|
||||
let log_file = test_init_file_logger!("info", "json");
|
||||
|
||||
let custom_field: &str = "hi";
|
||||
trace!(custom = %custom_field, "test log message");
|
||||
debug!(custom = %custom_field, "test log message");
|
||||
info!(custom = %custom_field, "test log message");
|
||||
warn!(custom = %custom_field, "test log message");
|
||||
error!(custom = %custom_field, "test log message");
|
||||
|
||||
let lines = read_lines(log_file);
|
||||
for line in lines {
|
||||
let content = line.unwrap();
|
||||
let json_object = serde_json::from_str::<serde_json::Value>(&content).unwrap();
|
||||
|
||||
assert_eq!(json_object["fields"]["custom"], "hi");
|
||||
assert_eq!(json_object["fields"]["message"], "test log message");
|
||||
|
||||
assert_ne!(json_object["level"], "TRACE");
|
||||
assert_ne!(json_object["level"], "DEBUG");
|
||||
}
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
// This could be in ../src/logging.rs but since the logger is global, these
|
||||
// can't be run in threads of the same process
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Lines};
|
||||
use tracing::*;
|
||||
use utils::test_init_file_logger;
|
||||
|
||||
fn read_lines(file: File) -> Lines<BufReader<File>> {
|
||||
BufReader::new(file).lines()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_plain_format_has_message_and_custom_field() {
|
||||
std::env::set_var("RUST_LOG", "warn");
|
||||
|
||||
let log_file = test_init_file_logger!("warn", "plain");
|
||||
|
||||
let custom_field: &str = "hi";
|
||||
trace!(custom = %custom_field, "test log message");
|
||||
debug!(custom = %custom_field, "test log message");
|
||||
info!(custom = %custom_field, "test log message");
|
||||
warn!(custom = %custom_field, "test log message");
|
||||
error!(custom = %custom_field, "test log message");
|
||||
|
||||
let lines = read_lines(log_file);
|
||||
for line in lines {
|
||||
let content = line.unwrap();
|
||||
serde_json::from_str::<serde_json::Value>(&content).unwrap_err();
|
||||
assert!(content.contains("custom=hi"));
|
||||
assert!(content.contains("test log message"));
|
||||
|
||||
assert!(!content.contains("TRACE"));
|
||||
assert!(!content.contains("DEBUG"));
|
||||
assert!(!content.contains("INFO"));
|
||||
}
|
||||
}
|
||||
@@ -23,7 +23,7 @@ futures = "0.3.13"
|
||||
hex = "0.4.3"
|
||||
hyper = "0.14"
|
||||
itertools = "0.10.3"
|
||||
clap = { version = "4.0", features = ["string"] }
|
||||
clap = "3.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
|
||||
tokio-util = { version = "0.7.3", features = ["io", "io-util"] }
|
||||
@@ -56,7 +56,7 @@ fail = "0.5.0"
|
||||
git-version = "0.3.5"
|
||||
rstar = "0.9.3"
|
||||
num-traits = "0.2.15"
|
||||
amplify_num = { git = "https://github.com/hlinnaka/rust-amplify.git", branch = "unsigned-int-perf" }
|
||||
amplify_num = "0.4.1"
|
||||
|
||||
pageserver_api = { path = "../libs/pageserver_api" }
|
||||
postgres_ffi = { path = "../libs/postgres_ffi" }
|
||||
@@ -67,13 +67,7 @@ remote_storage = { path = "../libs/remote_storage" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
close_fds = "0.3.2"
|
||||
walkdir = "2.3.2"
|
||||
svg_fmt = "0.4.1"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.4"
|
||||
hex-literal = "0.3"
|
||||
tempfile = "3.2"
|
||||
|
||||
[[bench]]
|
||||
name = "bench_layer_map"
|
||||
harness = false
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -22,8 +22,8 @@ use std::time::SystemTime;
|
||||
use tar::{Builder, EntryType, Header};
|
||||
use tracing::*;
|
||||
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::tenant::Timeline;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
|
||||
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
//! A tool for visualizing the arrangement of layerfiles within a timeline.
|
||||
//!
|
||||
//! It reads filenames from stdin and prints a svg on stdout. The image is a plot in
|
||||
//! page-lsn space, where every delta layer is a rectangle and every image layer is a
|
||||
//! thick line. Legend:
|
||||
//! - The x axis (left to right) represents page index.
|
||||
//! - The y axis represents LSN, growing upwards.
|
||||
//!
|
||||
//! Coordinates in both axis are compressed for better readability.
|
||||
//! (see https://medium.com/algorithms-digest/coordinate-compression-2fff95326fb)
|
||||
//!
|
||||
//! Example use:
|
||||
//! ```
|
||||
//! $ cd test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE
|
||||
//! $ ls | grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
|
||||
//! $ firefox out.svg
|
||||
//! ```
|
||||
//!
|
||||
//! This API was chosen so that we can easily work with filenames extracted from ssh,
|
||||
//! or from pageserver log files.
|
||||
//!
|
||||
//! TODO Consider shipping this as a grafana panel plugin:
|
||||
//! https://grafana.com/tutorials/build-a-panel-plugin/
|
||||
use anyhow::Result;
|
||||
use pageserver::repository::Key;
|
||||
use std::cmp::Ordering;
|
||||
use std::io::{self, BufRead};
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
ops::Range,
|
||||
};
|
||||
use svg_fmt::{rectangle, rgb, BeginSvg, EndSvg, Fill, Stroke};
|
||||
use utils::{lsn::Lsn, project_git_version};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
// Map values to their compressed coordinate - the index the value
|
||||
// would have in a sorted and deduplicated list of all values.
|
||||
fn build_coordinate_compression_map<T: Ord + Copy>(coords: Vec<T>) -> BTreeMap<T, usize> {
|
||||
let set: BTreeSet<T> = coords.into_iter().collect();
|
||||
|
||||
let mut map: BTreeMap<T, usize> = BTreeMap::new();
|
||||
for (i, e) in set.iter().enumerate() {
|
||||
map.insert(*e, i);
|
||||
}
|
||||
|
||||
map
|
||||
}
|
||||
|
||||
fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
|
||||
let split: Vec<&str> = name.split("__").collect();
|
||||
let keys: Vec<&str> = split[0].split('-').collect();
|
||||
let mut lsns: Vec<&str> = split[1].split('-').collect();
|
||||
if lsns.len() == 1 {
|
||||
lsns.push(lsns[0]);
|
||||
}
|
||||
|
||||
let keys = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
|
||||
let lsns = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
|
||||
(keys, lsns)
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
// Parse layer filenames from stdin
|
||||
let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
|
||||
let stdin = io::stdin();
|
||||
for line in stdin.lock().lines() {
|
||||
let range = parse_filename(&line.unwrap());
|
||||
ranges.push(range);
|
||||
}
|
||||
|
||||
// Collect all coordinates
|
||||
let mut keys: Vec<Key> = vec![];
|
||||
let mut lsns: Vec<Lsn> = vec![];
|
||||
for (keyr, lsnr) in &ranges {
|
||||
keys.push(keyr.start);
|
||||
keys.push(keyr.end);
|
||||
lsns.push(lsnr.start);
|
||||
lsns.push(lsnr.end);
|
||||
}
|
||||
|
||||
// Analyze
|
||||
let key_map = build_coordinate_compression_map(keys);
|
||||
let lsn_map = build_coordinate_compression_map(lsns);
|
||||
|
||||
// Initialize stats
|
||||
let mut num_deltas = 0;
|
||||
let mut num_images = 0;
|
||||
|
||||
// Draw
|
||||
let stretch = 3.0; // Stretch out vertically for better visibility
|
||||
println!(
|
||||
"{}",
|
||||
BeginSvg {
|
||||
w: key_map.len() as f32,
|
||||
h: stretch * lsn_map.len() as f32
|
||||
}
|
||||
);
|
||||
for (keyr, lsnr) in &ranges {
|
||||
let key_start = *key_map.get(&keyr.start).unwrap();
|
||||
let key_end = *key_map.get(&keyr.end).unwrap();
|
||||
let key_diff = key_end - key_start;
|
||||
let lsn_max = lsn_map.len();
|
||||
|
||||
if key_start >= key_end {
|
||||
panic!("Invalid key range {}-{}", key_start, key_end);
|
||||
}
|
||||
|
||||
let lsn_start = *lsn_map.get(&lsnr.start).unwrap();
|
||||
let lsn_end = *lsn_map.get(&lsnr.end).unwrap();
|
||||
|
||||
let mut lsn_diff = (lsn_end - lsn_start) as f32;
|
||||
let mut fill = Fill::None;
|
||||
let mut margin = 0.05 * lsn_diff; // Height-dependent margin to disambiguate overlapping deltas
|
||||
let mut lsn_offset = 0.0;
|
||||
|
||||
// Fill in and thicken rectangle if it's an
|
||||
// image layer so that we can see it.
|
||||
match lsn_start.cmp(&lsn_end) {
|
||||
Ordering::Less => num_deltas += 1,
|
||||
Ordering::Equal => {
|
||||
num_images += 1;
|
||||
lsn_diff = 0.3;
|
||||
lsn_offset = -lsn_diff / 2.0;
|
||||
margin = 0.05;
|
||||
fill = Fill::Color(rgb(0, 0, 0));
|
||||
}
|
||||
Ordering::Greater => panic!("Invalid lsn range {}-{}", lsn_start, lsn_end),
|
||||
}
|
||||
|
||||
println!(
|
||||
" {}",
|
||||
rectangle(
|
||||
key_start as f32 + stretch * margin,
|
||||
stretch * (lsn_max as f32 - (lsn_end as f32 - margin - lsn_offset)),
|
||||
key_diff as f32 - stretch * 2.0 * margin,
|
||||
stretch * (lsn_diff - 2.0 * margin)
|
||||
)
|
||||
.fill(fill)
|
||||
.stroke(Stroke::Color(rgb(0, 0, 0), 0.1))
|
||||
.border_radius(0.4)
|
||||
);
|
||||
}
|
||||
println!("{}", EndSvg);
|
||||
|
||||
eprintln!("num_images: {}", num_images);
|
||||
eprintln!("num_deltas: {}", num_deltas);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -6,7 +6,7 @@ use tracing::*;
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use clap::{App, Arg};
|
||||
use daemonize::Daemonize;
|
||||
|
||||
use fail::FailScenario;
|
||||
@@ -51,17 +51,57 @@ fn version() -> String {
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let arg_matches = cli().get_matches();
|
||||
let arg_matches = App::new("Neon page server")
|
||||
.about("Materializes WAL stream to pages and serves them to the postgres")
|
||||
.version(&*version())
|
||||
.arg(
|
||||
|
||||
if arg_matches.get_flag("enabled-features") {
|
||||
Arg::new("daemonize")
|
||||
.short('d')
|
||||
.long("daemonize")
|
||||
.takes_value(false)
|
||||
.help("Run in the background"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("init")
|
||||
.long("init")
|
||||
.takes_value(false)
|
||||
.help("Initialize pageserver with all given config overrides"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("workdir")
|
||||
.short('D')
|
||||
.long("workdir")
|
||||
.takes_value(true)
|
||||
.help("Working directory for the pageserver"),
|
||||
)
|
||||
// See `settings.md` for more details on the extra configuration patameters pageserver can process
|
||||
.arg(
|
||||
Arg::new("config-override")
|
||||
.short('c')
|
||||
.takes_value(true)
|
||||
.number_of_values(1)
|
||||
.multiple_occurrences(true)
|
||||
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
|
||||
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
|
||||
)
|
||||
.arg(Arg::new("update-config").long("update-config").takes_value(false).help(
|
||||
"Update the config file when started",
|
||||
))
|
||||
.arg(
|
||||
Arg::new("enabled-features")
|
||||
.long("enabled-features")
|
||||
.takes_value(false)
|
||||
.help("Show enabled compile time features"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
if arg_matches.is_present("enabled-features") {
|
||||
println!("{{\"features\": {FEATURES:?} }}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let workdir = arg_matches
|
||||
.get_one::<String>("workdir")
|
||||
.map(Path::new)
|
||||
.unwrap_or_else(|| Path::new(".neon"));
|
||||
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
|
||||
let workdir = workdir
|
||||
.canonicalize()
|
||||
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
|
||||
@@ -75,7 +115,7 @@ fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
})?;
|
||||
|
||||
let daemonize = arg_matches.get_flag("daemonize");
|
||||
let daemonize = arg_matches.is_present("daemonize");
|
||||
|
||||
let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
|
||||
ControlFlow::Continue(conf) => conf,
|
||||
@@ -87,7 +127,7 @@ fn main() -> anyhow::Result<()> {
|
||||
|
||||
let tenants_path = conf.tenants_path();
|
||||
if !tenants_path.exists() {
|
||||
utils::crashsafe::create_dir_all(conf.tenants_path()).with_context(|| {
|
||||
utils::crashsafe_dir::create_dir_all(conf.tenants_path()).with_context(|| {
|
||||
format!(
|
||||
"Failed to create tenants root dir at '{}'",
|
||||
tenants_path.display()
|
||||
@@ -113,8 +153,8 @@ fn initialize_config(
|
||||
arg_matches: clap::ArgMatches,
|
||||
workdir: &Path,
|
||||
) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
|
||||
let init = arg_matches.get_flag("init");
|
||||
let update_config = init || arg_matches.get_flag("update-config");
|
||||
let init = arg_matches.is_present("init");
|
||||
let update_config = init || arg_matches.is_present("update-config");
|
||||
|
||||
let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
|
||||
if init {
|
||||
@@ -156,10 +196,13 @@ fn initialize_config(
|
||||
)
|
||||
};
|
||||
|
||||
if let Some(values) = arg_matches.get_many::<String>("config-override") {
|
||||
if let Some(values) = arg_matches.values_of("config-override") {
|
||||
for option_line in values {
|
||||
let doc = toml_edit::Document::from_str(option_line).with_context(|| {
|
||||
format!("Option '{option_line}' could not be parsed as a toml document")
|
||||
format!(
|
||||
"Option '{}' could not be parsed as a toml document",
|
||||
option_line
|
||||
)
|
||||
})?;
|
||||
|
||||
for (key, item) in doc.iter() {
|
||||
@@ -199,9 +242,9 @@ fn initialize_config(
|
||||
|
||||
fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
|
||||
// Initialize logger
|
||||
let log_file = logging::init(LOG_FILE_NAME, daemonize, conf.log_format)?;
|
||||
let log_file = logging::init(LOG_FILE_NAME, daemonize)?;
|
||||
|
||||
info!("version: {}", version());
|
||||
info!("version: {GIT_VERSION}");
|
||||
|
||||
// TODO: Check that it looks like a valid repository before going further
|
||||
|
||||
@@ -342,55 +385,3 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
Command::new("Neon page server")
|
||||
.about("Materializes WAL stream to pages and serves them to the postgres")
|
||||
.version(version())
|
||||
.arg(
|
||||
|
||||
Arg::new("daemonize")
|
||||
.short('d')
|
||||
.long("daemonize")
|
||||
.action(ArgAction::SetTrue)
|
||||
.help("Run in the background"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("init")
|
||||
.long("init")
|
||||
.action(ArgAction::SetTrue)
|
||||
.help("Initialize pageserver with all given config overrides"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("workdir")
|
||||
.short('D')
|
||||
.long("workdir")
|
||||
.help("Working directory for the pageserver"),
|
||||
)
|
||||
// See `settings.md` for more details on the extra configuration patameters pageserver can process
|
||||
.arg(
|
||||
Arg::new("config-override")
|
||||
.short('c')
|
||||
.num_args(1)
|
||||
.action(ArgAction::Append)
|
||||
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there). \
|
||||
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("update-config")
|
||||
.long("update-config")
|
||||
.action(ArgAction::SetTrue)
|
||||
.help("Update the config file when started"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("enabled-features")
|
||||
.long("enabled-features")
|
||||
.action(ArgAction::SetTrue)
|
||||
.help("Show enabled compile time features"),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::{
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use clap::{value_parser, Arg, Command};
|
||||
use clap::{App, Arg};
|
||||
|
||||
use pageserver::{
|
||||
page_cache,
|
||||
@@ -24,14 +24,40 @@ project_git_version!(GIT_VERSION);
|
||||
const METADATA_SUBCOMMAND: &str = "metadata";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let arg_matches = cli().get_matches();
|
||||
let arg_matches = App::new("Neon Pageserver binutils")
|
||||
.about("Reads pageserver (and related) binary files management utility")
|
||||
.version(GIT_VERSION)
|
||||
.arg(Arg::new("path").help("Input file path").required(false))
|
||||
.subcommand(
|
||||
App::new(METADATA_SUBCOMMAND)
|
||||
.about("Read and update pageserver metadata file")
|
||||
.arg(
|
||||
Arg::new("metadata_path")
|
||||
.help("Input metadata file path")
|
||||
.required(false),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("disk_consistent_lsn")
|
||||
.long("disk_consistent_lsn")
|
||||
.takes_value(true)
|
||||
.help("Replace disk consistent Lsn"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("prev_record_lsn")
|
||||
.long("prev_record_lsn")
|
||||
.takes_value(true)
|
||||
.help("Replace previous record Lsn"),
|
||||
),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
match arg_matches.subcommand() {
|
||||
Some((subcommand_name, subcommand_matches)) => {
|
||||
let path = subcommand_matches
|
||||
.get_one::<PathBuf>("metadata_path")
|
||||
.context("'metadata_path' argument is missing")?
|
||||
.to_path_buf();
|
||||
let path = PathBuf::from(
|
||||
subcommand_matches
|
||||
.value_of("metadata_path")
|
||||
.context("'metadata_path' argument is missing")?,
|
||||
);
|
||||
anyhow::ensure!(
|
||||
subcommand_name == METADATA_SUBCOMMAND,
|
||||
"Unknown subcommand {subcommand_name}"
|
||||
@@ -39,10 +65,11 @@ fn main() -> anyhow::Result<()> {
|
||||
handle_metadata(&path, subcommand_matches)?;
|
||||
}
|
||||
None => {
|
||||
let path = arg_matches
|
||||
.get_one::<PathBuf>("path")
|
||||
.context("'path' argument is missing")?
|
||||
.to_path_buf();
|
||||
let path = PathBuf::from(
|
||||
arg_matches
|
||||
.value_of("path")
|
||||
.context("'path' argument is missing")?,
|
||||
);
|
||||
println!(
|
||||
"No subcommand specified, attempting to guess the format for file {}",
|
||||
path.display()
|
||||
@@ -83,7 +110,7 @@ fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), an
|
||||
let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
|
||||
println!("Current metadata:\n{meta:?}");
|
||||
let mut update_meta = false;
|
||||
if let Some(disk_consistent_lsn) = arg_matches.get_one::<String>("disk_consistent_lsn") {
|
||||
if let Some(disk_consistent_lsn) = arg_matches.value_of("disk_consistent_lsn") {
|
||||
meta = TimelineMetadata::new(
|
||||
Lsn::from_str(disk_consistent_lsn)?,
|
||||
meta.prev_record_lsn(),
|
||||
@@ -95,7 +122,7 @@ fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), an
|
||||
);
|
||||
update_meta = true;
|
||||
}
|
||||
if let Some(prev_record_lsn) = arg_matches.get_one::<String>("prev_record_lsn") {
|
||||
if let Some(prev_record_lsn) = arg_matches.value_of("prev_record_lsn") {
|
||||
meta = TimelineMetadata::new(
|
||||
meta.disk_consistent_lsn(),
|
||||
Some(Lsn::from_str(prev_record_lsn)?),
|
||||
@@ -115,40 +142,3 @@ fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), an
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
Command::new("Neon Pageserver binutils")
|
||||
.about("Reads pageserver (and related) binary files management utility")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("path")
|
||||
.help("Input file path")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.required(false),
|
||||
)
|
||||
.subcommand(
|
||||
Command::new(METADATA_SUBCOMMAND)
|
||||
.about("Read and update pageserver metadata file")
|
||||
.arg(
|
||||
Arg::new("metadata_path")
|
||||
.help("Input metadata file path")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.required(false),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("disk_consistent_lsn")
|
||||
.long("disk_consistent_lsn")
|
||||
.help("Replace disk consistent Lsn"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("prev_record_lsn")
|
||||
.long("prev_record_lsn")
|
||||
.help("Replace previous record Lsn"),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use std::env;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
@@ -17,7 +16,6 @@ use toml_edit::{Document, Item};
|
||||
use url::Url;
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
logging::LogFormat,
|
||||
postgres_backend::AuthType,
|
||||
};
|
||||
|
||||
@@ -26,7 +24,6 @@ use crate::tenant_config::{TenantConf, TenantConfOpt};
|
||||
|
||||
/// The name of the metadata file pageserver creates per timeline.
|
||||
pub const METADATA_FILE_NAME: &str = "metadata";
|
||||
pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
|
||||
const TENANT_CONFIG_NAME: &str = "config";
|
||||
|
||||
pub mod defaults {
|
||||
@@ -46,8 +43,6 @@ pub mod defaults {
|
||||
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
|
||||
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
|
||||
|
||||
pub const DEFAULT_LOG_FORMAT: &str = "plain";
|
||||
|
||||
///
|
||||
/// Default built-in configuration file.
|
||||
///
|
||||
@@ -66,7 +61,6 @@ pub mod defaults {
|
||||
# initial superuser role name to use when creating a new tenant
|
||||
#initial_superuser_name = '{DEFAULT_SUPERUSER}'
|
||||
|
||||
#log_format = '{DEFAULT_LOG_FORMAT}'
|
||||
# [tenant_config]
|
||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
||||
@@ -130,8 +124,6 @@ pub struct PageServerConf {
|
||||
|
||||
/// Etcd broker endpoints to connect to.
|
||||
pub broker_endpoints: Vec<Url>,
|
||||
|
||||
pub log_format: LogFormat,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -198,8 +190,6 @@ struct PageServerConfigBuilder {
|
||||
profiling: BuilderValue<ProfilingConfig>,
|
||||
broker_etcd_prefix: BuilderValue<String>,
|
||||
broker_endpoints: BuilderValue<Vec<Url>>,
|
||||
|
||||
log_format: BuilderValue<LogFormat>,
|
||||
}
|
||||
|
||||
impl Default for PageServerConfigBuilder {
|
||||
@@ -227,7 +217,6 @@ impl Default for PageServerConfigBuilder {
|
||||
profiling: Set(ProfilingConfig::Disabled),
|
||||
broker_etcd_prefix: Set(etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string()),
|
||||
broker_endpoints: Set(Vec::new()),
|
||||
log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -300,10 +289,6 @@ impl PageServerConfigBuilder {
|
||||
self.profiling = BuilderValue::Set(profiling)
|
||||
}
|
||||
|
||||
pub fn log_format(&mut self, log_format: LogFormat) {
|
||||
self.log_format = BuilderValue::Set(log_format)
|
||||
}
|
||||
|
||||
pub fn build(self) -> anyhow::Result<PageServerConf> {
|
||||
let broker_endpoints = self
|
||||
.broker_endpoints
|
||||
@@ -348,7 +333,6 @@ impl PageServerConfigBuilder {
|
||||
broker_etcd_prefix: self
|
||||
.broker_etcd_prefix
|
||||
.ok_or(anyhow!("missing broker_etcd_prefix"))?,
|
||||
log_format: self.log_format.ok_or(anyhow!("missing log_format"))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -380,17 +364,6 @@ impl PageServerConf {
|
||||
self.timelines_path(tenant_id).join(timeline_id.to_string())
|
||||
}
|
||||
|
||||
pub fn timeline_uninit_mark_file_path(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> PathBuf {
|
||||
path_with_suffix_extension(
|
||||
self.timeline_path(&timeline_id, &tenant_id),
|
||||
TIMELINE_UNINIT_MARK_SUFFIX,
|
||||
)
|
||||
}
|
||||
|
||||
/// Points to a place in pageserver's local directory,
|
||||
/// where certain timeline's metadata file should be located.
|
||||
pub fn metadata_path(&self, timeline_id: TimelineId, tenant_id: TenantId) -> PathBuf {
|
||||
@@ -401,28 +374,28 @@ impl PageServerConf {
|
||||
//
|
||||
// Postgres distribution paths
|
||||
//
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
match pg_version {
|
||||
14 => Ok(path.join(format!("v{pg_version}"))),
|
||||
15 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => path.join(format!("v{pg_version}")),
|
||||
15 => path.join(format!("v{pg_version}")),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("bin"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> PathBuf {
|
||||
match pg_version {
|
||||
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
14 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
15 => self.pg_distrib_dir(pg_version).join("lib"),
|
||||
_ => panic!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -473,9 +446,6 @@ impl PageServerConf {
|
||||
})
|
||||
.collect::<anyhow::Result<_>>()?,
|
||||
),
|
||||
"log_format" => builder.log_format(
|
||||
LogFormat::from_config(&parse_toml_string(key, item)?)?
|
||||
),
|
||||
_ => bail!("unrecognized pageserver option '{key}'"),
|
||||
}
|
||||
}
|
||||
@@ -588,7 +558,6 @@ impl PageServerConf {
|
||||
default_tenant_conf: TenantConf::dummy_conf(),
|
||||
broker_endpoints: Vec::new(),
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -683,8 +652,6 @@ max_file_descriptors = 333
|
||||
initial_superuser_name = 'zzzz'
|
||||
id = 10
|
||||
|
||||
log_format = 'json'
|
||||
|
||||
"#;
|
||||
|
||||
#[test]
|
||||
@@ -724,7 +691,6 @@ log_format = 'json'
|
||||
.parse()
|
||||
.expect("Failed to parse a valid broker endpoint URL")],
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
|
||||
},
|
||||
"Correct defaults should be used when no config values are provided"
|
||||
);
|
||||
@@ -769,7 +735,6 @@ log_format = 'json'
|
||||
.parse()
|
||||
.expect("Failed to parse a valid broker endpoint URL")],
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::Json,
|
||||
},
|
||||
"Should be able to parse all basic config values correctly"
|
||||
);
|
||||
|
||||
@@ -618,7 +618,6 @@ components:
|
||||
- last_record_lsn
|
||||
- disk_consistent_lsn
|
||||
- awaits_download
|
||||
- state
|
||||
properties:
|
||||
timeline_id:
|
||||
type: string
|
||||
@@ -661,8 +660,6 @@ components:
|
||||
type: integer
|
||||
awaits_download:
|
||||
type: boolean
|
||||
state:
|
||||
type: string
|
||||
|
||||
# These 'local' and 'remote' fields just duplicate some of the fields
|
||||
# above. They are kept for backwards-compatibility. They can be removed,
|
||||
|
||||
@@ -129,7 +129,6 @@ async fn build_timeline_info(
|
||||
}
|
||||
};
|
||||
let current_physical_size = Some(timeline.get_physical_size());
|
||||
let state = timeline.current_state();
|
||||
|
||||
let info = TimelineInfo {
|
||||
tenant_id: timeline.tenant_id,
|
||||
@@ -159,7 +158,6 @@ async fn build_timeline_info(
|
||||
|
||||
remote_consistent_lsn,
|
||||
awaits_download,
|
||||
state,
|
||||
|
||||
// Duplicate some fields in 'local' and 'remote' fields, for backwards-compatility
|
||||
// with the control plane.
|
||||
@@ -296,7 +294,7 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
|
||||
let timeline_info = async {
|
||||
let timeline = tokio::task::spawn_blocking(move || {
|
||||
tenant_mgr::get_tenant(tenant_id, true)?.get_timeline(timeline_id, false)
|
||||
tenant_mgr::get_tenant(tenant_id, true)?.get_timeline(timeline_id)
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
|
||||
@@ -333,13 +331,14 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
|
||||
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
|
||||
|
||||
let timeline = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
|
||||
.and_then(|tenant| tenant.get_timeline(timeline_id))
|
||||
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let result = match timeline
|
||||
.find_lsn_for_timestamp(timestamp_pg)
|
||||
.map_err(ApiError::InternalServerError)?
|
||||
{
|
||||
LsnForTimestamp::Present(lsn) => format!("{lsn}"),
|
||||
LsnForTimestamp::Present(lsn) => format!("{}", lsn),
|
||||
LsnForTimestamp::Future(_lsn) => "future".into(),
|
||||
LsnForTimestamp::Past(_lsn) => "past".into(),
|
||||
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
|
||||
@@ -387,7 +386,7 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
}
|
||||
return json_response(StatusCode::ACCEPTED, ());
|
||||
}
|
||||
// no tenant in the index, release the lock to make the potentially lengthy download operation
|
||||
// no tenant in the index, release the lock to make the potentially lengthy download opetation
|
||||
drop(index_accessor);
|
||||
|
||||
// download index parts for every tenant timeline
|
||||
@@ -782,6 +781,11 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
|
||||
}
|
||||
|
||||
// Run GC immediately on given timeline.
|
||||
// FIXME: This is just for tests. See test_runner/regress/test_gc.py.
|
||||
// This probably should require special authentication or a global flag to
|
||||
// enable, I don't think we want to or need to allow regular clients to invoke
|
||||
// GC.
|
||||
// @hllinnaka in commits ec44f4b29, 3aca717f3
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
@@ -789,16 +793,16 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
// FIXME: currently this will return a 500 error on bad tenant id; it should be 4XX
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false).map_err(ApiError::NotFound)?;
|
||||
let repo = tenant_mgr::get_tenant(tenant_id, false).map_err(ApiError::NotFound)?;
|
||||
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
|
||||
|
||||
let _span_guard =
|
||||
info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id).entered();
|
||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| repo.get_gc_horizon());
|
||||
|
||||
// Use tenant's pitr setting
|
||||
let pitr = tenant.get_pitr_interval();
|
||||
let result = tenant
|
||||
let pitr = repo.get_pitr_interval();
|
||||
let result = repo
|
||||
.gc_iteration(Some(timeline_id), gc_horizon, pitr, true)
|
||||
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
|
||||
// better once the types support it.
|
||||
@@ -807,15 +811,19 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
}
|
||||
|
||||
// Run compaction immediately on given timeline.
|
||||
// FIXME This is just for tests. Don't expect this to be exposed to
|
||||
// the users or the api.
|
||||
// @dhammika in commit a0781f229
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
let repo = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = repo
|
||||
.get_timeline(timeline_id)
|
||||
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
timeline.compact().map_err(ApiError::InternalServerError)?;
|
||||
|
||||
@@ -829,9 +837,10 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
let repo = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = repo
|
||||
.get_timeline(timeline_id)
|
||||
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
timeline
|
||||
.checkpoint(CheckpointConfig::Forced)
|
||||
|
||||
@@ -12,10 +12,10 @@ use tracing::*;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::pgdatadir_mapping::*;
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::tenant::Timeline;
|
||||
use crate::walingest::WalIngest;
|
||||
use crate::walrecord::DecodedWALRecord;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::*;
|
||||
use postgres_ffi::waldecoder::WalStreamDecoder;
|
||||
@@ -43,19 +43,19 @@ pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
|
||||
/// The code that deals with the checkpoint would not work right if the
|
||||
/// cluster was not shut down cleanly.
|
||||
pub fn import_timeline_from_postgres_datadir(
|
||||
path: &Path,
|
||||
tline: &Timeline,
|
||||
pgdata_path: &Path,
|
||||
pgdata_lsn: Lsn,
|
||||
lsn: Lsn,
|
||||
) -> Result<()> {
|
||||
let mut pg_control: Option<ControlFileData> = None;
|
||||
|
||||
// TODO this shoud be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
|
||||
// Then fishing out pg_control would be unnecessary
|
||||
let mut modification = tline.begin_modification(pgdata_lsn);
|
||||
let mut modification = tline.begin_modification(lsn);
|
||||
modification.init_empty()?;
|
||||
|
||||
// Import all but pg_wal
|
||||
let all_but_wal = WalkDir::new(pgdata_path)
|
||||
let all_but_wal = WalkDir::new(path)
|
||||
.into_iter()
|
||||
.filter_entry(|entry| !entry.path().ends_with("pg_wal"));
|
||||
for entry in all_but_wal {
|
||||
@@ -63,7 +63,7 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
let metadata = entry.metadata().expect("error getting dir entry metadata");
|
||||
if metadata.is_file() {
|
||||
let absolute_path = entry.path();
|
||||
let relative_path = absolute_path.strip_prefix(pgdata_path)?;
|
||||
let relative_path = absolute_path.strip_prefix(path)?;
|
||||
|
||||
let file = File::open(absolute_path)?;
|
||||
let len = metadata.len() as usize;
|
||||
@@ -84,7 +84,7 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
"Postgres cluster was not shut down cleanly"
|
||||
);
|
||||
ensure!(
|
||||
pg_control.checkPointCopy.redo == pgdata_lsn.0,
|
||||
pg_control.checkPointCopy.redo == lsn.0,
|
||||
"unexpected checkpoint REDO pointer"
|
||||
);
|
||||
|
||||
@@ -92,10 +92,10 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
// this reads the checkpoint record itself, advancing the tip of the timeline to
|
||||
// *after* the checkpoint record. And crucially, it initializes the 'prev_lsn'.
|
||||
import_wal(
|
||||
&pgdata_path.join("pg_wal"),
|
||||
&path.join("pg_wal"),
|
||||
tline,
|
||||
Lsn(pg_control.checkPointCopy.redo),
|
||||
pgdata_lsn,
|
||||
lsn,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -8,6 +8,7 @@ pub mod page_cache;
|
||||
pub mod page_service;
|
||||
pub mod pgdatadir_mapping;
|
||||
pub mod profiling;
|
||||
pub mod reltag;
|
||||
pub mod repository;
|
||||
pub mod storage_sync;
|
||||
pub mod task_mgr;
|
||||
@@ -45,8 +46,6 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
|
||||
|
||||
pub const LOG_FILE_NAME: &str = "pageserver.log";
|
||||
|
||||
static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
|
||||
|
||||
/// Config for the Repository checkpointer
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum CheckpointConfig {
|
||||
|
||||
@@ -107,20 +107,18 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
|
||||
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
|
||||
// or in testing they estimate how much we would upload if we did.
|
||||
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"pageserver_created_persistent_files_total",
|
||||
"Number of files created that are meant to be uploaded to cloud storage",
|
||||
&["tenant_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"pageserver_written_persistent_bytes_total",
|
||||
"Total bytes written that are meant to be uploaded to cloud storage",
|
||||
&["tenant_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
@@ -277,15 +275,11 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
/// smallest redo processing times. These buckets allow us to measure down
|
||||
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
|
||||
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
|
||||
///
|
||||
/// Values up to 1s are recorded because metrics show that we have redo
|
||||
/// durations and lock times larger than 0.250s.
|
||||
macro_rules! redo_histogram_time_buckets {
|
||||
() => {
|
||||
vec![
|
||||
0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
|
||||
0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
|
||||
1.000_000,
|
||||
0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000,
|
||||
]
|
||||
};
|
||||
}
|
||||
@@ -300,17 +294,6 @@ macro_rules! redo_histogram_count_buckets {
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! redo_bytes_histogram_count_buckets {
|
||||
() => {
|
||||
// powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
|
||||
// rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
|
||||
vec![
|
||||
24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
|
||||
2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
|
||||
]
|
||||
};
|
||||
}
|
||||
|
||||
pub static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_wal_redo_seconds",
|
||||
@@ -338,15 +321,6 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_wal_redo_bytes_histogram",
|
||||
"Histogram of number of records replayed per redo",
|
||||
redo_bytes_histogram_count_buckets!(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"pageserver_replayed_wal_records_total",
|
||||
@@ -412,12 +386,8 @@ impl TimelineMetrics {
|
||||
let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED.clone();
|
||||
let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN.clone();
|
||||
|
||||
TimelineMetrics {
|
||||
tenant_id,
|
||||
@@ -449,8 +419,6 @@ impl Drop for TimelineMetrics {
|
||||
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = CURRENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
|
||||
let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);
|
||||
|
||||
for op in STORAGE_TIME_OPERATIONS {
|
||||
let _ = STORAGE_TIME.remove_label_values(&[op, tenant_id, timeline_id]);
|
||||
|
||||
@@ -10,14 +10,8 @@
|
||||
//
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use bytes::Bytes;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use futures::{Stream, StreamExt};
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
|
||||
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
|
||||
PagestreamNblocksRequest, PagestreamNblocksResponse,
|
||||
};
|
||||
use std::io;
|
||||
use std::net::TcpListener;
|
||||
use std::str;
|
||||
@@ -38,9 +32,10 @@ use utils::{
|
||||
|
||||
use crate::basebackup;
|
||||
use crate::config::{PageServerConf, ProfilingConfig};
|
||||
use crate::import_datadir::import_wal_from_tar;
|
||||
use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
|
||||
use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME};
|
||||
use crate::profiling::profpoint_start;
|
||||
use crate::reltag::RelTag;
|
||||
use crate::task_mgr;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::Timeline;
|
||||
@@ -50,6 +45,163 @@ use crate::CheckpointConfig;
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
enum PagestreamFeMessage {
|
||||
Exists(PagestreamExistsRequest),
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamExistsRequest {
|
||||
latest: bool,
|
||||
lsn: Lsn,
|
||||
rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamNblocksRequest {
|
||||
latest: bool,
|
||||
lsn: Lsn,
|
||||
rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamGetPageRequest {
|
||||
latest: bool,
|
||||
lsn: Lsn,
|
||||
rel: RelTag,
|
||||
blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamDbSizeRequest {
|
||||
latest: bool,
|
||||
lsn: Lsn,
|
||||
dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamExistsResponse {
|
||||
exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamNblocksResponse {
|
||||
n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamGetPageResponse {
|
||||
page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamErrorResponse {
|
||||
message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PagestreamDbSizeResponse {
|
||||
db_size: i64,
|
||||
}
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
fn parse(mut body: Bytes) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// TODO these gets can fail
|
||||
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.get_u8();
|
||||
match msg_tag {
|
||||
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
})),
|
||||
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
})),
|
||||
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
rel: RelTag {
|
||||
spcnode: body.get_u32(),
|
||||
dbnode: body.get_u32(),
|
||||
relnode: body.get_u32(),
|
||||
forknum: body.get_u8(),
|
||||
},
|
||||
blkno: body.get_u32(),
|
||||
})),
|
||||
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
latest: body.get_u8() != 0,
|
||||
lsn: Lsn::from(body.get_u64()),
|
||||
dbnode: body.get_u32(),
|
||||
})),
|
||||
_ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(100); /* tag from pagestore_client.h */
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(101); /* tag from pagestore_client.h */
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(102); /* tag from pagestore_client.h */
|
||||
bytes.put(&resp.page[..]);
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(103); /* tag from pagestore_client.h */
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(104); /* tag from pagestore_client.h */
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Bytes>> + '_ {
|
||||
async_stream::try_stream! {
|
||||
loop {
|
||||
@@ -348,8 +500,11 @@ impl PageServerHandler {
|
||||
task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
|
||||
// Create empty timeline
|
||||
info!("creating new timeline");
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)?;
|
||||
let timeline = tenant.create_empty_timeline(timeline_id, base_lsn, pg_version)?;
|
||||
let timeline = tenant_mgr::get_tenant(tenant_id, true)?.create_empty_timeline(
|
||||
timeline_id,
|
||||
base_lsn,
|
||||
pg_version,
|
||||
)?;
|
||||
|
||||
// TODO mark timeline as not ready until it reaches end_lsn.
|
||||
// We might have some wal to import as well, and we should prevent compute
|
||||
@@ -372,8 +527,7 @@ impl PageServerHandler {
|
||||
// - use block_in_place()
|
||||
let mut copyin_stream = Box::pin(copyin_stream(pgb));
|
||||
let reader = SyncIoBridge::new(StreamReader::new(&mut copyin_stream));
|
||||
tokio::task::block_in_place(|| timeline.import_basebackup_from_tar(reader, base_lsn))?;
|
||||
timeline.initialize()?;
|
||||
tokio::task::block_in_place(|| import_basebackup_from_tar(&timeline, reader, base_lsn))?;
|
||||
|
||||
// Drain the rest of the Copy data
|
||||
let mut bytes_after_tar = 0;
|
||||
@@ -390,6 +544,12 @@ impl PageServerHandler {
|
||||
// It wouldn't work if base came from vanilla postgres though,
|
||||
// since we discard some log files.
|
||||
|
||||
// Flush data to disk, then upload to s3
|
||||
info!("flushing layers");
|
||||
timeline.checkpoint(CheckpointConfig::Flush)?;
|
||||
|
||||
timeline.launch_wal_receiver()?;
|
||||
|
||||
info!("done");
|
||||
Ok(())
|
||||
}
|
||||
@@ -908,8 +1068,7 @@ impl postgres_backend_async::Handler for PageServerHandler {
|
||||
}
|
||||
|
||||
fn get_local_timeline(tenant_id: TenantId, timeline_id: TimelineId) -> Result<Arc<Timeline>> {
|
||||
tenant_mgr::get_tenant(tenant_id, true)
|
||||
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
|
||||
tenant_mgr::get_tenant(tenant_id, true).and_then(|tenant| tenant.get_timeline(timeline_id))
|
||||
}
|
||||
|
||||
///
|
||||
|
||||
@@ -7,12 +7,12 @@
|
||||
//! Clarify that)
|
||||
//!
|
||||
use crate::keyspace::{KeySpace, KeySpaceAccum};
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::repository::*;
|
||||
use crate::tenant::Timeline;
|
||||
use crate::walrecord::NeonWalRecord;
|
||||
use anyhow::{bail, ensure, Result};
|
||||
use bytes::{Buf, Bytes};
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::{Oid, TimestampTz, TransactionId};
|
||||
@@ -1373,17 +1373,6 @@ fn is_rel_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00 && key.field4 != 0
|
||||
}
|
||||
|
||||
pub fn is_rel_fsm_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
|
||||
}
|
||||
|
||||
pub fn is_rel_vm_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00
|
||||
&& key.field4 != 0
|
||||
&& key.field5 == VISIBILITYMAP_FORKNUM
|
||||
&& key.field6 != 0xffffffff
|
||||
}
|
||||
|
||||
pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
|
||||
Ok(match key.field1 {
|
||||
0x01 => {
|
||||
@@ -1414,9 +1403,7 @@ pub fn create_test_timeline(
|
||||
timeline_id: utils::id::TimelineId,
|
||||
pg_version: u32,
|
||||
) -> Result<std::sync::Arc<Timeline>> {
|
||||
let tline = tenant
|
||||
.create_empty_timeline(timeline_id, Lsn(8), pg_version)?
|
||||
.initialize()?;
|
||||
let tline = tenant.create_empty_timeline(timeline_id, Lsn(8), pg_version)?;
|
||||
let mut m = tline.begin_modification(Lsn(8));
|
||||
m.init_empty()?;
|
||||
m.commit()?;
|
||||
|
||||
@@ -171,7 +171,7 @@ use self::{
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
exponential_backoff,
|
||||
storage_sync::index::{LayerFileMetadata, RemoteIndex},
|
||||
storage_sync::index::RemoteIndex,
|
||||
task_mgr,
|
||||
task_mgr::TaskKind,
|
||||
task_mgr::BACKGROUND_RUNTIME,
|
||||
@@ -193,7 +193,7 @@ static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
|
||||
|
||||
/// A timeline status to share with pageserver's sync counterpart,
|
||||
/// after comparing local and remote timeline state.
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
#[derive(Clone)]
|
||||
pub enum LocalTimelineInitStatus {
|
||||
/// The timeline has every remote layer present locally.
|
||||
/// There could be some layers requiring uploading,
|
||||
@@ -316,7 +316,7 @@ impl SyncQueue {
|
||||
|
||||
/// A task to run in the async download/upload loop.
|
||||
/// Limited by the number of retries, after certain threshold the failing task gets evicted and the timeline disabled.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone)]
|
||||
enum SyncTask {
|
||||
/// A checkpoint outcome with possible local file updates that need actualization in the remote storage.
|
||||
/// Not necessary more fresh than the one already uploaded.
|
||||
@@ -427,7 +427,7 @@ impl SyncTaskBatch {
|
||||
.extend(new_delete.data.deleted_layers.iter().cloned());
|
||||
}
|
||||
if let Some(batch_upload) = &mut self.upload {
|
||||
let not_deleted = |layer: &PathBuf, _: &mut LayerFileMetadata| {
|
||||
let not_deleted = |layer: &PathBuf| {
|
||||
!new_delete.data.layers_to_delete.contains(layer)
|
||||
&& !new_delete.data.deleted_layers.contains(layer)
|
||||
};
|
||||
@@ -455,35 +455,21 @@ impl SyncTaskBatch {
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
struct LayersUpload {
|
||||
/// Layer file path in the pageserver workdir, that were added for the corresponding checkpoint.
|
||||
layers_to_upload: HashMap<PathBuf, LayerFileMetadata>,
|
||||
layers_to_upload: HashSet<PathBuf>,
|
||||
/// Already uploaded layers. Used to store the data about the uploads between task retries
|
||||
/// and to record the data into the remote index after the task got completed or evicted.
|
||||
uploaded_layers: HashMap<PathBuf, LayerFileMetadata>,
|
||||
uploaded_layers: HashSet<PathBuf>,
|
||||
metadata: Option<TimelineMetadata>,
|
||||
}
|
||||
|
||||
/// A timeline download task.
|
||||
/// Does not contain the file list to download, to allow other
|
||||
/// parts of the pageserer code to schedule the task
|
||||
/// without using the remote index or any other ways to list the remote timeline files.
|
||||
/// without using the remote index or any other ways to list the remote timleine files.
|
||||
/// Skips the files that are already downloaded.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
struct LayersDownload {
|
||||
layers_to_skip: HashSet<PathBuf>,
|
||||
|
||||
/// Paths which have been downloaded, and had their metadata verified or generated.
|
||||
///
|
||||
/// Metadata generation happens when upgrading from past version of `IndexPart`.
|
||||
gathered_metadata: HashMap<PathBuf, LayerFileMetadata>,
|
||||
}
|
||||
|
||||
impl LayersDownload {
|
||||
fn from_skipped_layers(layers_to_skip: HashSet<PathBuf>) -> Self {
|
||||
LayersDownload {
|
||||
layers_to_skip,
|
||||
gathered_metadata: HashMap::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -505,7 +491,7 @@ struct LayersDeletion {
|
||||
pub fn schedule_layer_upload(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
layers_to_upload: HashMap<PathBuf, LayerFileMetadata>,
|
||||
layers_to_upload: HashSet<PathBuf>,
|
||||
metadata: Option<TimelineMetadata>,
|
||||
) {
|
||||
let sync_queue = match SYNC_QUEUE.get() {
|
||||
@@ -522,7 +508,7 @@ pub fn schedule_layer_upload(
|
||||
},
|
||||
SyncTask::upload(LayersUpload {
|
||||
layers_to_upload,
|
||||
uploaded_layers: HashMap::new(),
|
||||
uploaded_layers: HashSet::new(),
|
||||
metadata,
|
||||
}),
|
||||
);
|
||||
@@ -580,44 +566,21 @@ pub fn schedule_layer_download(tenant_id: TenantId, timeline_id: TimelineId) {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
},
|
||||
SyncTask::download(LayersDownload::from_skipped_layers(HashSet::new())),
|
||||
SyncTask::download(LayersDownload {
|
||||
layers_to_skip: HashSet::new(),
|
||||
}),
|
||||
);
|
||||
debug!("Download task for tenant {tenant_id}, timeline {timeline_id} sent")
|
||||
}
|
||||
|
||||
/// Local existing timeline files
|
||||
///
|
||||
/// Values of this type serve different meanings in different contexts. On startup, collected
|
||||
/// timelines come with the full collected information and when signalling readyness to attach
|
||||
/// after completed download. After the download the file information is no longer carried, because
|
||||
/// it is already merged into [`RemoteTimeline`].
|
||||
#[derive(Debug)]
|
||||
pub struct TimelineLocalFiles(TimelineMetadata, HashMap<PathBuf, LayerFileMetadata>);
|
||||
|
||||
impl TimelineLocalFiles {
|
||||
pub fn metadata(&self) -> &TimelineMetadata {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Called during startup, for all of the local files with full metadata.
|
||||
pub(crate) fn collected(
|
||||
metadata: TimelineMetadata,
|
||||
timeline_files: HashMap<PathBuf, LayerFileMetadata>,
|
||||
) -> TimelineLocalFiles {
|
||||
TimelineLocalFiles(metadata, timeline_files)
|
||||
}
|
||||
|
||||
/// Called near the end of tenant initialization, to signal readyness to attach tenants.
|
||||
pub(crate) fn ready(metadata: TimelineMetadata) -> Self {
|
||||
TimelineLocalFiles(metadata, HashMap::new())
|
||||
}
|
||||
}
|
||||
|
||||
/// Launch a thread to perform remote storage sync tasks.
|
||||
/// See module docs for loop step description.
|
||||
pub fn spawn_storage_sync_task(
|
||||
conf: &'static PageServerConf,
|
||||
local_timeline_files: HashMap<TenantId, HashMap<TimelineId, TimelineLocalFiles>>,
|
||||
local_timeline_files: HashMap<
|
||||
TenantId,
|
||||
HashMap<TimelineId, (TimelineMetadata, HashSet<PathBuf>)>,
|
||||
>,
|
||||
storage: GenericRemoteStorage,
|
||||
max_concurrent_timelines_sync: NonZeroUsize,
|
||||
max_sync_errors: NonZeroU32,
|
||||
@@ -775,7 +738,7 @@ async fn storage_sync_loop(
|
||||
tenant_entry
|
||||
.iter()
|
||||
.map(|(&id, entry)| {
|
||||
(id, TimelineLocalFiles::ready(entry.metadata.clone()))
|
||||
(id, (entry.metadata.clone(), HashSet::new()))
|
||||
})
|
||||
.collect(),
|
||||
),
|
||||
@@ -1020,27 +983,15 @@ async fn download_timeline_data(
|
||||
}
|
||||
DownloadedTimeline::Successful(mut download_data) => {
|
||||
match update_local_metadata(conf, sync_id, current_remote_timeline).await {
|
||||
Ok(()) => {
|
||||
let mut g = index.write().await;
|
||||
|
||||
match g.set_awaits_download(&sync_id, false) {
|
||||
Ok(()) => {
|
||||
let timeline = g
|
||||
.timeline_entry_mut(&sync_id)
|
||||
.expect("set_awaits_download verified existence");
|
||||
|
||||
timeline.merge_metadata_from_downloaded(
|
||||
&download_data.data.gathered_metadata,
|
||||
);
|
||||
|
||||
register_sync_status(sync_id, sync_start, TASK_NAME, Some(true));
|
||||
return DownloadStatus::Downloaded;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
|
||||
}
|
||||
};
|
||||
}
|
||||
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
|
||||
Ok(()) => {
|
||||
register_sync_status(sync_id, sync_start, TASK_NAME, Some(true));
|
||||
return DownloadStatus::Downloaded;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
error!("Failed to update local timeline metadata: {e:?}");
|
||||
download_data.retries += 1;
|
||||
@@ -1243,18 +1194,11 @@ async fn update_remote_data(
|
||||
}
|
||||
if upload_failed {
|
||||
existing_entry.add_upload_failures(
|
||||
uploaded_data
|
||||
.layers_to_upload
|
||||
.iter()
|
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||
uploaded_data.layers_to_upload.iter().cloned(),
|
||||
);
|
||||
} else {
|
||||
existing_entry.add_timeline_layers(
|
||||
uploaded_data
|
||||
.uploaded_layers
|
||||
.iter()
|
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||
);
|
||||
existing_entry
|
||||
.add_timeline_layers(uploaded_data.uploaded_layers.iter().cloned());
|
||||
}
|
||||
}
|
||||
RemoteDataUpdate::Delete(layers_to_remove) => {
|
||||
@@ -1274,19 +1218,11 @@ async fn update_remote_data(
|
||||
};
|
||||
let mut new_remote_timeline = RemoteTimeline::new(new_metadata.clone());
|
||||
if upload_failed {
|
||||
new_remote_timeline.add_upload_failures(
|
||||
uploaded_data
|
||||
.layers_to_upload
|
||||
.iter()
|
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||
);
|
||||
new_remote_timeline
|
||||
.add_upload_failures(uploaded_data.layers_to_upload.iter().cloned());
|
||||
} else {
|
||||
new_remote_timeline.add_timeline_layers(
|
||||
uploaded_data
|
||||
.uploaded_layers
|
||||
.iter()
|
||||
.map(|(k, v)| (k.to_owned(), v.to_owned())),
|
||||
);
|
||||
new_remote_timeline
|
||||
.add_timeline_layers(uploaded_data.uploaded_layers.iter().cloned());
|
||||
}
|
||||
|
||||
index_accessor.add_timeline_entry(sync_id, new_remote_timeline.clone());
|
||||
@@ -1334,14 +1270,13 @@ async fn validate_task_retries(
|
||||
fn schedule_first_sync_tasks(
|
||||
index: &mut RemoteTimelineIndex,
|
||||
sync_queue: &SyncQueue,
|
||||
local_timeline_files: HashMap<TenantTimelineId, TimelineLocalFiles>,
|
||||
local_timeline_files: HashMap<TenantTimelineId, (TimelineMetadata, HashSet<PathBuf>)>,
|
||||
) -> TenantTimelineValues<LocalTimelineInitStatus> {
|
||||
let mut local_timeline_init_statuses = TenantTimelineValues::new();
|
||||
|
||||
let mut new_sync_tasks = VecDeque::with_capacity(local_timeline_files.len());
|
||||
|
||||
for (sync_id, local_timeline) in local_timeline_files {
|
||||
let TimelineLocalFiles(local_metadata, local_files) = local_timeline;
|
||||
for (sync_id, (local_metadata, local_files)) in local_timeline_files {
|
||||
match index.timeline_entry_mut(&sync_id) {
|
||||
Some(remote_timeline) => {
|
||||
let (timeline_status, awaits_download) = compare_local_and_remote_timeline(
|
||||
@@ -1385,7 +1320,7 @@ fn schedule_first_sync_tasks(
|
||||
sync_id,
|
||||
SyncTask::upload(LayersUpload {
|
||||
layers_to_upload: local_files,
|
||||
uploaded_layers: HashMap::new(),
|
||||
uploaded_layers: HashSet::new(),
|
||||
metadata: Some(local_metadata.clone()),
|
||||
}),
|
||||
));
|
||||
@@ -1412,46 +1347,20 @@ fn compare_local_and_remote_timeline(
|
||||
new_sync_tasks: &mut VecDeque<(TenantTimelineId, SyncTask)>,
|
||||
sync_id: TenantTimelineId,
|
||||
local_metadata: TimelineMetadata,
|
||||
local_files: HashMap<PathBuf, LayerFileMetadata>,
|
||||
local_files: HashSet<PathBuf>,
|
||||
remote_entry: &RemoteTimeline,
|
||||
) -> (LocalTimelineInitStatus, bool) {
|
||||
let _entered = info_span!("compare_local_and_remote_timeline", sync_id = %sync_id).entered();
|
||||
|
||||
let needed_to_download_files = remote_entry
|
||||
.stored_files()
|
||||
.iter()
|
||||
.filter_map(|(layer_file, remote_metadata)| {
|
||||
if let Some(local_metadata) = local_files.get(layer_file) {
|
||||
match (remote_metadata.file_size(), local_metadata.file_size()) {
|
||||
(Some(x), Some(y)) if x == y => { None },
|
||||
(None, Some(_)) => {
|
||||
// upgrading from an earlier IndexPart without metadata
|
||||
None
|
||||
},
|
||||
_ => {
|
||||
// having to deal with other than (Some(x), Some(y)) where x != y here is a
|
||||
// bummer, but see #2582 and #2610 for attempts and discussion.
|
||||
warn!("Redownloading locally existing {layer_file:?} due to size mismatch, size on index: {:?}, on disk: {:?}", remote_metadata.file_size(), local_metadata.file_size());
|
||||
Some(layer_file)
|
||||
},
|
||||
}
|
||||
} else {
|
||||
// doesn't exist locally
|
||||
Some(layer_file)
|
||||
}
|
||||
})
|
||||
.collect::<HashSet<_>>();
|
||||
let remote_files = remote_entry.stored_files();
|
||||
|
||||
let (initial_timeline_status, awaits_download) = if !needed_to_download_files.is_empty() {
|
||||
let number_of_layers_to_download = remote_files.difference(&local_files).count();
|
||||
let (initial_timeline_status, awaits_download) = if number_of_layers_to_download > 0 {
|
||||
new_sync_tasks.push_back((
|
||||
sync_id,
|
||||
SyncTask::download(LayersDownload::from_skipped_layers(
|
||||
local_files
|
||||
.keys()
|
||||
.filter(|path| !needed_to_download_files.contains(path))
|
||||
.cloned()
|
||||
.collect(),
|
||||
)),
|
||||
SyncTask::download(LayersDownload {
|
||||
layers_to_skip: local_files.clone(),
|
||||
}),
|
||||
));
|
||||
info!("NeedsSync");
|
||||
(LocalTimelineInitStatus::NeedsSync, true)
|
||||
@@ -1466,22 +1375,15 @@ fn compare_local_and_remote_timeline(
|
||||
};
|
||||
|
||||
let layers_to_upload = local_files
|
||||
.iter()
|
||||
.filter_map(|(local_file, metadata)| {
|
||||
if !remote_entry.stored_files().contains_key(local_file) {
|
||||
Some((local_file.to_owned(), metadata.to_owned()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
.difference(remote_files)
|
||||
.cloned()
|
||||
.collect::<HashSet<_>>();
|
||||
if !layers_to_upload.is_empty() {
|
||||
new_sync_tasks.push_back((
|
||||
sync_id,
|
||||
SyncTask::upload(LayersUpload {
|
||||
layers_to_upload,
|
||||
uploaded_layers: HashMap::new(),
|
||||
uploaded_layers: HashSet::new(),
|
||||
metadata: Some(local_metadata),
|
||||
}),
|
||||
));
|
||||
@@ -1537,12 +1439,11 @@ mod test_utils {
|
||||
let timeline_path = harness.timeline_path(&timeline_id);
|
||||
fs::create_dir_all(&timeline_path).await?;
|
||||
|
||||
let mut layers_to_upload = HashMap::with_capacity(filenames.len());
|
||||
let mut layers_to_upload = HashSet::with_capacity(filenames.len());
|
||||
for &file in filenames {
|
||||
let file_path = timeline_path.join(file);
|
||||
fs::write(&file_path, dummy_contents(file).into_bytes()).await?;
|
||||
let metadata = LayerFileMetadata::new(file_path.metadata()?.len());
|
||||
layers_to_upload.insert(file_path, metadata);
|
||||
layers_to_upload.insert(file_path);
|
||||
}
|
||||
|
||||
fs::write(
|
||||
@@ -1553,7 +1454,7 @@ mod test_utils {
|
||||
|
||||
Ok(LayersUpload {
|
||||
layers_to_upload,
|
||||
uploaded_layers: HashMap::new(),
|
||||
uploaded_layers: HashSet::new(),
|
||||
metadata: Some(metadata),
|
||||
})
|
||||
}
|
||||
@@ -1608,13 +1509,12 @@ mod tests {
|
||||
assert!(sync_id_2 != sync_id_3);
|
||||
assert!(sync_id_3 != TEST_SYNC_ID);
|
||||
|
||||
let download_task =
|
||||
SyncTask::download(LayersDownload::from_skipped_layers(HashSet::from([
|
||||
PathBuf::from("sk"),
|
||||
])));
|
||||
let download_task = SyncTask::download(LayersDownload {
|
||||
layers_to_skip: HashSet::from([PathBuf::from("sk")]),
|
||||
});
|
||||
let upload_task = SyncTask::upload(LayersUpload {
|
||||
layers_to_upload: HashMap::from([(PathBuf::from("up"), LayerFileMetadata::new(123))]),
|
||||
uploaded_layers: HashMap::from([(PathBuf::from("upl"), LayerFileMetadata::new(123))]),
|
||||
layers_to_upload: HashSet::from([PathBuf::from("up")]),
|
||||
uploaded_layers: HashSet::from([PathBuf::from("upl")]),
|
||||
metadata: Some(dummy_metadata(Lsn(2))),
|
||||
});
|
||||
let delete_task = SyncTask::delete(LayersDeletion {
|
||||
@@ -1658,10 +1558,12 @@ mod tests {
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
assert_eq!(sync_queue.len(), 0);
|
||||
|
||||
let download = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk")]));
|
||||
let download = LayersDownload {
|
||||
layers_to_skip: HashSet::from([PathBuf::from("sk")]),
|
||||
};
|
||||
let upload = LayersUpload {
|
||||
layers_to_upload: HashMap::from([(PathBuf::from("up"), LayerFileMetadata::new(123))]),
|
||||
uploaded_layers: HashMap::from([(PathBuf::from("upl"), LayerFileMetadata::new(123))]),
|
||||
layers_to_upload: HashSet::from([PathBuf::from("up")]),
|
||||
uploaded_layers: HashSet::from([PathBuf::from("upl")]),
|
||||
metadata: Some(dummy_metadata(Lsn(2))),
|
||||
};
|
||||
let delete = LayersDeletion {
|
||||
@@ -1709,10 +1611,18 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn same_task_id_same_tasks_batch() {
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(1).unwrap());
|
||||
let download_1 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk1")]));
|
||||
let download_2 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk2")]));
|
||||
let download_3 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk3")]));
|
||||
let download_4 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk4")]));
|
||||
let download_1 = LayersDownload {
|
||||
layers_to_skip: HashSet::from([PathBuf::from("sk1")]),
|
||||
};
|
||||
let download_2 = LayersDownload {
|
||||
layers_to_skip: HashSet::from([PathBuf::from("sk2")]),
|
||||
};
|
||||
let download_3 = LayersDownload {
|
||||
layers_to_skip: HashSet::from([PathBuf::from("sk3")]),
|
||||
};
|
||||
let download_4 = LayersDownload {
|
||||
layers_to_skip: HashSet::from([PathBuf::from("sk4")]),
|
||||
};
|
||||
|
||||
let sync_id_2 = TenantTimelineId {
|
||||
tenant_id: TenantId::from_array(hex!("22223344556677881122334455667788")),
|
||||
@@ -1736,15 +1646,15 @@ mod tests {
|
||||
Some(SyncTaskBatch {
|
||||
download: Some(SyncData {
|
||||
retries: 0,
|
||||
data: LayersDownload::from_skipped_layers(
|
||||
{
|
||||
data: LayersDownload {
|
||||
layers_to_skip: {
|
||||
let mut set = HashSet::new();
|
||||
set.extend(download_1.layers_to_skip.into_iter());
|
||||
set.extend(download_2.layers_to_skip.into_iter());
|
||||
set.extend(download_4.layers_to_skip.into_iter());
|
||||
set
|
||||
},
|
||||
)
|
||||
}
|
||||
}),
|
||||
upload: None,
|
||||
delete: None,
|
||||
@@ -1760,148 +1670,4 @@ mod tests {
|
||||
"Should have one task left out of the batch"
|
||||
);
|
||||
}
|
||||
|
||||
mod local_and_remote_comparisons {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn ready() {
|
||||
let mut new_sync_tasks = VecDeque::default();
|
||||
let sync_id = TenantTimelineId::generate();
|
||||
let local_metadata = dummy_metadata(0x02.into());
|
||||
let local_files =
|
||||
HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
|
||||
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
|
||||
remote_entry
|
||||
.add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
|
||||
|
||||
let (status, sync_needed) = compare_local_and_remote_timeline(
|
||||
&mut new_sync_tasks,
|
||||
sync_id,
|
||||
local_metadata.clone(),
|
||||
local_files,
|
||||
&remote_entry,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
status,
|
||||
LocalTimelineInitStatus::LocallyComplete(local_metadata)
|
||||
);
|
||||
assert!(!sync_needed);
|
||||
|
||||
assert!(new_sync_tasks.is_empty(), "{:?}", new_sync_tasks);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn needs_download() {
|
||||
let mut new_sync_tasks = VecDeque::default();
|
||||
let sync_id = TenantTimelineId::generate();
|
||||
let local_metadata = dummy_metadata(0x02.into());
|
||||
let local_files = HashMap::default();
|
||||
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
|
||||
remote_entry
|
||||
.add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
|
||||
|
||||
let (status, sync_needed) = compare_local_and_remote_timeline(
|
||||
&mut new_sync_tasks,
|
||||
sync_id,
|
||||
local_metadata,
|
||||
local_files.clone(),
|
||||
&remote_entry,
|
||||
);
|
||||
|
||||
assert_eq!(status, LocalTimelineInitStatus::NeedsSync);
|
||||
assert!(sync_needed);
|
||||
|
||||
let new_sync_tasks = new_sync_tasks.into_iter().collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(
|
||||
&new_sync_tasks,
|
||||
&[(
|
||||
sync_id,
|
||||
SyncTask::download(LayersDownload::from_skipped_layers(
|
||||
local_files.keys().cloned().collect()
|
||||
))
|
||||
)]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn redownload_is_not_needed_on_upgrade() {
|
||||
// originally the implementation missed the `(None, Some(_))` case in the match, and
|
||||
// proceeded to always redownload if the remote metadata was not available.
|
||||
|
||||
let mut new_sync_tasks = VecDeque::default();
|
||||
let sync_id = TenantTimelineId::generate();
|
||||
|
||||
let local_metadata = dummy_metadata(0x02.into());
|
||||
|
||||
// type system would in general allow that LayerFileMetadata would be created with
|
||||
// file_size: None, however `LayerFileMetadata::default` is only allowed from tests,
|
||||
// and so everywhere within the system valid LayerFileMetadata is being created, it is
|
||||
// created through `::new`.
|
||||
let local_files =
|
||||
HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
|
||||
|
||||
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
|
||||
|
||||
// RemoteTimeline is constructed out of an older version IndexPart, which didn't carry
|
||||
// any metadata.
|
||||
remote_entry
|
||||
.add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::default())]);
|
||||
|
||||
let (status, sync_needed) = compare_local_and_remote_timeline(
|
||||
&mut new_sync_tasks,
|
||||
sync_id,
|
||||
local_metadata.clone(),
|
||||
local_files,
|
||||
&remote_entry,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
status,
|
||||
LocalTimelineInitStatus::LocallyComplete(local_metadata)
|
||||
);
|
||||
assert!(!sync_needed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn needs_upload() {
|
||||
let mut new_sync_tasks = VecDeque::default();
|
||||
let sync_id = TenantTimelineId::generate();
|
||||
let local_metadata = dummy_metadata(0x02.into());
|
||||
let local_files =
|
||||
HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
|
||||
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
|
||||
remote_entry.add_timeline_layers([]);
|
||||
|
||||
let (status, sync_needed) = compare_local_and_remote_timeline(
|
||||
&mut new_sync_tasks,
|
||||
sync_id,
|
||||
local_metadata.clone(),
|
||||
local_files.clone(),
|
||||
&remote_entry,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
status,
|
||||
LocalTimelineInitStatus::LocallyComplete(local_metadata.clone())
|
||||
);
|
||||
assert!(!sync_needed);
|
||||
|
||||
let new_sync_tasks = new_sync_tasks.into_iter().collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(
|
||||
&new_sync_tasks,
|
||||
&[(
|
||||
sync_id,
|
||||
SyncTask::upload(LayersUpload {
|
||||
layers_to_upload: local_files,
|
||||
uploaded_layers: HashMap::default(),
|
||||
metadata: Some(local_metadata),
|
||||
})
|
||||
)]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,7 +171,7 @@ mod tests {
|
||||
let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
|
||||
let timeline_upload =
|
||||
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
|
||||
for (local_path, _metadata) in timeline_upload.layers_to_upload {
|
||||
for local_path in timeline_upload.layers_to_upload {
|
||||
let remote_path =
|
||||
local_storage.resolve_in_storage(&local_storage.remote_object_id(&local_path)?)?;
|
||||
let remote_parent_dir = remote_path.parent().unwrap();
|
||||
|
||||
@@ -16,13 +16,9 @@ use tokio::{
|
||||
};
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
storage_sync::{index::LayerFileMetadata, SyncTask},
|
||||
TEMP_FILE_SUFFIX,
|
||||
};
|
||||
use crate::{config::PageServerConf, storage_sync::SyncTask, TEMP_FILE_SUFFIX};
|
||||
use utils::{
|
||||
crashsafe::path_with_suffix_extension,
|
||||
crashsafe_dir::path_with_suffix_extension,
|
||||
id::{TenantId, TenantTimelineId, TimelineId},
|
||||
};
|
||||
|
||||
@@ -223,14 +219,8 @@ pub(super) async fn download_timeline_layers<'a>(
|
||||
|
||||
let layers_to_download = remote_timeline
|
||||
.stored_files()
|
||||
.iter()
|
||||
.filter_map(|(layer_path, metadata)| {
|
||||
if !download.layers_to_skip.contains(layer_path) {
|
||||
Some((layer_path.to_owned(), metadata.to_owned()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.difference(&download.layers_to_skip)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
debug!("Layers to download: {layers_to_download:?}");
|
||||
@@ -243,129 +233,89 @@ pub(super) async fn download_timeline_layers<'a>(
|
||||
|
||||
let mut download_tasks = layers_to_download
|
||||
.into_iter()
|
||||
.map(|(layer_destination_path, metadata)| async move {
|
||||
.map(|layer_destination_path| async move {
|
||||
if layer_destination_path.exists() {
|
||||
debug!(
|
||||
"Layer already exists locally, skipping download: {}",
|
||||
layer_destination_path.display()
|
||||
);
|
||||
} else {
|
||||
// Perform a rename inspired by durable_rename from file_utils.c.
|
||||
// The sequence:
|
||||
// write(tmp)
|
||||
// fsync(tmp)
|
||||
// rename(tmp, new)
|
||||
// fsync(new)
|
||||
// fsync(parent)
|
||||
// For more context about durable_rename check this email from postgres mailing list:
|
||||
// https://www.postgresql.org/message-id/56583BDD.9060302@2ndquadrant.com
|
||||
// If pageserver crashes the temp file will be deleted on startup and re-downloaded.
|
||||
let temp_file_path =
|
||||
path_with_suffix_extension(&layer_destination_path, TEMP_FILE_SUFFIX);
|
||||
|
||||
match layer_destination_path.metadata() {
|
||||
Ok(m) if m.is_file() => {
|
||||
// the file exists from earlier round when we failed after renaming it as
|
||||
// layer_destination_path
|
||||
let verified = if let Some(expected) = metadata.file_size() {
|
||||
m.len() == expected
|
||||
} else {
|
||||
// behaviour before recording metadata was to accept any existing
|
||||
true
|
||||
};
|
||||
let mut destination_file =
|
||||
fs::File::create(&temp_file_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to create a destination file for layer '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
if verified {
|
||||
debug!(
|
||||
"Layer already exists locally, skipping download: {}",
|
||||
layer_destination_path.display()
|
||||
);
|
||||
return Ok((layer_destination_path, LayerFileMetadata::new(m.len())))
|
||||
} else {
|
||||
// no need to remove it, it will be overwritten by fs::rename
|
||||
// after successful download
|
||||
warn!("Downloaded layer exists already but layer file metadata mismatches: {}, metadata {:?}", layer_destination_path.display(), metadata);
|
||||
}
|
||||
}
|
||||
Ok(m) => {
|
||||
return Err(anyhow::anyhow!("Downloaded layer destination exists but is not a file: {m:?}, target needs to be removed/archived manually: {layer_destination_path:?}"));
|
||||
}
|
||||
Err(_) => {
|
||||
// behave as the file didn't exist
|
||||
}
|
||||
let mut layer_download = storage.download_storage_object(None, &layer_destination_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to initiate the download the layer for {sync_id} into file '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
io::copy(&mut layer_download.download_stream, &mut destination_file)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to download the layer for {sync_id} into file '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
|
||||
// A file will not be closed immediately when it goes out of scope if there are any IO operations
|
||||
// that have not yet completed. To ensure that a file is closed immediately when it is dropped,
|
||||
// you should call flush before dropping it.
|
||||
//
|
||||
// From the tokio code I see that it waits for pending operations to complete. There shouldn't be any because
|
||||
// we assume that `destination_file` file is fully written. I.e there is no pending .write(...).await operations.
|
||||
// But for additional safety let's check/wait for any pending operations.
|
||||
destination_file.flush().await.with_context(|| {
|
||||
format!(
|
||||
"failed to flush source file at {}",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// not using sync_data because it can lose file size update
|
||||
destination_file.sync_all().await.with_context(|| {
|
||||
format!(
|
||||
"failed to fsync source file at {}",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
drop(destination_file);
|
||||
|
||||
fail::fail_point!("remote-storage-download-pre-rename", |_| {
|
||||
anyhow::bail!("remote-storage-download-pre-rename failpoint triggered")
|
||||
});
|
||||
|
||||
fs::rename(&temp_file_path, &layer_destination_path).await?;
|
||||
|
||||
fsync_path(&layer_destination_path).await.with_context(|| {
|
||||
format!(
|
||||
"Cannot fsync layer destination path {}",
|
||||
layer_destination_path.display(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Perform a rename inspired by durable_rename from file_utils.c.
|
||||
// The sequence:
|
||||
// write(tmp)
|
||||
// fsync(tmp)
|
||||
// rename(tmp, new)
|
||||
// fsync(new)
|
||||
// fsync(parent)
|
||||
// For more context about durable_rename check this email from postgres mailing list:
|
||||
// https://www.postgresql.org/message-id/56583BDD.9060302@2ndquadrant.com
|
||||
// If pageserver crashes the temp file will be deleted on startup and re-downloaded.
|
||||
let temp_file_path =
|
||||
path_with_suffix_extension(&layer_destination_path, TEMP_FILE_SUFFIX);
|
||||
|
||||
// TODO: this doesn't use the cached fd for some reason?
|
||||
let mut destination_file =
|
||||
fs::File::create(&temp_file_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to create a destination file for layer '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut layer_download = storage.download_storage_object(None, &layer_destination_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to initiate the download the layer for {sync_id} into file '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let bytes_amount = io::copy(&mut layer_download.download_stream, &mut destination_file)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to download the layer for {sync_id} into file '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
|
||||
// A file will not be closed immediately when it goes out of scope if there are any IO operations
|
||||
// that have not yet completed. To ensure that a file is closed immediately when it is dropped,
|
||||
// you should call flush before dropping it.
|
||||
//
|
||||
// From the tokio code I see that it waits for pending operations to complete. There shouldn't be any because
|
||||
// we assume that `destination_file` file is fully written. I.e there is no pending .write(...).await operations.
|
||||
// But for additional safety let's check/wait for any pending operations.
|
||||
destination_file.flush().await.with_context(|| {
|
||||
format!(
|
||||
"failed to flush source file at {}",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
match metadata.file_size() {
|
||||
Some(expected) if expected != bytes_amount => {
|
||||
anyhow::bail!(
|
||||
"According to layer file metadata should had downloaded {expected} bytes but downloaded {bytes_amount} bytes into file '{}'",
|
||||
temp_file_path.display()
|
||||
);
|
||||
},
|
||||
Some(_) | None => {
|
||||
// matches, or upgrading from an earlier IndexPart version
|
||||
}
|
||||
}
|
||||
|
||||
// not using sync_data because it can lose file size update
|
||||
destination_file.sync_all().await.with_context(|| {
|
||||
format!(
|
||||
"failed to fsync source file at {}",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
drop(destination_file);
|
||||
|
||||
fail::fail_point!("remote-storage-download-pre-rename", |_| {
|
||||
anyhow::bail!("remote-storage-download-pre-rename failpoint triggered")
|
||||
});
|
||||
|
||||
fs::rename(&temp_file_path, &layer_destination_path).await?;
|
||||
|
||||
fsync_path(&layer_destination_path).await.with_context(|| {
|
||||
format!(
|
||||
"Cannot fsync layer destination path {}",
|
||||
layer_destination_path.display(),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok::<_, anyhow::Error>((layer_destination_path, LayerFileMetadata::new(bytes_amount)))
|
||||
Ok::<_, anyhow::Error>(layer_destination_path)
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
@@ -374,12 +324,9 @@ pub(super) async fn download_timeline_layers<'a>(
|
||||
let mut undo = HashSet::new();
|
||||
while let Some(download_result) = download_tasks.next().await {
|
||||
match download_result {
|
||||
Ok((downloaded_path, metadata)) => {
|
||||
Ok(downloaded_path) => {
|
||||
undo.insert(downloaded_path.clone());
|
||||
download.layers_to_skip.insert(downloaded_path.clone());
|
||||
// what if the key existed already? ignore, because then we would had
|
||||
// downloaded a partial file, and had to retry
|
||||
download.gathered_metadata.insert(downloaded_path, metadata);
|
||||
download.layers_to_skip.insert(downloaded_path);
|
||||
}
|
||||
Err(e) => {
|
||||
errors_happened = true;
|
||||
@@ -402,8 +349,6 @@ pub(super) async fn download_timeline_layers<'a>(
|
||||
);
|
||||
for item in undo {
|
||||
download.layers_to_skip.remove(&item);
|
||||
// intentionally don't clear the gathered_metadata because it exists for fsync_path
|
||||
// failure on parent directory
|
||||
}
|
||||
errors_happened = true;
|
||||
}
|
||||
@@ -508,9 +453,9 @@ mod tests {
|
||||
let timeline_upload =
|
||||
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
|
||||
|
||||
for local_path in timeline_upload.layers_to_upload.keys() {
|
||||
for local_path in timeline_upload.layers_to_upload {
|
||||
let remote_path =
|
||||
local_storage.resolve_in_storage(&storage.remote_object_id(local_path)?)?;
|
||||
local_storage.resolve_in_storage(&storage.remote_object_id(&local_path)?)?;
|
||||
let remote_parent_dir = remote_path.parent().unwrap();
|
||||
if !remote_parent_dir.exists() {
|
||||
fs::create_dir_all(&remote_parent_dir).await?;
|
||||
@@ -528,19 +473,11 @@ mod tests {
|
||||
|
||||
let mut remote_timeline = RemoteTimeline::new(metadata.clone());
|
||||
remote_timeline.awaits_download = true;
|
||||
remote_timeline.add_timeline_layers(layer_files.iter().map(|layer| {
|
||||
let layer_path = local_timeline_path.join(layer);
|
||||
|
||||
// this could had also been LayerFileMetadata::default(), but since in this test we
|
||||
// don't do the merge operation done by storage_sync::download_timeline_data, it would
|
||||
// not be merged back to timeline.
|
||||
let metadata_from_upload = timeline_upload
|
||||
.layers_to_upload
|
||||
.get(&layer_path)
|
||||
.expect("layer must exist in previously uploaded paths")
|
||||
.to_owned();
|
||||
(layer_path, metadata_from_upload)
|
||||
}));
|
||||
remote_timeline.add_timeline_layers(
|
||||
layer_files
|
||||
.iter()
|
||||
.map(|layer| local_timeline_path.join(layer)),
|
||||
);
|
||||
|
||||
let download_data = match download_timeline_layers(
|
||||
harness.conf,
|
||||
@@ -550,9 +487,9 @@ mod tests {
|
||||
sync_id,
|
||||
SyncData::new(
|
||||
current_retries,
|
||||
LayersDownload::from_skipped_layers(HashSet::from([
|
||||
local_timeline_path.join("layer_to_skip")
|
||||
])),
|
||||
LayersDownload {
|
||||
layers_to_skip: HashSet::from([local_timeline_path.join("layer_to_skip")]),
|
||||
},
|
||||
),
|
||||
)
|
||||
.await
|
||||
@@ -615,7 +552,12 @@ mod tests {
|
||||
&sync_queue,
|
||||
None,
|
||||
sync_id,
|
||||
SyncData::new(0, LayersDownload::from_skipped_layers(HashSet::new())),
|
||||
SyncData::new(
|
||||
0,
|
||||
LayersDownload {
|
||||
layers_to_skip: HashSet::new(),
|
||||
},
|
||||
),
|
||||
)
|
||||
.await;
|
||||
assert!(
|
||||
@@ -634,7 +576,12 @@ mod tests {
|
||||
&sync_queue,
|
||||
Some(¬_expecting_download_remote_timeline),
|
||||
sync_id,
|
||||
SyncData::new(0, LayersDownload::from_skipped_layers(HashSet::new())),
|
||||
SyncData::new(
|
||||
0,
|
||||
LayersDownload {
|
||||
layers_to_skip: HashSet::new(),
|
||||
},
|
||||
),
|
||||
)
|
||||
.await;
|
||||
assert!(
|
||||
|
||||
@@ -212,8 +212,8 @@ impl RemoteTimelineIndex {
|
||||
/// Restored index part data about the timeline, stored in the remote index.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RemoteTimeline {
|
||||
timeline_layers: HashMap<PathBuf, LayerFileMetadata>,
|
||||
missing_layers: HashMap<PathBuf, LayerFileMetadata>,
|
||||
timeline_layers: HashSet<PathBuf>,
|
||||
missing_layers: HashSet<PathBuf>,
|
||||
|
||||
pub metadata: TimelineMetadata,
|
||||
pub awaits_download: bool,
|
||||
@@ -222,161 +222,62 @@ pub struct RemoteTimeline {
|
||||
impl RemoteTimeline {
|
||||
pub fn new(metadata: TimelineMetadata) -> Self {
|
||||
Self {
|
||||
timeline_layers: HashMap::default(),
|
||||
missing_layers: HashMap::default(),
|
||||
timeline_layers: HashSet::new(),
|
||||
missing_layers: HashSet::new(),
|
||||
metadata,
|
||||
awaits_download: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_timeline_layers(
|
||||
&mut self,
|
||||
new_layers: impl IntoIterator<Item = (PathBuf, LayerFileMetadata)>,
|
||||
) {
|
||||
self.timeline_layers.extend(new_layers);
|
||||
pub fn add_timeline_layers(&mut self, new_layers: impl IntoIterator<Item = PathBuf>) {
|
||||
self.timeline_layers.extend(new_layers.into_iter());
|
||||
}
|
||||
|
||||
pub fn add_upload_failures(
|
||||
&mut self,
|
||||
upload_failures: impl IntoIterator<Item = (PathBuf, LayerFileMetadata)>,
|
||||
) {
|
||||
self.missing_layers.extend(upload_failures);
|
||||
pub fn add_upload_failures(&mut self, upload_failures: impl IntoIterator<Item = PathBuf>) {
|
||||
self.missing_layers.extend(upload_failures.into_iter());
|
||||
}
|
||||
|
||||
pub fn remove_layers(&mut self, layers_to_remove: &HashSet<PathBuf>) {
|
||||
self.timeline_layers
|
||||
.retain(|layer, _| !layers_to_remove.contains(layer));
|
||||
.retain(|layer| !layers_to_remove.contains(layer));
|
||||
self.missing_layers
|
||||
.retain(|layer, _| !layers_to_remove.contains(layer));
|
||||
.retain(|layer| !layers_to_remove.contains(layer));
|
||||
}
|
||||
|
||||
/// Lists all layer files in the given remote timeline. Omits the metadata file.
|
||||
pub fn stored_files(&self) -> &HashMap<PathBuf, LayerFileMetadata> {
|
||||
pub fn stored_files(&self) -> &HashSet<PathBuf> {
|
||||
&self.timeline_layers
|
||||
}
|
||||
|
||||
/// Combines metadata gathered or verified during downloading needed layer files to metadata on
|
||||
/// the [`RemoteIndex`], so it can be uploaded later.
|
||||
pub fn merge_metadata_from_downloaded(
|
||||
&mut self,
|
||||
downloaded: &HashMap<PathBuf, LayerFileMetadata>,
|
||||
) {
|
||||
downloaded.iter().for_each(|(path, metadata)| {
|
||||
if let Some(upgraded) = self.timeline_layers.get_mut(path) {
|
||||
upgraded.merge(metadata);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
pub fn from_index_part(timeline_path: &Path, index_part: IndexPart) -> anyhow::Result<Self> {
|
||||
let metadata = TimelineMetadata::from_bytes(&index_part.metadata_bytes)?;
|
||||
let default_metadata = &IndexLayerMetadata::default();
|
||||
|
||||
let find_metadata = |key: &RelativePath| -> LayerFileMetadata {
|
||||
index_part
|
||||
.layer_metadata
|
||||
.get(key)
|
||||
.unwrap_or(default_metadata)
|
||||
.into()
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
timeline_layers: index_part
|
||||
.timeline_layers
|
||||
.iter()
|
||||
.map(|layer_path| (layer_path.as_path(timeline_path), find_metadata(layer_path)))
|
||||
.collect(),
|
||||
missing_layers: index_part
|
||||
.missing_layers
|
||||
.iter()
|
||||
.map(|layer_path| (layer_path.as_path(timeline_path), find_metadata(layer_path)))
|
||||
.collect(),
|
||||
timeline_layers: to_local_paths(timeline_path, index_part.timeline_layers),
|
||||
missing_layers: to_local_paths(timeline_path, index_part.missing_layers),
|
||||
metadata,
|
||||
awaits_download: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata gathered for each of the layer files.
|
||||
///
|
||||
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
|
||||
/// might have less or more metadata depending if upgrading or rolling back an upgrade.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[cfg_attr(test, derive(Default))]
|
||||
pub struct LayerFileMetadata {
|
||||
file_size: Option<u64>,
|
||||
}
|
||||
|
||||
impl From<&'_ IndexLayerMetadata> for LayerFileMetadata {
|
||||
fn from(other: &IndexLayerMetadata) -> Self {
|
||||
LayerFileMetadata {
|
||||
file_size: other.file_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LayerFileMetadata {
|
||||
pub fn new(file_size: u64) -> Self {
|
||||
LayerFileMetadata {
|
||||
file_size: Some(file_size),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn file_size(&self) -> Option<u64> {
|
||||
self.file_size
|
||||
}
|
||||
|
||||
/// Metadata has holes due to version upgrades. This method is called to upgrade self with the
|
||||
/// other value.
|
||||
///
|
||||
/// This is called on the possibly outdated version.
|
||||
pub fn merge(&mut self, other: &Self) {
|
||||
self.file_size = other.file_size.or(self.file_size);
|
||||
}
|
||||
}
|
||||
|
||||
/// Part of the remote index, corresponding to a certain timeline.
|
||||
/// Contains the data about all files in the timeline, present remotely and its metadata.
|
||||
///
|
||||
/// This type needs to be backwards and forwards compatible. When changing the fields,
|
||||
/// remember to add a test case for the changed version.
|
||||
#[serde_as]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||
pub struct IndexPart {
|
||||
/// Debugging aid describing the version of this type.
|
||||
#[serde(default)]
|
||||
version: usize,
|
||||
|
||||
/// Each of the layers present on remote storage.
|
||||
///
|
||||
/// Additional metadata can might exist in `layer_metadata`.
|
||||
timeline_layers: HashSet<RelativePath>,
|
||||
|
||||
/// Currently is not really used in pageserver,
|
||||
/// present to manually keep track of the layer files that pageserver might never retrieve.
|
||||
///
|
||||
/// Such "holes" might appear if any upload task was evicted on an error threshold:
|
||||
/// the this layer will only be rescheduled for upload on pageserver restart.
|
||||
missing_layers: HashSet<RelativePath>,
|
||||
|
||||
/// Per layer file metadata, which can be present for a present or missing layer file.
|
||||
///
|
||||
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
|
||||
/// that latest version stores.
|
||||
#[serde(default)]
|
||||
layer_metadata: HashMap<RelativePath, IndexLayerMetadata>,
|
||||
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
disk_consistent_lsn: Lsn,
|
||||
metadata_bytes: Vec<u8>,
|
||||
}
|
||||
|
||||
impl IndexPart {
|
||||
/// When adding or modifying any parts of `IndexPart`, increment the version so that it can be
|
||||
/// used to understand later versions.
|
||||
///
|
||||
/// Version is currently informative only.
|
||||
const LATEST_VERSION: usize = 1;
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -387,10 +288,8 @@ impl IndexPart {
|
||||
metadata_bytes: Vec<u8>,
|
||||
) -> Self {
|
||||
Self {
|
||||
version: Self::LATEST_VERSION,
|
||||
timeline_layers,
|
||||
missing_layers,
|
||||
layer_metadata: HashMap::default(),
|
||||
disk_consistent_lsn,
|
||||
metadata_bytes,
|
||||
}
|
||||
@@ -405,68 +304,35 @@ impl IndexPart {
|
||||
remote_timeline: RemoteTimeline,
|
||||
) -> anyhow::Result<Self> {
|
||||
let metadata_bytes = remote_timeline.metadata.to_bytes()?;
|
||||
|
||||
let mut layer_metadata = HashMap::new();
|
||||
|
||||
let mut missing_layers = HashSet::new();
|
||||
|
||||
separate_paths_and_metadata(
|
||||
timeline_path,
|
||||
&remote_timeline.missing_layers,
|
||||
&mut missing_layers,
|
||||
&mut layer_metadata,
|
||||
)
|
||||
.context("Failed to convert missing layers' paths to relative ones")?;
|
||||
|
||||
let mut timeline_layers = HashSet::new();
|
||||
|
||||
separate_paths_and_metadata(
|
||||
timeline_path,
|
||||
&remote_timeline.timeline_layers,
|
||||
&mut timeline_layers,
|
||||
&mut layer_metadata,
|
||||
)
|
||||
.context("Failed to convert timeline layers' paths to relative ones")?;
|
||||
|
||||
Ok(Self {
|
||||
version: Self::LATEST_VERSION,
|
||||
timeline_layers,
|
||||
missing_layers,
|
||||
layer_metadata,
|
||||
timeline_layers: to_relative_paths(timeline_path, remote_timeline.timeline_layers)
|
||||
.context("Failed to convert timeline layers' paths to relative ones")?,
|
||||
missing_layers: to_relative_paths(timeline_path, remote_timeline.missing_layers)
|
||||
.context("Failed to convert missing layers' paths to relative ones")?,
|
||||
disk_consistent_lsn: remote_timeline.metadata.disk_consistent_lsn(),
|
||||
metadata_bytes,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Serialized form of [`LayerFileMetadata`].
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct IndexLayerMetadata {
|
||||
file_size: Option<u64>,
|
||||
}
|
||||
|
||||
impl From<&'_ LayerFileMetadata> for IndexLayerMetadata {
|
||||
fn from(other: &'_ LayerFileMetadata) -> Self {
|
||||
IndexLayerMetadata {
|
||||
file_size: other.file_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn separate_paths_and_metadata(
|
||||
fn to_local_paths(
|
||||
timeline_path: &Path,
|
||||
input: &HashMap<PathBuf, LayerFileMetadata>,
|
||||
output: &mut HashSet<RelativePath>,
|
||||
layer_metadata: &mut HashMap<RelativePath, IndexLayerMetadata>,
|
||||
) -> anyhow::Result<()> {
|
||||
for (path, metadata) in input {
|
||||
let rel_path = RelativePath::new(timeline_path, path)?;
|
||||
let metadata = IndexLayerMetadata::from(metadata);
|
||||
paths: impl IntoIterator<Item = RelativePath>,
|
||||
) -> HashSet<PathBuf> {
|
||||
paths
|
||||
.into_iter()
|
||||
.map(|path| path.as_path(timeline_path))
|
||||
.collect()
|
||||
}
|
||||
|
||||
layer_metadata.insert(rel_path.clone(), metadata);
|
||||
output.insert(rel_path);
|
||||
}
|
||||
Ok(())
|
||||
fn to_relative_paths(
|
||||
timeline_path: &Path,
|
||||
paths: impl IntoIterator<Item = PathBuf>,
|
||||
) -> anyhow::Result<HashSet<RelativePath>> {
|
||||
paths
|
||||
.into_iter()
|
||||
.map(|path| RelativePath::new(timeline_path, path))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -491,13 +357,13 @@ mod tests {
|
||||
DEFAULT_PG_VERSION,
|
||||
);
|
||||
let remote_timeline = RemoteTimeline {
|
||||
timeline_layers: HashMap::from([
|
||||
(timeline_path.join("layer_1"), LayerFileMetadata::new(1)),
|
||||
(timeline_path.join("layer_2"), LayerFileMetadata::new(2)),
|
||||
timeline_layers: HashSet::from([
|
||||
timeline_path.join("layer_1"),
|
||||
timeline_path.join("layer_2"),
|
||||
]),
|
||||
missing_layers: HashMap::from([
|
||||
(timeline_path.join("missing_1"), LayerFileMetadata::new(3)),
|
||||
(timeline_path.join("missing_2"), LayerFileMetadata::new(4)),
|
||||
missing_layers: HashSet::from([
|
||||
timeline_path.join("missing_1"),
|
||||
timeline_path.join("missing_2"),
|
||||
]),
|
||||
metadata: metadata.clone(),
|
||||
awaits_download: false,
|
||||
@@ -619,13 +485,13 @@ mod tests {
|
||||
let conversion_result = IndexPart::from_remote_timeline(
|
||||
&timeline_path,
|
||||
RemoteTimeline {
|
||||
timeline_layers: HashMap::from([
|
||||
(PathBuf::from("bad_path"), LayerFileMetadata::new(1)),
|
||||
(timeline_path.join("layer_2"), LayerFileMetadata::new(2)),
|
||||
timeline_layers: HashSet::from([
|
||||
PathBuf::from("bad_path"),
|
||||
timeline_path.join("layer_2"),
|
||||
]),
|
||||
missing_layers: HashMap::from([
|
||||
(timeline_path.join("missing_1"), LayerFileMetadata::new(3)),
|
||||
(timeline_path.join("missing_2"), LayerFileMetadata::new(4)),
|
||||
missing_layers: HashSet::from([
|
||||
timeline_path.join("missing_1"),
|
||||
timeline_path.join("missing_2"),
|
||||
]),
|
||||
metadata: metadata.clone(),
|
||||
awaits_download: false,
|
||||
@@ -636,13 +502,13 @@ mod tests {
|
||||
let conversion_result = IndexPart::from_remote_timeline(
|
||||
&timeline_path,
|
||||
RemoteTimeline {
|
||||
timeline_layers: HashMap::from([
|
||||
(timeline_path.join("layer_1"), LayerFileMetadata::new(1)),
|
||||
(timeline_path.join("layer_2"), LayerFileMetadata::new(2)),
|
||||
timeline_layers: HashSet::from([
|
||||
timeline_path.join("layer_1"),
|
||||
timeline_path.join("layer_2"),
|
||||
]),
|
||||
missing_layers: HashMap::from([
|
||||
(PathBuf::from("bad_path"), LayerFileMetadata::new(3)),
|
||||
(timeline_path.join("missing_2"), LayerFileMetadata::new(4)),
|
||||
missing_layers: HashSet::from([
|
||||
PathBuf::from("bad_path"),
|
||||
timeline_path.join("missing_2"),
|
||||
]),
|
||||
metadata,
|
||||
awaits_download: false,
|
||||
@@ -650,63 +516,4 @@ mod tests {
|
||||
);
|
||||
assert!(conversion_result.is_err(), "Should not be able to convert metadata with missing layer paths that are not in the timeline directory");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v0_indexpart_is_parsed() {
|
||||
let example = r#"{
|
||||
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
|
||||
"missing_layers":["not_a_real_layer_but_adding_coverage"],
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 0,
|
||||
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
|
||||
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
|
||||
layer_metadata: HashMap::default(),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
|
||||
};
|
||||
|
||||
let part = serde_json::from_str::<IndexPart>(example).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v1_indexpart_is_parsed() {
|
||||
let example = r#"{
|
||||
"version":1,
|
||||
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
|
||||
"missing_layers":["not_a_real_layer_but_adding_coverage"],
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
|
||||
version: 1,
|
||||
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
|
||||
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
|
||||
layer_metadata: HashMap::from([
|
||||
(RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned()), IndexLayerMetadata {
|
||||
file_size: Some(25600000),
|
||||
}),
|
||||
(RelativePath("not_a_real_layer_but_adding_coverage".to_owned()), IndexLayerMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: Some(9007199254741001),
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
|
||||
};
|
||||
|
||||
let part = serde_json::from_str::<IndexPart>(example).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,25 +69,14 @@ pub(super) async fn upload_timeline_layers<'a>(
|
||||
.map(|meta| meta.disk_consistent_lsn());
|
||||
|
||||
let already_uploaded_layers = remote_timeline
|
||||
.map(|timeline| {
|
||||
timeline
|
||||
.stored_files()
|
||||
.keys()
|
||||
.cloned()
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
})
|
||||
.map(|timeline| timeline.stored_files())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
let layers_to_upload = upload
|
||||
.layers_to_upload
|
||||
.iter()
|
||||
.filter_map(|(k, v)| {
|
||||
if !already_uploaded_layers.contains(k) {
|
||||
Some((k.to_owned(), v.to_owned()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.difference(&already_uploaded_layers)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if layers_to_upload.is_empty() {
|
||||
@@ -109,7 +98,7 @@ pub(super) async fn upload_timeline_layers<'a>(
|
||||
|
||||
let mut upload_tasks = layers_to_upload
|
||||
.into_iter()
|
||||
.map(|(source_path, known_metadata)| async move {
|
||||
.map(|source_path| async move {
|
||||
let source_file = match fs::File::open(&source_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to upen a source file for layer '{}'",
|
||||
@@ -120,7 +109,7 @@ pub(super) async fn upload_timeline_layers<'a>(
|
||||
Err(e) => return Err(UploadError::MissingLocalFile(source_path, e)),
|
||||
};
|
||||
|
||||
let fs_size = source_file
|
||||
let source_size = source_file
|
||||
.metadata()
|
||||
.await
|
||||
.with_context(|| {
|
||||
@@ -130,24 +119,10 @@ pub(super) async fn upload_timeline_layers<'a>(
|
||||
)
|
||||
})
|
||||
.map_err(UploadError::Other)?
|
||||
.len();
|
||||
|
||||
// FIXME: this looks bad
|
||||
if let Some(metadata_size) = known_metadata.file_size() {
|
||||
if metadata_size != fs_size {
|
||||
return Err(UploadError::Other(anyhow::anyhow!(
|
||||
"File {source_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"
|
||||
)));
|
||||
}
|
||||
} else {
|
||||
// this is a silly state we would like to avoid
|
||||
}
|
||||
|
||||
let fs_size = usize::try_from(fs_size).with_context(|| format!("File {source_path:?} size {fs_size} could not be converted to usize"))
|
||||
.map_err(UploadError::Other)?;
|
||||
.len() as usize;
|
||||
|
||||
match storage
|
||||
.upload_storage_object(Box::new(source_file), fs_size, &source_path)
|
||||
.upload_storage_object(Box::new(source_file), source_size, &source_path)
|
||||
.await
|
||||
.with_context(|| format!("Failed to upload layer file for {sync_id}"))
|
||||
{
|
||||
@@ -161,11 +136,8 @@ pub(super) async fn upload_timeline_layers<'a>(
|
||||
while let Some(upload_result) = upload_tasks.next().await {
|
||||
match upload_result {
|
||||
Ok(uploaded_path) => {
|
||||
let metadata = upload
|
||||
.layers_to_upload
|
||||
.remove(&uploaded_path)
|
||||
.expect("metadata should always exist, assuming no double uploads");
|
||||
upload.uploaded_layers.insert(uploaded_path, metadata);
|
||||
upload.layers_to_upload.remove(&uploaded_path);
|
||||
upload.uploaded_layers.insert(uploaded_path);
|
||||
}
|
||||
Err(e) => match e {
|
||||
UploadError::Other(e) => {
|
||||
@@ -290,7 +262,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
upload
|
||||
.uploaded_layers
|
||||
.keys()
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<BTreeSet<_>>(),
|
||||
layer_files
|
||||
@@ -385,7 +357,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
upload
|
||||
.uploaded_layers
|
||||
.keys()
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<BTreeSet<_>>(),
|
||||
layer_files
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -62,8 +62,6 @@ pub struct LayerMap {
|
||||
|
||||
struct LayerRTreeObject {
|
||||
layer: Arc<dyn Layer>,
|
||||
|
||||
envelope: AABB<[IntKey; 2]>,
|
||||
}
|
||||
|
||||
// Representation of Key as numeric type.
|
||||
@@ -199,16 +197,9 @@ impl PartialEq for LayerRTreeObject {
|
||||
impl RTreeObject for LayerRTreeObject {
|
||||
type Envelope = AABB<[IntKey; 2]>;
|
||||
fn envelope(&self) -> Self::Envelope {
|
||||
self.envelope
|
||||
}
|
||||
}
|
||||
|
||||
impl LayerRTreeObject {
|
||||
fn new(layer: Arc<dyn Layer>) -> Self {
|
||||
let key_range = layer.get_key_range();
|
||||
let lsn_range = layer.get_lsn_range();
|
||||
|
||||
let envelope = AABB::from_corners(
|
||||
let key_range = self.layer.get_key_range();
|
||||
let lsn_range = self.layer.get_lsn_range();
|
||||
AABB::from_corners(
|
||||
[
|
||||
IntKey::from(key_range.start.to_i128()),
|
||||
IntKey::from(lsn_range.start.0 as i128),
|
||||
@@ -217,8 +208,7 @@ impl LayerRTreeObject {
|
||||
IntKey::from(key_range.end.to_i128() - 1),
|
||||
IntKey::from(lsn_range.end.0 as i128 - 1),
|
||||
], // AABB::upper is inclusive, while `key_range.end` and `lsn_range.end` are exclusive
|
||||
);
|
||||
LayerRTreeObject { layer, envelope }
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,7 +338,7 @@ impl LayerMap {
|
||||
if layer.get_key_range() == (Key::MIN..Key::MAX) {
|
||||
self.l0_delta_layers.push(layer.clone());
|
||||
}
|
||||
self.historic_layers.insert(LayerRTreeObject::new(layer));
|
||||
self.historic_layers.insert(LayerRTreeObject { layer });
|
||||
NUM_ONDISK_LAYERS.inc();
|
||||
}
|
||||
|
||||
@@ -372,7 +362,7 @@ impl LayerMap {
|
||||
}
|
||||
assert!(self
|
||||
.historic_layers
|
||||
.remove(&LayerRTreeObject::new(layer))
|
||||
.remove(&LayerRTreeObject { layer })
|
||||
.is_some());
|
||||
NUM_ONDISK_LAYERS.dec();
|
||||
}
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
//!
|
||||
|
||||
use anyhow::{anyhow, bail, ensure, Context};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use bytes::Bytes;
|
||||
use fail::fail_point;
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::models::TimelineState;
|
||||
use tokio::sync::watch;
|
||||
use tokio::task::spawn_blocking;
|
||||
use tracing::*;
|
||||
|
||||
@@ -36,9 +34,8 @@ use crate::keyspace::{KeyPartitioning, KeySpace};
|
||||
use crate::metrics::TimelineMetrics;
|
||||
use crate::pgdatadir_mapping::BlockNumber;
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
|
||||
use crate::reltag::RelTag;
|
||||
use crate::tenant_config::TenantConfOpt;
|
||||
use pageserver_api::reltag::RelTag;
|
||||
|
||||
use postgres_ffi::to_pg_timestamp;
|
||||
use utils::{
|
||||
@@ -55,11 +52,7 @@ use crate::task_mgr::TaskKind;
|
||||
use crate::walreceiver::{is_etcd_client_initialized, spawn_connection_manager_task};
|
||||
use crate::walredo::WalRedoManager;
|
||||
use crate::CheckpointConfig;
|
||||
use crate::ZERO_PAGE;
|
||||
use crate::{
|
||||
page_cache,
|
||||
storage_sync::{self, index::LayerFileMetadata},
|
||||
};
|
||||
use crate::{page_cache, storage_sync};
|
||||
|
||||
pub struct Timeline {
|
||||
conf: &'static PageServerConf,
|
||||
@@ -162,8 +155,6 @@ pub struct Timeline {
|
||||
|
||||
/// Relation size cache
|
||||
pub rel_size_cache: RwLock<HashMap<RelTag, (Lsn, BlockNumber)>>,
|
||||
|
||||
state: watch::Sender<TimelineState>,
|
||||
}
|
||||
|
||||
/// Internal structure to hold all data needed for logical size calculation.
|
||||
@@ -311,6 +302,10 @@ pub struct GcInfo {
|
||||
|
||||
/// Public interface functions
|
||||
impl Timeline {
|
||||
//------------------------------------------------------------------------------
|
||||
// Public GET functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/// Get the LSN where this branch was created
|
||||
pub fn get_ancestor_lsn(&self) -> Lsn {
|
||||
self.ancestor_lsn
|
||||
@@ -420,11 +415,9 @@ impl Timeline {
|
||||
/// those functions with an LSN that has been processed yet is an error.
|
||||
///
|
||||
pub async fn wait_lsn(&self, lsn: Lsn) -> anyhow::Result<()> {
|
||||
anyhow::ensure!(self.is_active(), "Cannot wait for Lsn on inactive timeline");
|
||||
|
||||
// This should never be called from the WAL receiver, because that could lead
|
||||
// to a deadlock.
|
||||
anyhow::ensure!(
|
||||
ensure!(
|
||||
task_mgr::current_task_kind() != Some(TaskKind::WalReceiverConnection),
|
||||
"wait_lsn cannot be called in WAL receiver"
|
||||
);
|
||||
@@ -447,7 +440,7 @@ impl Timeline {
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
latest_gc_cutoff_lsn: &RcuReadGuard<Lsn>,
|
||||
) -> anyhow::Result<()> {
|
||||
) -> Result<()> {
|
||||
ensure!(
|
||||
lsn >= **latest_gc_cutoff_lsn,
|
||||
"LSN {} is earlier than latest GC horizon {} (we might've already garbage collected needed data)",
|
||||
@@ -457,6 +450,12 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Public PUT functions, to update the repository with new page versions.
|
||||
//
|
||||
// These are called by the WAL receiver to digest WAL records.
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/// Flush to disk all data that was written with the put_* functions
|
||||
///
|
||||
/// NOTE: This has nothing to do with checkpoint in PostgreSQL. We don't
|
||||
@@ -475,91 +474,6 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compact(&self) -> anyhow::Result<()> {
|
||||
let last_record_lsn = self.get_last_record_lsn();
|
||||
|
||||
// Last record Lsn could be zero in case the timelie was just created
|
||||
if !last_record_lsn.is_valid() {
|
||||
warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
//
|
||||
// High level strategy for compaction / image creation:
|
||||
//
|
||||
// 1. First, calculate the desired "partitioning" of the
|
||||
// currently in-use key space. The goal is to partition the
|
||||
// key space into roughly fixed-size chunks, but also take into
|
||||
// account any existing image layers, and try to align the
|
||||
// chunk boundaries with the existing image layers to avoid
|
||||
// too much churn. Also try to align chunk boundaries with
|
||||
// relation boundaries. In principle, we don't know about
|
||||
// relation boundaries here, we just deal with key-value
|
||||
// pairs, and the code in pgdatadir_mapping.rs knows how to
|
||||
// map relations into key-value pairs. But in practice we know
|
||||
// that 'field6' is the block number, and the fields 1-5
|
||||
// identify a relation. This is just an optimization,
|
||||
// though.
|
||||
//
|
||||
// 2. Once we know the partitioning, for each partition,
|
||||
// decide if it's time to create a new image layer. The
|
||||
// criteria is: there has been too much "churn" since the last
|
||||
// image layer? The "churn" is fuzzy concept, it's a
|
||||
// combination of too many delta files, or too much WAL in
|
||||
// total in the delta file. Or perhaps: if creating an image
|
||||
// file would allow to delete some older files.
|
||||
//
|
||||
// 3. After that, we compact all level0 delta files if there
|
||||
// are too many of them. While compacting, we also garbage
|
||||
// collect any page versions that are no longer needed because
|
||||
// of the new image layers we created in step 2.
|
||||
//
|
||||
// TODO: This high level strategy hasn't been implemented yet.
|
||||
// Below are functions compact_level0() and create_image_layers()
|
||||
// but they are a bit ad hoc and don't quite work like it's explained
|
||||
// above. Rewrite it.
|
||||
let _layer_removal_cs = self.layer_removal_cs.lock().unwrap();
|
||||
|
||||
let target_file_size = self.get_checkpoint_distance();
|
||||
|
||||
// Define partitioning schema if needed
|
||||
|
||||
match self.repartition(
|
||||
self.get_last_record_lsn(),
|
||||
self.get_compaction_target_size(),
|
||||
) {
|
||||
Ok((partitioning, lsn)) => {
|
||||
// 2. Create new image layers for partitions that have been modified
|
||||
// "enough".
|
||||
let layer_paths_to_upload = self.create_image_layers(&partitioning, lsn, false)?;
|
||||
if !layer_paths_to_upload.is_empty()
|
||||
&& self.upload_layers.load(atomic::Ordering::Relaxed)
|
||||
{
|
||||
storage_sync::schedule_layer_upload(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
layer_paths_to_upload,
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Compact
|
||||
let timer = self.metrics.compact_time_histo.start_timer();
|
||||
self.compact_level0(target_file_size)?;
|
||||
timer.stop_and_record();
|
||||
}
|
||||
Err(err) => {
|
||||
// no partitioning? This is normal, if the timeline was just created
|
||||
// as an empty timeline. Also in unit tests, when we use the timeline
|
||||
// as a simple key-value store, ignoring the datadir layout. Log the
|
||||
// error but continue.
|
||||
error!("could not compact, repartitioning keyspace failed: {err:?}");
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mutate the timeline with a [`TimelineWriter`].
|
||||
pub fn writer(&self) -> TimelineWriter<'_> {
|
||||
TimelineWriter {
|
||||
@@ -567,109 +481,6 @@ impl Timeline {
|
||||
_write_guard: self.write_lock.lock().unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve current logical size of the timeline.
|
||||
///
|
||||
/// The size could be lagging behind the actual number, in case
|
||||
/// the initial size calculation has not been run (gets triggered on the first size access).
|
||||
pub fn get_current_logical_size(self: &Arc<Self>) -> anyhow::Result<u64> {
|
||||
let current_size = self.current_logical_size.current_size()?;
|
||||
debug!("Current size: {current_size:?}");
|
||||
|
||||
let size = current_size.size();
|
||||
if let (CurrentLogicalSize::Approximate(_), Some(init_lsn)) =
|
||||
(current_size, self.current_logical_size.initial_part_end)
|
||||
{
|
||||
self.try_spawn_size_init_task(init_lsn);
|
||||
}
|
||||
|
||||
Ok(size)
|
||||
}
|
||||
|
||||
/// Check if more than 'checkpoint_distance' of WAL has been accumulated in
|
||||
/// the in-memory layer, and initiate flushing it if so.
|
||||
///
|
||||
/// Also flush after a period of time without new data -- it helps
|
||||
/// safekeepers to regard pageserver as caught up and suspend activity.
|
||||
pub fn check_checkpoint_distance(self: &Arc<Timeline>) -> anyhow::Result<()> {
|
||||
let last_lsn = self.get_last_record_lsn();
|
||||
let layers = self.layers.read().unwrap();
|
||||
if let Some(open_layer) = &layers.open_layer {
|
||||
let open_layer_size = open_layer.size()?;
|
||||
drop(layers);
|
||||
let last_freeze_at = self.last_freeze_at.load();
|
||||
let last_freeze_ts = *(self.last_freeze_ts.read().unwrap());
|
||||
let distance = last_lsn.widening_sub(last_freeze_at);
|
||||
// Checkpointing the open layer can be triggered by layer size or LSN range.
|
||||
// S3 has a 5 GB limit on the size of one upload (without multi-part upload), and
|
||||
// we want to stay below that with a big margin. The LSN distance determines how
|
||||
// much WAL the safekeepers need to store.
|
||||
if distance >= self.get_checkpoint_distance().into()
|
||||
|| open_layer_size > self.get_checkpoint_distance()
|
||||
|| (distance > 0 && last_freeze_ts.elapsed() >= self.get_checkpoint_timeout())
|
||||
{
|
||||
info!(
|
||||
"check_checkpoint_distance {}, layer size {}, elapsed since last flush {:?}",
|
||||
distance,
|
||||
open_layer_size,
|
||||
last_freeze_ts.elapsed()
|
||||
);
|
||||
|
||||
self.freeze_inmem_layer(true);
|
||||
self.last_freeze_at.store(last_lsn);
|
||||
*(self.last_freeze_ts.write().unwrap()) = Instant::now();
|
||||
|
||||
// Launch a task to flush the frozen layer to disk, unless
|
||||
// a task was already running. (If the task was running
|
||||
// at the time that we froze the layer, it must've seen the
|
||||
// the layer we just froze before it exited; see comments
|
||||
// in flush_frozen_layers())
|
||||
if let Ok(guard) = self.layer_flush_lock.try_lock() {
|
||||
drop(guard);
|
||||
let self_clone = Arc::clone(self);
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::LayerFlushTask,
|
||||
Some(self.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
"layer flush task",
|
||||
false,
|
||||
async move { self_clone.flush_frozen_layers(false) },
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_state(&self, new_state: TimelineState) {
|
||||
match (self.current_state(), new_state) {
|
||||
(equal_state_1, equal_state_2) if equal_state_1 == equal_state_2 => {
|
||||
debug!("Ignoring new state, equal to the existing one: {equal_state_2:?}");
|
||||
}
|
||||
(TimelineState::Broken, _) => {
|
||||
error!("Ignoring state update {new_state:?} for broken tenant");
|
||||
}
|
||||
(TimelineState::Paused, TimelineState::Active) => {
|
||||
debug!("Not activating a paused timeline");
|
||||
}
|
||||
(_, new_state) => {
|
||||
self.state.send_replace(new_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn current_state(&self) -> TimelineState {
|
||||
*self.state.borrow()
|
||||
}
|
||||
|
||||
pub fn is_active(&self) -> bool {
|
||||
self.current_state() == TimelineState::Active
|
||||
}
|
||||
|
||||
pub fn subscribe_for_state_updates(&self) -> watch::Receiver<TimelineState> {
|
||||
self.state.subscribe()
|
||||
}
|
||||
}
|
||||
|
||||
// Private functions
|
||||
@@ -713,7 +524,7 @@ impl Timeline {
|
||||
///
|
||||
/// Loads the metadata for the timeline into memory, but not the layer map.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) fn new(
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: Arc<RwLock<TenantConfOpt>>,
|
||||
metadata: TimelineMetadata,
|
||||
@@ -723,9 +534,8 @@ impl Timeline {
|
||||
walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
|
||||
upload_layers: bool,
|
||||
pg_version: u32,
|
||||
) -> Self {
|
||||
) -> Timeline {
|
||||
let disk_consistent_lsn = metadata.disk_consistent_lsn();
|
||||
let (state, _) = watch::channel(TimelineState::Suspended);
|
||||
|
||||
let mut result = Timeline {
|
||||
conf,
|
||||
@@ -782,17 +592,16 @@ impl Timeline {
|
||||
|
||||
last_received_wal: Mutex::new(None),
|
||||
rel_size_cache: RwLock::new(HashMap::new()),
|
||||
state,
|
||||
};
|
||||
result.repartition_threshold = result.get_checkpoint_distance() / 10;
|
||||
result
|
||||
}
|
||||
|
||||
pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
|
||||
pub fn launch_wal_receiver(self: &Arc<Self>) -> anyhow::Result<()> {
|
||||
if !is_etcd_client_initialized() {
|
||||
if cfg!(test) {
|
||||
info!("not launching WAL receiver because etcd client hasn't been initialized");
|
||||
return;
|
||||
return Ok(());
|
||||
} else {
|
||||
panic!("etcd client not initialized");
|
||||
}
|
||||
@@ -820,14 +629,16 @@ impl Timeline {
|
||||
walreceiver_connect_timeout,
|
||||
lagging_wal_timeout,
|
||||
max_lsn_wal_lag,
|
||||
);
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Scan the timeline directory to populate the layer map.
|
||||
/// Returns all timeline-related files that were found and loaded.
|
||||
///
|
||||
pub(super) fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> {
|
||||
pub fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> {
|
||||
let mut layers = self.layers.write().unwrap();
|
||||
let mut num_layers = 0;
|
||||
|
||||
@@ -913,13 +724,33 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn layer_removal_guard(&self) -> anyhow::Result<MutexGuard<()>> {
|
||||
pub fn layer_removal_guard(&self) -> anyhow::Result<MutexGuard<()>> {
|
||||
self.layer_removal_cs
|
||||
.try_lock()
|
||||
.map_err(|e| anyhow!("cannot lock compaction critical section {e}"))
|
||||
}
|
||||
|
||||
/// Retrieve current logical size of the timeline.
|
||||
///
|
||||
/// The size could be lagging behind the actual number, in case
|
||||
/// the initial size calculation has not been run (gets triggered on the first size access).
|
||||
pub fn get_current_logical_size(self: &Arc<Self>) -> anyhow::Result<u64> {
|
||||
let current_size = self.current_logical_size.current_size()?;
|
||||
debug!("Current size: {current_size:?}");
|
||||
|
||||
let size = current_size.size();
|
||||
if let (CurrentLogicalSize::Approximate(_), Some(init_lsn)) =
|
||||
(current_size, self.current_logical_size.initial_part_end)
|
||||
{
|
||||
self.try_spawn_size_init_task(init_lsn);
|
||||
}
|
||||
|
||||
Ok(size)
|
||||
}
|
||||
|
||||
fn try_spawn_size_init_task(self: &Arc<Self>, init_lsn: Lsn) {
|
||||
let timeline_id = self.timeline_id;
|
||||
|
||||
// Atomically check if the timeline size calculation had already started.
|
||||
// If the flag was not already set, this sets it.
|
||||
if !self
|
||||
@@ -936,42 +767,17 @@ impl Timeline {
|
||||
"initial size calculation",
|
||||
false,
|
||||
async move {
|
||||
let mut timeline_state_updates = self_clone.subscribe_for_state_updates();
|
||||
let self_calculation = Arc::clone(&self_clone);
|
||||
tokio::select! {
|
||||
calculation_result = spawn_blocking(move || self_calculation.calculate_logical_size(init_lsn)) => {
|
||||
let calculated_size = calculation_result
|
||||
.context("Failed to spawn calculation result task")?
|
||||
.context("Failed to calculate logical size")?;
|
||||
match self_clone.current_logical_size.initial_logical_size.set(calculated_size) {
|
||||
Ok(()) => info!("Successfully calculated initial logical size"),
|
||||
Err(existing_size) => error!("Tried to update initial timeline size value to {calculated_size}, but the size was already set to {existing_size}, not changing"),
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
new_event = async {
|
||||
loop {
|
||||
match timeline_state_updates.changed().await {
|
||||
Ok(()) => {
|
||||
let new_state = *timeline_state_updates.borrow();
|
||||
match new_state {
|
||||
// we're running this job for active timelines only
|
||||
TimelineState::Active => continue,
|
||||
TimelineState::Broken | TimelineState::Paused | TimelineState::Suspended => return Some(new_state),
|
||||
}
|
||||
}
|
||||
Err(_sender_dropped_error) => return None,
|
||||
}
|
||||
}
|
||||
} => {
|
||||
match new_event {
|
||||
Some(new_state) => info!("Timeline became inactive (new state: {new_state:?}), dropping current connections until it reactivates"),
|
||||
None => info!("Timeline dropped state updates sender, stopping init size calculation"),
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
let calculated_size = self_clone.calculate_logical_size(init_lsn)?;
|
||||
let result = spawn_blocking(move || {
|
||||
self_clone.current_logical_size.initial_logical_size.set(calculated_size)
|
||||
}).await?;
|
||||
match result {
|
||||
Ok(()) => info!("Successfully calculated initial logical size"),
|
||||
Err(existing_size) => error!("Tried to update initial timeline size value to {calculated_size}, but the size was already set to {existing_size}, not changing"),
|
||||
}
|
||||
}.instrument(info_span!("initial_logical_size_calculation", tenant = %self.tenant_id, timeline = %self.timeline_id)),
|
||||
Ok(())
|
||||
}
|
||||
.instrument(info_span!("initial_logical_size_calculation", timeline = %timeline_id))
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1162,7 +968,7 @@ impl Timeline {
|
||||
Some((lsn, img))
|
||||
}
|
||||
|
||||
fn get_ancestor_timeline(&self) -> anyhow::Result<Arc<Timeline>> {
|
||||
fn get_ancestor_timeline(&self) -> Result<Arc<Timeline>> {
|
||||
let ancestor = self.ancestor_timeline.as_ref().with_context(|| {
|
||||
format!(
|
||||
"Ancestor is missing. Timeline id: {} Ancestor id {:?}",
|
||||
@@ -1221,14 +1027,14 @@ impl Timeline {
|
||||
Ok(layer)
|
||||
}
|
||||
|
||||
fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> anyhow::Result<()> {
|
||||
fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
|
||||
//info!("PUT: key {} at {}", key, lsn);
|
||||
let layer = self.get_layer_for_write(lsn)?;
|
||||
layer.put_value(key, lsn, val)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn put_tombstone(&self, key_range: Range<Key>, lsn: Lsn) -> anyhow::Result<()> {
|
||||
fn put_tombstone(&self, key_range: Range<Key>, lsn: Lsn) -> Result<()> {
|
||||
let layer = self.get_layer_for_write(lsn)?;
|
||||
layer.put_tombstone(key_range, lsn)?;
|
||||
|
||||
@@ -1267,6 +1073,64 @@ impl Timeline {
|
||||
drop(layers);
|
||||
}
|
||||
|
||||
///
|
||||
/// Check if more than 'checkpoint_distance' of WAL has been accumulated in
|
||||
/// the in-memory layer, and initiate flushing it if so.
|
||||
///
|
||||
/// Also flush after a period of time without new data -- it helps
|
||||
/// safekeepers to regard pageserver as caught up and suspend activity.
|
||||
///
|
||||
pub fn check_checkpoint_distance(self: &Arc<Timeline>) -> Result<()> {
|
||||
let last_lsn = self.get_last_record_lsn();
|
||||
let layers = self.layers.read().unwrap();
|
||||
if let Some(open_layer) = &layers.open_layer {
|
||||
let open_layer_size = open_layer.size()?;
|
||||
drop(layers);
|
||||
let last_freeze_at = self.last_freeze_at.load();
|
||||
let last_freeze_ts = *(self.last_freeze_ts.read().unwrap());
|
||||
let distance = last_lsn.widening_sub(last_freeze_at);
|
||||
// Checkpointing the open layer can be triggered by layer size or LSN range.
|
||||
// S3 has a 5 GB limit on the size of one upload (without multi-part upload), and
|
||||
// we want to stay below that with a big margin. The LSN distance determines how
|
||||
// much WAL the safekeepers need to store.
|
||||
if distance >= self.get_checkpoint_distance().into()
|
||||
|| open_layer_size > self.get_checkpoint_distance()
|
||||
|| (distance > 0 && last_freeze_ts.elapsed() >= self.get_checkpoint_timeout())
|
||||
{
|
||||
info!(
|
||||
"check_checkpoint_distance {}, layer size {}, elapsed since last flush {:?}",
|
||||
distance,
|
||||
open_layer_size,
|
||||
last_freeze_ts.elapsed()
|
||||
);
|
||||
|
||||
self.freeze_inmem_layer(true);
|
||||
self.last_freeze_at.store(last_lsn);
|
||||
*(self.last_freeze_ts.write().unwrap()) = Instant::now();
|
||||
|
||||
// Launch a task to flush the frozen layer to disk, unless
|
||||
// a task was already running. (If the task was running
|
||||
// at the time that we froze the layer, it must've seen the
|
||||
// the layer we just froze before it exited; see comments
|
||||
// in flush_frozen_layers())
|
||||
if let Ok(guard) = self.layer_flush_lock.try_lock() {
|
||||
drop(guard);
|
||||
let self_clone = Arc::clone(self);
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
task_mgr::TaskKind::LayerFlushTask,
|
||||
Some(self.tenant_id),
|
||||
Some(self.timeline_id),
|
||||
"layer flush task",
|
||||
false,
|
||||
async move { self_clone.flush_frozen_layers(false) },
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Flush all frozen layers to disk.
|
||||
///
|
||||
/// Only one task at a time can be doing layer-flushing for a
|
||||
@@ -1274,7 +1138,7 @@ impl Timeline {
|
||||
/// currently doing the flushing, this function will wait for it
|
||||
/// to finish. If 'wait' is false, this function will return
|
||||
/// immediately instead.
|
||||
fn flush_frozen_layers(&self, wait: bool) -> anyhow::Result<()> {
|
||||
fn flush_frozen_layers(&self, wait: bool) -> Result<()> {
|
||||
let flush_lock_guard = if wait {
|
||||
self.layer_flush_lock.lock().unwrap()
|
||||
} else {
|
||||
@@ -1313,7 +1177,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/// Flush one frozen in-memory layer to disk, as a new delta layer.
|
||||
fn flush_frozen_layer(&self, frozen_layer: Arc<InMemoryLayer>) -> anyhow::Result<()> {
|
||||
fn flush_frozen_layer(&self, frozen_layer: Arc<InMemoryLayer>) -> Result<()> {
|
||||
// As a special case, when we have just imported an image into the repository,
|
||||
// instead of writing out a L0 delta layer, we directly write out image layer
|
||||
// files instead. This is possible as long as *all* the data imported into the
|
||||
@@ -1326,8 +1190,8 @@ impl Timeline {
|
||||
self.create_image_layers(&partitioning, self.initdb_lsn, true)?
|
||||
} else {
|
||||
// normal case, write out a L0 delta layer file.
|
||||
let (delta_path, metadata) = self.create_delta_layer(&frozen_layer)?;
|
||||
HashMap::from([(delta_path, metadata)])
|
||||
let delta_path = self.create_delta_layer(&frozen_layer)?;
|
||||
HashSet::from([delta_path])
|
||||
};
|
||||
|
||||
fail_point!("flush-frozen-before-sync");
|
||||
@@ -1353,86 +1217,85 @@ impl Timeline {
|
||||
// TODO: This perhaps should be done in 'flush_frozen_layers', after flushing
|
||||
// *all* the layers, to avoid fsyncing the file multiple times.
|
||||
let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);
|
||||
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
|
||||
self.update_disk_consistent_lsn(disk_consistent_lsn, layer_paths_to_upload)?;
|
||||
|
||||
// If we were able to advance 'disk_consistent_lsn', save it the metadata file.
|
||||
// After crash, we will restart WAL streaming and processing from that point.
|
||||
if disk_consistent_lsn != old_disk_consistent_lsn {
|
||||
assert!(disk_consistent_lsn > old_disk_consistent_lsn);
|
||||
self.update_metadata_file(disk_consistent_lsn, layer_paths_to_upload)?;
|
||||
// Also update the in-memory copy
|
||||
self.disk_consistent_lsn.store(disk_consistent_lsn);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update metadata file
|
||||
fn update_metadata_file(
|
||||
fn update_disk_consistent_lsn(
|
||||
&self,
|
||||
disk_consistent_lsn: Lsn,
|
||||
layer_paths_to_upload: HashMap<PathBuf, LayerFileMetadata>,
|
||||
) -> anyhow::Result<()> {
|
||||
// We can only save a valid 'prev_record_lsn' value on disk if we
|
||||
// flushed *all* in-memory changes to disk. We only track
|
||||
// 'prev_record_lsn' in memory for the latest processed record, so we
|
||||
// don't remember what the correct value that corresponds to some old
|
||||
// LSN is. But if we flush everything, then the value corresponding
|
||||
// current 'last_record_lsn' is correct and we can store it on disk.
|
||||
let RecordLsn {
|
||||
last: last_record_lsn,
|
||||
prev: prev_record_lsn,
|
||||
} = self.last_record_lsn.load();
|
||||
let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn {
|
||||
Some(prev_record_lsn)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
layer_paths_to_upload: HashSet<PathBuf>,
|
||||
) -> Result<()> {
|
||||
// If we were able to advance 'disk_consistent_lsn', save it the metadata file.
|
||||
// After crash, we will restart WAL streaming and processing from that point.
|
||||
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
|
||||
if disk_consistent_lsn != old_disk_consistent_lsn {
|
||||
assert!(disk_consistent_lsn > old_disk_consistent_lsn);
|
||||
|
||||
let ancestor_timeline_id = self
|
||||
.ancestor_timeline
|
||||
.as_ref()
|
||||
.map(|ancestor| ancestor.timeline_id);
|
||||
// We can only save a valid 'prev_record_lsn' value on disk if we
|
||||
// flushed *all* in-memory changes to disk. We only track
|
||||
// 'prev_record_lsn' in memory for the latest processed record, so we
|
||||
// don't remember what the correct value that corresponds to some old
|
||||
// LSN is. But if we flush everything, then the value corresponding
|
||||
// current 'last_record_lsn' is correct and we can store it on disk.
|
||||
let RecordLsn {
|
||||
last: last_record_lsn,
|
||||
prev: prev_record_lsn,
|
||||
} = self.last_record_lsn.load();
|
||||
let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn {
|
||||
Some(prev_record_lsn)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let metadata = TimelineMetadata::new(
|
||||
disk_consistent_lsn,
|
||||
ondisk_prev_record_lsn,
|
||||
ancestor_timeline_id,
|
||||
self.ancestor_lsn,
|
||||
*self.latest_gc_cutoff_lsn.read(),
|
||||
self.initdb_lsn,
|
||||
self.pg_version,
|
||||
);
|
||||
let ancestor_timeline_id = self
|
||||
.ancestor_timeline
|
||||
.as_ref()
|
||||
.map(|ancestor| ancestor.timeline_id);
|
||||
|
||||
fail_point!("checkpoint-before-saving-metadata", |x| bail!(
|
||||
"{}",
|
||||
x.unwrap()
|
||||
));
|
||||
|
||||
save_metadata(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
&metadata,
|
||||
false,
|
||||
)?;
|
||||
|
||||
if self.can_upload_layers() {
|
||||
storage_sync::schedule_layer_upload(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
layer_paths_to_upload,
|
||||
Some(metadata),
|
||||
let metadata = TimelineMetadata::new(
|
||||
disk_consistent_lsn,
|
||||
ondisk_prev_record_lsn,
|
||||
ancestor_timeline_id,
|
||||
self.ancestor_lsn,
|
||||
*self.latest_gc_cutoff_lsn.read(),
|
||||
self.initdb_lsn,
|
||||
self.pg_version,
|
||||
);
|
||||
|
||||
fail_point!("checkpoint-before-saving-metadata", |x| bail!(
|
||||
"{}",
|
||||
x.unwrap()
|
||||
));
|
||||
|
||||
save_metadata(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_id,
|
||||
&metadata,
|
||||
false,
|
||||
)?;
|
||||
|
||||
if self.upload_layers.load(atomic::Ordering::Relaxed) {
|
||||
storage_sync::schedule_layer_upload(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
layer_paths_to_upload,
|
||||
Some(metadata),
|
||||
);
|
||||
}
|
||||
|
||||
// Also update the in-memory copy
|
||||
self.disk_consistent_lsn.store(disk_consistent_lsn);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Write out the given frozen in-memory layer as a new L0 delta file
|
||||
fn create_delta_layer(
|
||||
&self,
|
||||
frozen_layer: &InMemoryLayer,
|
||||
) -> anyhow::Result<(PathBuf, LayerFileMetadata)> {
|
||||
fn create_delta_layer(&self, frozen_layer: &InMemoryLayer) -> Result<PathBuf> {
|
||||
// Write it out
|
||||
let new_delta = frozen_layer.write_to_disk()?;
|
||||
let new_delta_path = new_delta.path();
|
||||
@@ -1458,16 +1321,100 @@ impl Timeline {
|
||||
|
||||
// update the timeline's physical size
|
||||
let sz = new_delta_path.metadata()?.len();
|
||||
|
||||
self.metrics.current_physical_size_gauge.add(sz);
|
||||
// update metrics
|
||||
self.metrics.num_persistent_files_created.inc_by(1);
|
||||
self.metrics.persistent_bytes_written.inc_by(sz);
|
||||
|
||||
Ok((new_delta_path, LayerFileMetadata::new(sz)))
|
||||
Ok(new_delta_path)
|
||||
}
|
||||
|
||||
fn repartition(&self, lsn: Lsn, partition_size: u64) -> anyhow::Result<(KeyPartitioning, Lsn)> {
|
||||
pub fn compact(&self) -> anyhow::Result<()> {
|
||||
let last_record_lsn = self.get_last_record_lsn();
|
||||
|
||||
// Last record Lsn could be zero in case the timelie was just created
|
||||
if !last_record_lsn.is_valid() {
|
||||
warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
//
|
||||
// High level strategy for compaction / image creation:
|
||||
//
|
||||
// 1. First, calculate the desired "partitioning" of the
|
||||
// currently in-use key space. The goal is to partition the
|
||||
// key space into roughly fixed-size chunks, but also take into
|
||||
// account any existing image layers, and try to align the
|
||||
// chunk boundaries with the existing image layers to avoid
|
||||
// too much churn. Also try to align chunk boundaries with
|
||||
// relation boundaries. In principle, we don't know about
|
||||
// relation boundaries here, we just deal with key-value
|
||||
// pairs, and the code in pgdatadir_mapping.rs knows how to
|
||||
// map relations into key-value pairs. But in practice we know
|
||||
// that 'field6' is the block number, and the fields 1-5
|
||||
// identify a relation. This is just an optimization,
|
||||
// though.
|
||||
//
|
||||
// 2. Once we know the partitioning, for each partition,
|
||||
// decide if it's time to create a new image layer. The
|
||||
// criteria is: there has been too much "churn" since the last
|
||||
// image layer? The "churn" is fuzzy concept, it's a
|
||||
// combination of too many delta files, or too much WAL in
|
||||
// total in the delta file. Or perhaps: if creating an image
|
||||
// file would allow to delete some older files.
|
||||
//
|
||||
// 3. After that, we compact all level0 delta files if there
|
||||
// are too many of them. While compacting, we also garbage
|
||||
// collect any page versions that are no longer needed because
|
||||
// of the new image layers we created in step 2.
|
||||
//
|
||||
// TODO: This high level strategy hasn't been implemented yet.
|
||||
// Below are functions compact_level0() and create_image_layers()
|
||||
// but they are a bit ad hoc and don't quite work like it's explained
|
||||
// above. Rewrite it.
|
||||
let _layer_removal_cs = self.layer_removal_cs.lock().unwrap();
|
||||
|
||||
let target_file_size = self.get_checkpoint_distance();
|
||||
|
||||
// Define partitioning schema if needed
|
||||
|
||||
match self.repartition(
|
||||
self.get_last_record_lsn(),
|
||||
self.get_compaction_target_size(),
|
||||
) {
|
||||
Ok((partitioning, lsn)) => {
|
||||
// 2. Create new image layers for partitions that have been modified
|
||||
// "enough".
|
||||
let layer_paths_to_upload = self.create_image_layers(&partitioning, lsn, false)?;
|
||||
if !layer_paths_to_upload.is_empty()
|
||||
&& self.upload_layers.load(atomic::Ordering::Relaxed)
|
||||
{
|
||||
storage_sync::schedule_layer_upload(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
HashSet::from_iter(layer_paths_to_upload),
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Compact
|
||||
let timer = self.metrics.compact_time_histo.start_timer();
|
||||
self.compact_level0(target_file_size)?;
|
||||
timer.stop_and_record();
|
||||
}
|
||||
Err(err) => {
|
||||
// no partitioning? This is normal, if the timeline was just created
|
||||
// as an empty timeline. Also in unit tests, when we use the timeline
|
||||
// as a simple key-value store, ignoring the datadir layout. Log the
|
||||
// error but continue.
|
||||
error!("could not compact, repartitioning keyspace failed: {err:?}");
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn repartition(&self, lsn: Lsn, partition_size: u64) -> Result<(KeyPartitioning, Lsn)> {
|
||||
let mut partitioning_guard = self.partitioning.lock().unwrap();
|
||||
if partitioning_guard.1 == Lsn(0)
|
||||
|| lsn.0 - partitioning_guard.1 .0 > self.repartition_threshold
|
||||
@@ -1481,7 +1428,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// Is it time to create a new image layer for the given partition?
|
||||
fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> anyhow::Result<bool> {
|
||||
fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> Result<bool> {
|
||||
let layers = self.layers.read().unwrap();
|
||||
|
||||
for part_range in &partition.ranges {
|
||||
@@ -1526,9 +1473,10 @@ impl Timeline {
|
||||
partitioning: &KeyPartitioning,
|
||||
lsn: Lsn,
|
||||
force: bool,
|
||||
) -> anyhow::Result<HashMap<PathBuf, LayerFileMetadata>> {
|
||||
) -> Result<HashSet<PathBuf>> {
|
||||
let timer = self.metrics.create_images_time_histo.start_timer();
|
||||
let mut image_layers: Vec<ImageLayer> = Vec::new();
|
||||
let mut layer_paths_to_upload = HashSet::new();
|
||||
for partition in partitioning.parts.iter() {
|
||||
if force || self.time_for_new_image_layer(partition, lsn)? {
|
||||
let img_range =
|
||||
@@ -1544,37 +1492,13 @@ impl Timeline {
|
||||
for range in &partition.ranges {
|
||||
let mut key = range.start;
|
||||
while key < range.end {
|
||||
let img = match self.get(key, lsn) {
|
||||
Ok(img) => img,
|
||||
Err(err) => {
|
||||
// If we fail to reconstruct a VM or FSM page, we can zero the
|
||||
// page without losing any actual user data. That seems better
|
||||
// than failing repeatedly and getting stuck.
|
||||
//
|
||||
// We had a bug at one point, where we truncated the FSM and VM
|
||||
// in the pageserver, but the Postgres didn't know about that
|
||||
// and continued to generate incremental WAL records for pages
|
||||
// that didn't exist in the pageserver. Trying to replay those
|
||||
// WAL records failed to find the previous image of the page.
|
||||
// This special case allows us to recover from that situation.
|
||||
// See https://github.com/neondatabase/neon/issues/2601.
|
||||
//
|
||||
// Unfortunately we cannot do this for the main fork, or for
|
||||
// any metadata keys, keys, as that would lead to actual data
|
||||
// loss.
|
||||
if is_rel_fsm_block_key(key) || is_rel_vm_block_key(key) {
|
||||
warn!("could not reconstruct FSM or VM key {key}, filling with zeros: {err:?}");
|
||||
ZERO_PAGE.clone()
|
||||
} else {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
};
|
||||
let img = self.get(key, lsn)?;
|
||||
image_layer_writer.put_image(key, &img)?;
|
||||
key = key.next();
|
||||
}
|
||||
}
|
||||
let image_layer = image_layer_writer.finish()?;
|
||||
layer_paths_to_upload.insert(image_layer.path());
|
||||
image_layers.push(image_layer);
|
||||
}
|
||||
}
|
||||
@@ -1588,25 +1512,15 @@ impl Timeline {
|
||||
//
|
||||
// Compaction creates multiple image layers. It would be better to create them all
|
||||
// and fsync them all in parallel.
|
||||
let all_paths = image_layers
|
||||
.iter()
|
||||
.map(|layer| layer.path())
|
||||
.chain(std::iter::once(
|
||||
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
))
|
||||
.collect::<Vec<_>>();
|
||||
let mut all_paths = Vec::from_iter(layer_paths_to_upload.clone());
|
||||
all_paths.push(self.conf.timeline_path(&self.timeline_id, &self.tenant_id));
|
||||
par_fsync::par_fsync(&all_paths)?;
|
||||
|
||||
let mut layer_paths_to_upload = HashMap::with_capacity(image_layers.len());
|
||||
|
||||
let mut layers = self.layers.write().unwrap();
|
||||
for l in image_layers {
|
||||
let path = l.path();
|
||||
let metadata = path.metadata()?;
|
||||
|
||||
layer_paths_to_upload.insert(path, LayerFileMetadata::new(metadata.len()));
|
||||
|
||||
self.metrics.current_physical_size_gauge.add(metadata.len());
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
.add(l.path().metadata()?.len());
|
||||
layers.insert_historic(Arc::new(l));
|
||||
}
|
||||
drop(layers);
|
||||
@@ -1619,7 +1533,7 @@ impl Timeline {
|
||||
/// Collect a bunch of Level 0 layer files, and compact and reshuffle them as
|
||||
/// as Level 1 files.
|
||||
///
|
||||
fn compact_level0(&self, target_file_size: u64) -> anyhow::Result<()> {
|
||||
fn compact_level0(&self, target_file_size: u64) -> Result<()> {
|
||||
let layers = self.layers.read().unwrap();
|
||||
let mut level0_deltas = layers.get_level0_deltas()?;
|
||||
drop(layers);
|
||||
@@ -1857,16 +1771,16 @@ impl Timeline {
|
||||
}
|
||||
|
||||
let mut layers = self.layers.write().unwrap();
|
||||
let mut new_layer_paths = HashMap::with_capacity(new_layers.len());
|
||||
let mut new_layer_paths = HashSet::with_capacity(new_layers.len());
|
||||
for l in new_layers {
|
||||
let new_delta_path = l.path();
|
||||
|
||||
let metadata = new_delta_path.metadata()?;
|
||||
|
||||
// update the timeline's physical size
|
||||
self.metrics.current_physical_size_gauge.add(metadata.len());
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
.add(new_delta_path.metadata()?.len());
|
||||
|
||||
new_layer_paths.insert(new_delta_path, LayerFileMetadata::new(metadata.len()));
|
||||
new_layer_paths.insert(new_delta_path);
|
||||
layers.insert_historic(Arc::new(l));
|
||||
}
|
||||
|
||||
@@ -1886,7 +1800,7 @@ impl Timeline {
|
||||
}
|
||||
drop(layers);
|
||||
|
||||
if self.can_upload_layers() {
|
||||
if self.upload_layers.load(atomic::Ordering::Relaxed) {
|
||||
storage_sync::schedule_layer_upload(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
@@ -1929,12 +1843,12 @@ impl Timeline {
|
||||
///
|
||||
/// The 'pitr' duration is used to calculate a 'pitr_cutoff', which can be used to determine
|
||||
/// whether a record is needed for PITR.
|
||||
pub(super) fn update_gc_info(
|
||||
pub fn update_gc_info(
|
||||
&self,
|
||||
retain_lsns: Vec<Lsn>,
|
||||
cutoff_horizon: Lsn,
|
||||
pitr: Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
) -> Result<()> {
|
||||
let mut gc_info = self.gc_info.write().unwrap();
|
||||
|
||||
gc_info.horizon_cutoff = cutoff_horizon;
|
||||
@@ -1989,8 +1903,8 @@ impl Timeline {
|
||||
/// within a layer file. We can only remove the whole file if it's fully
|
||||
/// obsolete.
|
||||
///
|
||||
pub(super) fn gc(&self) -> anyhow::Result<GcResult> {
|
||||
let mut result: GcResult = GcResult::default();
|
||||
pub fn gc(&self) -> Result<GcResult> {
|
||||
let mut result: GcResult = Default::default();
|
||||
let now = SystemTime::now();
|
||||
|
||||
fail_point!("before-timeline-gc");
|
||||
@@ -2033,9 +1947,6 @@ impl Timeline {
|
||||
);
|
||||
write_guard.store_and_unlock(new_gc_cutoff).wait();
|
||||
}
|
||||
// Persist the new GC cutoff value in the metadata file, before
|
||||
// we actually remove anything.
|
||||
self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())?;
|
||||
|
||||
info!("GC starting");
|
||||
|
||||
@@ -2161,16 +2072,7 @@ impl Timeline {
|
||||
result.layers_removed += 1;
|
||||
}
|
||||
|
||||
info!(
|
||||
"GC completed removing {} layers, cutoff {}",
|
||||
result.layers_removed, new_gc_cutoff
|
||||
);
|
||||
|
||||
if result.layers_removed != 0 {
|
||||
fail_point!("after-timeline-gc-removed-layers");
|
||||
}
|
||||
|
||||
if self.can_upload_layers() {
|
||||
if self.upload_layers.load(atomic::Ordering::Relaxed) {
|
||||
storage_sync::schedule_layer_delete(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
@@ -2259,11 +2161,6 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn can_upload_layers(&self) -> bool {
|
||||
self.upload_layers.load(atomic::Ordering::Relaxed)
|
||||
&& self.current_state() != TimelineState::Broken
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function for get_reconstruct_data() to add the path of layers traversed
|
||||
@@ -2314,11 +2211,11 @@ impl<'a> TimelineWriter<'a> {
|
||||
///
|
||||
/// This will implicitly extend the relation, if the page is beyond the
|
||||
/// current end-of-file.
|
||||
pub fn put(&self, key: Key, lsn: Lsn, value: &Value) -> anyhow::Result<()> {
|
||||
pub fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()> {
|
||||
self.tl.put_value(key, lsn, value)
|
||||
}
|
||||
|
||||
pub fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> anyhow::Result<()> {
|
||||
pub fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> Result<()> {
|
||||
self.tl.put_tombstone(key_range, lsn)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! This module acts as a switchboard to access different repositories managed by this
|
||||
//! page server.
|
||||
|
||||
use std::collections::{hash_map, HashMap};
|
||||
use std::collections::{hash_map, HashMap, HashSet};
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
@@ -12,10 +12,10 @@ use tracing::*;
|
||||
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
|
||||
use crate::config::{PageServerConf, METADATA_FILE_NAME, TIMELINE_UNINIT_MARK_SUFFIX};
|
||||
use crate::config::{PageServerConf, METADATA_FILE_NAME};
|
||||
use crate::http::models::TenantInfo;
|
||||
use crate::storage_sync::index::{LayerFileMetadata, RemoteIndex, RemoteTimelineIndex};
|
||||
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData, TimelineLocalFiles};
|
||||
use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
|
||||
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
|
||||
use crate::task_mgr::{self, TaskKind};
|
||||
use crate::tenant::{
|
||||
ephemeral_file::is_ephemeral_file, metadata::TimelineMetadata, Tenant, TenantState,
|
||||
@@ -24,7 +24,7 @@ use crate::tenant_config::TenantConfOpt;
|
||||
use crate::walredo::PostgresRedoManager;
|
||||
use crate::TEMP_FILE_SUFFIX;
|
||||
|
||||
use utils::crashsafe::{self, path_with_suffix_extension};
|
||||
use utils::crashsafe_dir::{self, path_with_suffix_extension};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
mod tenants_state {
|
||||
@@ -104,7 +104,7 @@ pub fn init_tenant_mgr(
|
||||
if let TenantAttachData::Ready(t) = new_timeline_values {
|
||||
for (timeline_id, old_value) in old_values {
|
||||
if let LocalTimelineInitStatus::LocallyComplete(metadata) = old_value {
|
||||
t.insert(timeline_id, TimelineLocalFiles::ready(metadata));
|
||||
t.insert(timeline_id, (metadata, HashSet::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -189,7 +189,7 @@ pub fn attach_local_tenants(
|
||||
let has_timelines = !timelines.is_empty();
|
||||
let timelines_to_attach = timelines
|
||||
.iter()
|
||||
.map(|(&k, v)| (k, v.metadata().to_owned()))
|
||||
.map(|(&k, (v, _))| (k, v.clone()))
|
||||
.collect();
|
||||
match tenant.init_attach_timelines(timelines_to_attach) {
|
||||
Ok(()) => {
|
||||
@@ -265,98 +265,58 @@ fn create_tenant_files(
|
||||
temporary_tenant_dir.display()
|
||||
);
|
||||
|
||||
let temporary_tenant_timelines_dir = rebase_directory(
|
||||
&conf.timelines_path(&tenant_id),
|
||||
&target_tenant_directory,
|
||||
&temporary_tenant_dir,
|
||||
)?;
|
||||
let temporary_tenant_config_path = rebase_directory(
|
||||
&conf.tenant_config_path(tenant_id),
|
||||
&target_tenant_directory,
|
||||
&temporary_tenant_dir,
|
||||
)?;
|
||||
|
||||
// top-level dir may exist if we are creating it through CLI
|
||||
crashsafe::create_dir_all(&temporary_tenant_dir).with_context(|| {
|
||||
crashsafe_dir::create_dir_all(&temporary_tenant_dir).with_context(|| {
|
||||
format!(
|
||||
"could not create temporary tenant directory {}",
|
||||
temporary_tenant_dir.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let creation_result = try_create_target_tenant_dir(
|
||||
conf,
|
||||
tenant_conf,
|
||||
tenant_id,
|
||||
&temporary_tenant_dir,
|
||||
&target_tenant_directory,
|
||||
);
|
||||
|
||||
if creation_result.is_err() {
|
||||
error!("Failed to create directory structure for tenant {tenant_id}, cleaning tmp data");
|
||||
if let Err(e) = fs::remove_dir_all(&temporary_tenant_dir) {
|
||||
error!("Failed to remove temporary tenant directory {temporary_tenant_dir:?}: {e}")
|
||||
} else if let Err(e) = crashsafe::fsync(&temporary_tenant_dir) {
|
||||
error!(
|
||||
"Failed to fsync removed temporary tenant directory {temporary_tenant_dir:?}: {e}"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
creation_result
|
||||
}
|
||||
|
||||
fn try_create_target_tenant_dir(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
temporary_tenant_dir: &Path,
|
||||
target_tenant_directory: &Path,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let temporary_tenant_timelines_dir = rebase_directory(
|
||||
&conf.timelines_path(&tenant_id),
|
||||
target_tenant_directory,
|
||||
temporary_tenant_dir,
|
||||
)
|
||||
.with_context(|| format!("Failed to resolve tenant {tenant_id} temporary timelines dir"))?;
|
||||
let temporary_tenant_config_path = rebase_directory(
|
||||
&conf.tenant_config_path(tenant_id),
|
||||
target_tenant_directory,
|
||||
temporary_tenant_dir,
|
||||
)
|
||||
.with_context(|| format!("Failed to resolve tenant {tenant_id} temporary config path"))?;
|
||||
|
||||
Tenant::persist_tenant_config(&temporary_tenant_config_path, tenant_conf, true).with_context(
|
||||
|| {
|
||||
format!(
|
||||
"Failed to write tenant {} config to {}",
|
||||
tenant_id,
|
||||
temporary_tenant_config_path.display()
|
||||
)
|
||||
},
|
||||
)?;
|
||||
crashsafe::create_dir(&temporary_tenant_timelines_dir).with_context(|| {
|
||||
// first, create a config in the top-level temp directory, fsync the file
|
||||
Tenant::persist_tenant_config(&temporary_tenant_config_path, tenant_conf, true)?;
|
||||
// then, create a subdirectory in the top-level temp directory, fsynced
|
||||
crashsafe_dir::create_dir(&temporary_tenant_timelines_dir).with_context(|| {
|
||||
format!(
|
||||
"could not create tenant {} temporary timelines directory {}",
|
||||
tenant_id,
|
||||
"could not create temporary tenant timelines directory {}",
|
||||
temporary_tenant_timelines_dir.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
fail::fail_point!("tenant-creation-before-tmp-rename", |_| {
|
||||
anyhow::bail!("failpoint tenant-creation-before-tmp-rename");
|
||||
});
|
||||
|
||||
fs::rename(&temporary_tenant_dir, target_tenant_directory).with_context(|| {
|
||||
// move-rename tmp directory with all files synced into a permanent directory, fsync its parent
|
||||
fs::rename(&temporary_tenant_dir, &target_tenant_directory).with_context(|| {
|
||||
format!(
|
||||
"failed to move tenant {} temporary directory {} into the permanent one {}",
|
||||
tenant_id,
|
||||
"failed to move temporary tenant directory {} into the permanent one {}",
|
||||
temporary_tenant_dir.display(),
|
||||
target_tenant_directory.display()
|
||||
)
|
||||
})?;
|
||||
let target_dir_parent = target_tenant_directory.parent().with_context(|| {
|
||||
format!(
|
||||
"Failed to get tenant {} dir parent for {}",
|
||||
tenant_id,
|
||||
"Failed to get tenant dir parent for {}",
|
||||
target_tenant_directory.display()
|
||||
)
|
||||
})?;
|
||||
crashsafe::fsync(target_dir_parent).with_context(|| {
|
||||
format!(
|
||||
"Failed to fsync renamed directory's parent {} for tenant {}",
|
||||
target_dir_parent.display(),
|
||||
tenant_id,
|
||||
)
|
||||
})?;
|
||||
fs::File::open(target_dir_parent)?.sync_all()?;
|
||||
|
||||
info!(
|
||||
"created tenant directory structure in {}",
|
||||
target_tenant_directory.display()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -523,7 +483,7 @@ pub fn list_tenant_info(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TenantAttachData {
|
||||
Ready(HashMap<TimelineId, TimelineLocalFiles>),
|
||||
Ready(HashMap<TimelineId, (TimelineMetadata, HashSet<PathBuf>)>),
|
||||
Broken(anyhow::Error),
|
||||
}
|
||||
/// Attempts to collect information about all tenant and timelines, existing on the local FS.
|
||||
@@ -642,15 +602,7 @@ fn is_temporary(path: &Path) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_uninit_mark(path: &Path) -> bool {
|
||||
match path.file_name() {
|
||||
Some(name) => name
|
||||
.to_string_lossy()
|
||||
.ends_with(TIMELINE_UNINIT_MARK_SUFFIX),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn collect_timelines_for_tenant(
|
||||
config: &'static PageServerConf,
|
||||
tenant_path: &Path,
|
||||
@@ -693,74 +645,25 @@ fn collect_timelines_for_tenant(
|
||||
e
|
||||
);
|
||||
}
|
||||
} else if is_uninit_mark(&timeline_dir) {
|
||||
let timeline_uninit_mark_file = &timeline_dir;
|
||||
info!(
|
||||
"Found an uninit mark file {}, removing the timeline and its uninit mark",
|
||||
timeline_uninit_mark_file.display()
|
||||
);
|
||||
let timeline_id = timeline_uninit_mark_file
|
||||
.file_stem()
|
||||
.and_then(OsStr::to_str)
|
||||
.unwrap_or_default()
|
||||
.parse::<TimelineId>()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Could not parse timeline id out of the timeline uninit mark name {}",
|
||||
timeline_uninit_mark_file.display()
|
||||
)
|
||||
})?;
|
||||
let timeline_dir = config.timeline_path(&timeline_id, &tenant_id);
|
||||
if let Err(e) =
|
||||
remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)
|
||||
{
|
||||
error!("Failed to clean up uninit marked timeline: {e:?}");
|
||||
}
|
||||
} else {
|
||||
let timeline_id = timeline_dir
|
||||
.file_name()
|
||||
.and_then(OsStr::to_str)
|
||||
.unwrap_or_default()
|
||||
.parse::<TimelineId>()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Could not parse timeline id out of the timeline dir name {}",
|
||||
timeline_dir.display()
|
||||
)
|
||||
})?;
|
||||
let timeline_uninit_mark_file =
|
||||
config.timeline_uninit_mark_file_path(tenant_id, timeline_id);
|
||||
if timeline_uninit_mark_file.exists() {
|
||||
info!("Found an uninit mark file for timeline {tenant_id}/{timeline_id}, removing the timeline and its uninit mark");
|
||||
if let Err(e) = remove_timeline_and_uninit_mark(
|
||||
&timeline_dir,
|
||||
&timeline_uninit_mark_file,
|
||||
) {
|
||||
error!("Failed to clean up uninit marked timeline: {e:?}");
|
||||
match collect_timeline_files(&timeline_dir) {
|
||||
Ok((timeline_id, metadata, timeline_files)) => {
|
||||
tenant_timelines.insert(timeline_id, (metadata, timeline_files));
|
||||
}
|
||||
} else {
|
||||
match collect_timeline_files(&timeline_dir) {
|
||||
Ok((metadata, timeline_files)) => {
|
||||
tenant_timelines.insert(
|
||||
timeline_id,
|
||||
TimelineLocalFiles::collected(metadata, timeline_files),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to process timeline dir contents at '{}', reason: {:?}",
|
||||
timeline_dir.display(),
|
||||
e
|
||||
);
|
||||
match remove_if_empty(&timeline_dir) {
|
||||
Ok(true) => info!(
|
||||
"Removed empty timeline directory {}",
|
||||
timeline_dir.display()
|
||||
),
|
||||
Ok(false) => (),
|
||||
Err(e) => {
|
||||
error!("Failed to remove empty timeline directory: {e:?}")
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to process timeline dir contents at '{}', reason: {:?}",
|
||||
timeline_dir.display(),
|
||||
e
|
||||
);
|
||||
match remove_if_empty(&timeline_dir) {
|
||||
Ok(true) => info!(
|
||||
"Removed empty timeline directory {}",
|
||||
timeline_dir.display()
|
||||
),
|
||||
Ok(false) => (),
|
||||
Err(e) => {
|
||||
error!("Failed to remove empty timeline directory: {e:?}")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -783,48 +686,25 @@ fn collect_timelines_for_tenant(
|
||||
Ok((tenant_id, TenantAttachData::Ready(tenant_timelines)))
|
||||
}
|
||||
|
||||
fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> anyhow::Result<()> {
|
||||
fs::remove_dir_all(&timeline_dir)
|
||||
.or_else(|e| {
|
||||
if e.kind() == std::io::ErrorKind::NotFound {
|
||||
// we can leave the uninit mark without a timeline dir,
|
||||
// just remove the mark then
|
||||
Ok(())
|
||||
} else {
|
||||
Err(e)
|
||||
}
|
||||
})
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to remove unit marked timeline directory {}",
|
||||
timeline_dir.display()
|
||||
)
|
||||
})?;
|
||||
fs::remove_file(&uninit_mark).with_context(|| {
|
||||
format!(
|
||||
"Failed to remove timeline uninit mark file {}",
|
||||
uninit_mark.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// discover timeline files and extract timeline metadata
|
||||
// NOTE: ephemeral files are excluded from the list
|
||||
fn collect_timeline_files(
|
||||
timeline_dir: &Path,
|
||||
) -> anyhow::Result<(TimelineMetadata, HashMap<PathBuf, LayerFileMetadata>)> {
|
||||
let mut timeline_files = HashMap::new();
|
||||
) -> anyhow::Result<(TimelineId, TimelineMetadata, HashSet<PathBuf>)> {
|
||||
let mut timeline_files = HashSet::new();
|
||||
let mut timeline_metadata_path = None;
|
||||
|
||||
let timeline_id = timeline_dir
|
||||
.file_name()
|
||||
.and_then(OsStr::to_str)
|
||||
.unwrap_or_default()
|
||||
.parse::<TimelineId>()
|
||||
.context("Could not parse timeline id out of the timeline dir name")?;
|
||||
let timeline_dir_entries =
|
||||
fs::read_dir(&timeline_dir).context("Failed to list timeline dir contents")?;
|
||||
for entry in timeline_dir_entries {
|
||||
let entry_path = entry.context("Failed to list timeline dir entry")?.path();
|
||||
let metadata = entry_path.metadata()?;
|
||||
|
||||
if metadata.is_file() {
|
||||
if entry_path.is_file() {
|
||||
if entry_path.file_name().and_then(OsStr::to_str) == Some(METADATA_FILE_NAME) {
|
||||
timeline_metadata_path = Some(entry_path);
|
||||
} else if is_ephemeral_file(&entry_path.file_name().unwrap().to_string_lossy()) {
|
||||
@@ -839,8 +719,7 @@ fn collect_timeline_files(
|
||||
)
|
||||
})?;
|
||||
} else {
|
||||
let layer_metadata = LayerFileMetadata::new(metadata.len());
|
||||
timeline_files.insert(entry_path, layer_metadata);
|
||||
timeline_files.insert(entry_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -866,5 +745,5 @@ fn collect_timeline_files(
|
||||
"Timeline has no ancestor and no layer files"
|
||||
);
|
||||
|
||||
Ok((metadata, timeline_files))
|
||||
Ok((timeline_id, metadata, timeline_files))
|
||||
}
|
||||
|
||||
@@ -70,10 +70,8 @@ async fn compaction_loop(tenant_id: TenantId) {
|
||||
// Run compaction
|
||||
let mut sleep_duration = tenant.get_compaction_period();
|
||||
if let Err(e) = tenant.compaction_iteration() {
|
||||
error!("Compaction failed, retrying: {e:#}");
|
||||
sleep_duration = wait_duration;
|
||||
error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration);
|
||||
#[cfg(feature = "testing")]
|
||||
std::process::abort();
|
||||
}
|
||||
|
||||
// Sleep
|
||||
@@ -121,10 +119,8 @@ async fn gc_loop(tenant_id: TenantId) {
|
||||
if gc_horizon > 0 {
|
||||
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false)
|
||||
{
|
||||
error!("Gc failed, retrying: {e:#}");
|
||||
sleep_duration = wait_duration;
|
||||
error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration);
|
||||
#[cfg(feature = "testing")]
|
||||
std::process::abort();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,7 +171,7 @@ async fn wait_for_active_tenant(
|
||||
}
|
||||
state => {
|
||||
debug!("Not running the task loop, tenant is not active with background jobs enabled: {state:?}");
|
||||
continue;
|
||||
tokio::time::sleep(wait).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,10 +31,9 @@ use bytes::{Buf, Bytes, BytesMut};
|
||||
use tracing::*;
|
||||
|
||||
use crate::pgdatadir_mapping::*;
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::tenant::Timeline;
|
||||
use crate::walrecord::*;
|
||||
use crate::ZERO_PAGE;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
|
||||
@@ -44,6 +43,8 @@ use postgres_ffi::TransactionId;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
|
||||
|
||||
pub struct WalIngest<'a> {
|
||||
timeline: &'a Timeline,
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
use std::{
|
||||
collections::{hash_map, HashMap},
|
||||
num::NonZeroU64,
|
||||
ops::ControlFlow,
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
@@ -27,8 +26,7 @@ use etcd_broker::{
|
||||
subscription_key::SubscriptionKey, subscription_value::SkTimelineInfo, BrokerSubscription,
|
||||
BrokerUpdate, Client,
|
||||
};
|
||||
use pageserver_api::models::TimelineState;
|
||||
use tokio::{select, sync::watch};
|
||||
use tokio::select;
|
||||
use tracing::*;
|
||||
|
||||
use crate::{
|
||||
@@ -49,7 +47,7 @@ pub fn spawn_connection_manager_task(
|
||||
wal_connect_timeout: Duration,
|
||||
lagging_wal_timeout: Duration,
|
||||
max_lsn_wal_lag: NonZeroU64,
|
||||
) {
|
||||
) -> anyhow::Result<()> {
|
||||
let mut etcd_client = get_etcd_client().clone();
|
||||
|
||||
let tenant_id = timeline.tenant_id;
|
||||
@@ -60,7 +58,10 @@ pub fn spawn_connection_manager_task(
|
||||
TaskKind::WalReceiverManager,
|
||||
Some(tenant_id),
|
||||
Some(timeline_id),
|
||||
&format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
|
||||
&format!(
|
||||
"walreceiver for tenant {} timeline {}",
|
||||
timeline.tenant_id, timeline.timeline_id
|
||||
),
|
||||
false,
|
||||
async move {
|
||||
info!("WAL receiver broker started, connecting to etcd");
|
||||
@@ -74,21 +75,19 @@ pub fn spawn_connection_manager_task(
|
||||
select! {
|
||||
_ = task_mgr::shutdown_watcher() => {
|
||||
info!("WAL receiver shutdown requested, shutting down");
|
||||
walreceiver_state.shutdown().await;
|
||||
// Kill current connection, if any
|
||||
if let Some(wal_connection) = walreceiver_state.wal_connection.take()
|
||||
{
|
||||
wal_connection.connection_task.shutdown().await;
|
||||
}
|
||||
return Ok(());
|
||||
},
|
||||
loop_step_result = connection_manager_loop_step(
|
||||
|
||||
_ = connection_manager_loop_step(
|
||||
&broker_loop_prefix,
|
||||
&mut etcd_client,
|
||||
&mut walreceiver_state,
|
||||
) => match loop_step_result {
|
||||
ControlFlow::Continue(()) => continue,
|
||||
ControlFlow::Break(()) => {
|
||||
info!("Connection manager loop ended, shutting down");
|
||||
walreceiver_state.shutdown().await;
|
||||
return Ok(());
|
||||
}
|
||||
},
|
||||
) => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -96,6 +95,7 @@ pub fn spawn_connection_manager_task(
|
||||
info_span!("wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id),
|
||||
),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Attempts to subscribe for timeline updates, pushed by safekeepers into the broker.
|
||||
@@ -105,17 +105,7 @@ async fn connection_manager_loop_step(
|
||||
broker_prefix: &str,
|
||||
etcd_client: &mut Client,
|
||||
walreceiver_state: &mut WalreceiverState,
|
||||
) -> ControlFlow<(), ()> {
|
||||
let mut timeline_state_updates = walreceiver_state.timeline.subscribe_for_state_updates();
|
||||
|
||||
match wait_for_active_timeline(&mut timeline_state_updates).await {
|
||||
ControlFlow::Continue(()) => {}
|
||||
ControlFlow::Break(()) => {
|
||||
info!("Timeline dropped state updates sender before becoming active, stopping wal connection manager loop");
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
}
|
||||
|
||||
) {
|
||||
let id = TenantTimelineId {
|
||||
tenant_id: walreceiver_state.timeline.tenant_id,
|
||||
timeline_id: walreceiver_state.timeline.timeline_id,
|
||||
@@ -140,12 +130,10 @@ async fn connection_manager_loop_step(
|
||||
// - change connection if the rules decide so, or if the current connection dies
|
||||
// - receive updates from broker
|
||||
// - this might change the current desired connection
|
||||
// - timeline state changes to something that does not allow walreceiver to run concurrently
|
||||
select! {
|
||||
broker_connection_result = &mut broker_subscription.watcher_handle => {
|
||||
info!("Broker connection was closed from the other side, ending current broker loop step");
|
||||
cleanup_broker_connection(broker_connection_result, walreceiver_state);
|
||||
return ControlFlow::Continue(());
|
||||
return;
|
||||
},
|
||||
|
||||
Some(wal_connection_update) = async {
|
||||
@@ -198,36 +186,11 @@ async fn connection_manager_loop_step(
|
||||
(&mut broker_subscription.watcher_handle).await,
|
||||
walreceiver_state,
|
||||
);
|
||||
return ControlFlow::Continue(());
|
||||
return;
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
new_event = async {
|
||||
loop {
|
||||
match timeline_state_updates.changed().await {
|
||||
Ok(()) => {
|
||||
let new_state = walreceiver_state.timeline.current_state();
|
||||
match new_state {
|
||||
// we're already active as walreceiver, no need to reactivate
|
||||
TimelineState::Active => continue,
|
||||
TimelineState::Broken | TimelineState::Paused | TimelineState::Suspended => return ControlFlow::Continue(new_state),
|
||||
}
|
||||
}
|
||||
Err(_sender_dropped_error) => return ControlFlow::Break(()),
|
||||
}
|
||||
}
|
||||
} => match new_event {
|
||||
ControlFlow::Continue(new_state) => {
|
||||
info!("Timeline became inactive (new state: {new_state:?}), dropping current connections until it reactivates");
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
ControlFlow::Break(()) => {
|
||||
info!("Timeline dropped state updates sender, stopping wal connection manager loop");
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
},
|
||||
|
||||
_ = async { tokio::time::sleep(time_until_next_retry.unwrap()).await }, if time_until_next_retry.is_some() => {}
|
||||
}
|
||||
|
||||
@@ -254,34 +217,6 @@ async fn connection_manager_loop_step(
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_active_timeline(
|
||||
timeline_state_updates: &mut watch::Receiver<TimelineState>,
|
||||
) -> ControlFlow<(), ()> {
|
||||
let current_state = *timeline_state_updates.borrow();
|
||||
if current_state == TimelineState::Active {
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
|
||||
loop {
|
||||
match timeline_state_updates.changed().await {
|
||||
Ok(()) => {
|
||||
let new_state = *timeline_state_updates.borrow();
|
||||
match new_state {
|
||||
TimelineState::Active => {
|
||||
debug!("Timeline state changed to active, continuing the walreceiver connection manager");
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
state => {
|
||||
debug!("Not running the walreceiver connection manager, timeline is not active: {state:?}");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_sender_dropped_error) => return ControlFlow::Break(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cleanup_broker_connection(
|
||||
broker_connection_result: Result<Result<(), etcd_broker::BrokerError>, tokio::task::JoinError>,
|
||||
walreceiver_state: &mut WalreceiverState,
|
||||
@@ -789,12 +724,6 @@ impl WalreceiverState {
|
||||
self.wal_connection_retries.remove(&node_id);
|
||||
}
|
||||
}
|
||||
|
||||
async fn shutdown(mut self) {
|
||||
if let Some(wal_connection) = self.wal_connection.take() {
|
||||
wal_connection.connection_task.shutdown().await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
@@ -1476,9 +1405,7 @@ mod tests {
|
||||
timeline: harness
|
||||
.load()
|
||||
.create_empty_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION)
|
||||
.expect("Failed to create an empty timeline for dummy wal connection manager")
|
||||
.initialize()
|
||||
.unwrap(),
|
||||
.expect("Failed to create an empty timeline for dummy wal connection manager"),
|
||||
wal_connect_timeout: Duration::from_secs(1),
|
||||
lagging_wal_timeout: Duration::from_secs(1),
|
||||
max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),
|
||||
|
||||
@@ -35,18 +35,17 @@ use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use tracing::*;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::crashsafe_dir::path_with_suffix_extension;
|
||||
use utils::{bin_ser::BeSer, id::TenantId, lsn::Lsn, nonblock::set_nonblock};
|
||||
|
||||
use crate::metrics::{
|
||||
WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
|
||||
WAL_REDO_WAIT_TIME,
|
||||
WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, WAL_REDO_WAIT_TIME,
|
||||
};
|
||||
use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block};
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::repository::Key;
|
||||
use crate::walrecord::NeonWalRecord;
|
||||
use crate::{config::PageServerConf, TEMP_FILE_SUFFIX};
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM;
|
||||
use postgres_ffi::v14::nonrelfile_utils::{
|
||||
@@ -245,23 +244,12 @@ impl PostgresRedoManager {
|
||||
let end_time = Instant::now();
|
||||
let duration = end_time.duration_since(lock_time);
|
||||
|
||||
let len = records.len();
|
||||
let nbytes = records.iter().fold(0, |acumulator, record| {
|
||||
acumulator
|
||||
+ match &record.1 {
|
||||
NeonWalRecord::Postgres { rec, .. } => rec.len(),
|
||||
_ => unreachable!("Only PostgreSQL records are accepted in this batch"),
|
||||
}
|
||||
});
|
||||
|
||||
WAL_REDO_TIME.observe(duration.as_secs_f64());
|
||||
WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64);
|
||||
WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64);
|
||||
WAL_REDO_RECORDS_HISTOGRAM.observe(records.len() as f64);
|
||||
|
||||
debug!(
|
||||
"postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}",
|
||||
len,
|
||||
nbytes,
|
||||
"postgres applied {} WAL records in {} us to reconstruct page image at LSN {}",
|
||||
records.len(),
|
||||
duration.as_micros(),
|
||||
lsn
|
||||
);
|
||||
@@ -270,9 +258,8 @@ impl PostgresRedoManager {
|
||||
// next request will launch a new one.
|
||||
if result.is_err() {
|
||||
error!(
|
||||
"error applying {} WAL records ({} bytes) to reconstruct page image at LSN {}",
|
||||
"error applying {} WAL records to reconstruct page image at LSN {}",
|
||||
records.len(),
|
||||
nbytes,
|
||||
lsn
|
||||
);
|
||||
let process = process_guard.take().unwrap();
|
||||
@@ -610,26 +597,13 @@ impl PostgresRedoProcess {
|
||||
);
|
||||
fs::remove_dir_all(&datadir)?;
|
||||
}
|
||||
let pg_bin_dir_path = conf.pg_bin_dir(pg_version).map_err(|e| {
|
||||
Error::new(
|
||||
ErrorKind::Other,
|
||||
format!("incorrect pg_bin_dir path: {}", e),
|
||||
)
|
||||
})?;
|
||||
let pg_lib_dir_path = conf.pg_lib_dir(pg_version).map_err(|e| {
|
||||
Error::new(
|
||||
ErrorKind::Other,
|
||||
format!("incorrect pg_lib_dir path: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
info!("running initdb in {}", datadir.display());
|
||||
let initdb = Command::new(pg_bin_dir_path.join("initdb"))
|
||||
let initdb = Command::new(conf.pg_bin_dir(pg_version).join("initdb"))
|
||||
.args(&["-D", &datadir.to_string_lossy()])
|
||||
.arg("-N")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir_path)
|
||||
.env("DYLD_LIBRARY_PATH", &pg_lib_dir_path) // macOS
|
||||
.env("LD_LIBRARY_PATH", conf.pg_lib_dir(pg_version))
|
||||
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir(pg_version))
|
||||
.close_fds()
|
||||
.output()
|
||||
.map_err(|e| Error::new(e.kind(), format!("failed to execute initdb: {e}")))?;
|
||||
@@ -655,14 +629,14 @@ impl PostgresRedoProcess {
|
||||
}
|
||||
|
||||
// Start postgres itself
|
||||
let mut child = Command::new(pg_bin_dir_path.join("postgres"))
|
||||
let mut child = Command::new(conf.pg_bin_dir(pg_version).join("postgres"))
|
||||
.arg("--wal-redo")
|
||||
.stdin(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir_path)
|
||||
.env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
|
||||
.env("LD_LIBRARY_PATH", conf.pg_lib_dir(pg_version))
|
||||
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir(pg_version))
|
||||
.env("PGDATA", &datadir)
|
||||
// The redo process is not trusted, so it runs in seccomp mode
|
||||
// (see seccomp in zenith_wal_redo.c). We have to make sure it doesn't
|
||||
|
||||
@@ -10,12 +10,51 @@ struct WalProposerConn
|
||||
PGconn *pg_conn;
|
||||
bool is_nonblocking; /* whether the connection is non-blocking */
|
||||
char *recvbuf; /* last received data from
|
||||
* walprop_async_read */
|
||||
* libpqprop_async_read */
|
||||
};
|
||||
|
||||
/* Prototypes for exported functions */
|
||||
static char *libpqprop_error_message(WalProposerConn * conn);
|
||||
static WalProposerConnStatusType libpqprop_status(WalProposerConn * conn);
|
||||
static WalProposerConn * libpqprop_connect_start(char *conninfo);
|
||||
static WalProposerConnectPollStatusType libpqprop_connect_poll(WalProposerConn * conn);
|
||||
static bool libpqprop_send_query(WalProposerConn * conn, char *query);
|
||||
static WalProposerExecStatusType libpqprop_get_query_result(WalProposerConn * conn);
|
||||
static pgsocket libpqprop_socket(WalProposerConn * conn);
|
||||
static int libpqprop_flush(WalProposerConn * conn);
|
||||
static void libpqprop_finish(WalProposerConn * conn);
|
||||
static PGAsyncReadResult libpqprop_async_read(WalProposerConn * conn, char **buf, int *amount);
|
||||
static PGAsyncWriteResult libpqprop_async_write(WalProposerConn * conn, void const *buf, size_t size);
|
||||
static bool libpqprop_blocking_write(WalProposerConn * conn, void const *buf, size_t size);
|
||||
|
||||
static WalProposerFunctionsType PQWalProposerFunctions =
|
||||
{
|
||||
libpqprop_error_message,
|
||||
libpqprop_status,
|
||||
libpqprop_connect_start,
|
||||
libpqprop_connect_poll,
|
||||
libpqprop_send_query,
|
||||
libpqprop_get_query_result,
|
||||
libpqprop_socket,
|
||||
libpqprop_flush,
|
||||
libpqprop_finish,
|
||||
libpqprop_async_read,
|
||||
libpqprop_async_write,
|
||||
libpqprop_blocking_write,
|
||||
};
|
||||
|
||||
/* Module initialization */
|
||||
void
|
||||
pg_init_libpqwalproposer(void)
|
||||
{
|
||||
if (WalProposerFunctions != NULL)
|
||||
elog(ERROR, "libpqwalproposer already loaded");
|
||||
WalProposerFunctions = &PQWalProposerFunctions;
|
||||
}
|
||||
|
||||
/* Helper function */
|
||||
static bool
|
||||
ensure_nonblocking_status(WalProposerConn *conn, bool is_nonblocking)
|
||||
ensure_nonblocking_status(WalProposerConn * conn, bool is_nonblocking)
|
||||
{
|
||||
/* If we're already correctly blocking or nonblocking, all good */
|
||||
if (is_nonblocking == conn->is_nonblocking)
|
||||
@@ -30,14 +69,14 @@ ensure_nonblocking_status(WalProposerConn *conn, bool is_nonblocking)
|
||||
}
|
||||
|
||||
/* Exported function definitions */
|
||||
char *
|
||||
walprop_error_message(WalProposerConn *conn)
|
||||
static char *
|
||||
libpqprop_error_message(WalProposerConn * conn)
|
||||
{
|
||||
return PQerrorMessage(conn->pg_conn);
|
||||
}
|
||||
|
||||
WalProposerConnStatusType
|
||||
walprop_status(WalProposerConn *conn)
|
||||
static WalProposerConnStatusType
|
||||
libpqprop_status(WalProposerConn * conn)
|
||||
{
|
||||
switch (PQstatus(conn->pg_conn))
|
||||
{
|
||||
@@ -50,8 +89,8 @@ walprop_status(WalProposerConn *conn)
|
||||
}
|
||||
}
|
||||
|
||||
WalProposerConn *
|
||||
walprop_connect_start(char *conninfo)
|
||||
static WalProposerConn *
|
||||
libpqprop_connect_start(char *conninfo)
|
||||
{
|
||||
WalProposerConn *conn;
|
||||
PGconn *pg_conn;
|
||||
@@ -80,8 +119,8 @@ walprop_connect_start(char *conninfo)
|
||||
return conn;
|
||||
}
|
||||
|
||||
WalProposerConnectPollStatusType
|
||||
walprop_connect_poll(WalProposerConn *conn)
|
||||
static WalProposerConnectPollStatusType
|
||||
libpqprop_connect_poll(WalProposerConn * conn)
|
||||
{
|
||||
WalProposerConnectPollStatusType return_val;
|
||||
|
||||
@@ -121,8 +160,8 @@ walprop_connect_poll(WalProposerConn *conn)
|
||||
return return_val;
|
||||
}
|
||||
|
||||
bool
|
||||
walprop_send_query(WalProposerConn *conn, char *query)
|
||||
static bool
|
||||
libpqprop_send_query(WalProposerConn * conn, char *query)
|
||||
{
|
||||
/*
|
||||
* We need to be in blocking mode for sending the query to run without
|
||||
@@ -138,8 +177,8 @@ walprop_send_query(WalProposerConn *conn, char *query)
|
||||
return true;
|
||||
}
|
||||
|
||||
WalProposerExecStatusType
|
||||
walprop_get_query_result(WalProposerConn *conn)
|
||||
static WalProposerExecStatusType
|
||||
libpqprop_get_query_result(WalProposerConn * conn)
|
||||
{
|
||||
PGresult *result;
|
||||
WalProposerExecStatusType return_val;
|
||||
@@ -216,20 +255,20 @@ walprop_get_query_result(WalProposerConn *conn)
|
||||
return return_val;
|
||||
}
|
||||
|
||||
pgsocket
|
||||
walprop_socket(WalProposerConn *conn)
|
||||
static pgsocket
|
||||
libpqprop_socket(WalProposerConn * conn)
|
||||
{
|
||||
return PQsocket(conn->pg_conn);
|
||||
}
|
||||
|
||||
int
|
||||
walprop_flush(WalProposerConn *conn)
|
||||
static int
|
||||
libpqprop_flush(WalProposerConn * conn)
|
||||
{
|
||||
return (PQflush(conn->pg_conn));
|
||||
}
|
||||
|
||||
void
|
||||
walprop_finish(WalProposerConn *conn)
|
||||
static void
|
||||
libpqprop_finish(WalProposerConn * conn)
|
||||
{
|
||||
if (conn->recvbuf != NULL)
|
||||
PQfreemem(conn->recvbuf);
|
||||
@@ -243,8 +282,8 @@ walprop_finish(WalProposerConn *conn)
|
||||
* On success, the data is placed in *buf. It is valid until the next call
|
||||
* to this function.
|
||||
*/
|
||||
PGAsyncReadResult
|
||||
walprop_async_read(WalProposerConn *conn, char **buf, int *amount)
|
||||
static PGAsyncReadResult
|
||||
libpqprop_async_read(WalProposerConn * conn, char **buf, int *amount)
|
||||
{
|
||||
int result;
|
||||
|
||||
@@ -314,8 +353,8 @@ walprop_async_read(WalProposerConn *conn, char **buf, int *amount)
|
||||
}
|
||||
}
|
||||
|
||||
PGAsyncWriteResult
|
||||
walprop_async_write(WalProposerConn *conn, void const *buf, size_t size)
|
||||
static PGAsyncWriteResult
|
||||
libpqprop_async_write(WalProposerConn * conn, void const *buf, size_t size)
|
||||
{
|
||||
int result;
|
||||
|
||||
@@ -369,12 +408,8 @@ walprop_async_write(WalProposerConn *conn, void const *buf, size_t size)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is very similar to walprop_async_write. For more
|
||||
* information, refer to the comments there.
|
||||
*/
|
||||
bool
|
||||
walprop_blocking_write(WalProposerConn *conn, void const *buf, size_t size)
|
||||
static bool
|
||||
libpqprop_blocking_write(WalProposerConn * conn, void const *buf, size_t size)
|
||||
{
|
||||
int result;
|
||||
|
||||
@@ -382,6 +417,10 @@ walprop_blocking_write(WalProposerConn *conn, void const *buf, size_t size)
|
||||
if (!ensure_nonblocking_status(conn, false))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Ths function is very similar to libpqprop_async_write. For more
|
||||
* information, refer to the comments there
|
||||
*/
|
||||
if ((result = PQputCopyData(conn->pg_conn, buf, size)) == -1)
|
||||
return false;
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ void
|
||||
_PG_init(void)
|
||||
{
|
||||
pg_init_libpagestore();
|
||||
pg_init_libpqwalproposer();
|
||||
pg_init_walproposer();
|
||||
|
||||
EmitWarningsOnPlaceholders("neon");
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#define NEON_H
|
||||
|
||||
extern void pg_init_libpagestore(void);
|
||||
extern void pg_init_libpqwalproposer(void);
|
||||
extern void pg_init_walproposer(void);
|
||||
|
||||
#endif /* NEON_H */
|
||||
|
||||
@@ -79,6 +79,9 @@ bool am_wal_proposer;
|
||||
char *neon_timeline_walproposer = NULL;
|
||||
char *neon_tenant_walproposer = NULL;
|
||||
|
||||
/* Declared in walproposer.h, defined here, initialized in libpqwalproposer.c */
|
||||
WalProposerFunctionsType *WalProposerFunctions = NULL;
|
||||
|
||||
#define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot"
|
||||
|
||||
static int n_safekeepers = 0;
|
||||
@@ -435,6 +438,10 @@ WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId)
|
||||
char *sep;
|
||||
char *port;
|
||||
|
||||
/* Load the libpq-specific functions */
|
||||
if (WalProposerFunctions == NULL)
|
||||
elog(ERROR, "libpqwalproposer didn't initialize correctly");
|
||||
|
||||
load_file("libpqwalreceiver", false);
|
||||
if (WalReceiverFunctions == NULL)
|
||||
elog(ERROR, "libpqwalreceiver didn't initialize correctly");
|
||||
|
||||
@@ -446,31 +446,31 @@ typedef enum
|
||||
} WalProposerConnStatusType;
|
||||
|
||||
/* Re-exported PQerrorMessage */
|
||||
extern char *walprop_error_message(WalProposerConn *conn);
|
||||
typedef char *(*walprop_error_message_fn) (WalProposerConn * conn);
|
||||
|
||||
/* Re-exported PQstatus */
|
||||
extern WalProposerConnStatusType walprop_status(WalProposerConn *conn);
|
||||
typedef WalProposerConnStatusType(*walprop_status_fn) (WalProposerConn * conn);
|
||||
|
||||
/* Re-exported PQconnectStart */
|
||||
extern WalProposerConn * walprop_connect_start(char *conninfo);
|
||||
typedef WalProposerConn * (*walprop_connect_start_fn) (char *conninfo);
|
||||
|
||||
/* Re-exported PQconectPoll */
|
||||
extern WalProposerConnectPollStatusType walprop_connect_poll(WalProposerConn *conn);
|
||||
typedef WalProposerConnectPollStatusType(*walprop_connect_poll_fn) (WalProposerConn * conn);
|
||||
|
||||
/* Blocking wrapper around PQsendQuery */
|
||||
extern bool walprop_send_query(WalProposerConn *conn, char *query);
|
||||
typedef bool (*walprop_send_query_fn) (WalProposerConn * conn, char *query);
|
||||
|
||||
/* Wrapper around PQconsumeInput + PQisBusy + PQgetResult */
|
||||
extern WalProposerExecStatusType walprop_get_query_result(WalProposerConn *conn);
|
||||
typedef WalProposerExecStatusType(*walprop_get_query_result_fn) (WalProposerConn * conn);
|
||||
|
||||
/* Re-exported PQsocket */
|
||||
extern pgsocket walprop_socket(WalProposerConn *conn);
|
||||
typedef pgsocket (*walprop_socket_fn) (WalProposerConn * conn);
|
||||
|
||||
/* Wrapper around PQconsumeInput (if socket's read-ready) + PQflush */
|
||||
extern int walprop_flush(WalProposerConn *conn);
|
||||
typedef int (*walprop_flush_fn) (WalProposerConn * conn);
|
||||
|
||||
/* Re-exported PQfinish */
|
||||
extern void walprop_finish(WalProposerConn *conn);
|
||||
typedef void (*walprop_finish_fn) (WalProposerConn * conn);
|
||||
|
||||
/*
|
||||
* Ergonomic wrapper around PGgetCopyData
|
||||
@@ -486,7 +486,9 @@ extern void walprop_finish(WalProposerConn *conn);
|
||||
* performs a bit of extra checking work that's always required and is normally
|
||||
* somewhat verbose.
|
||||
*/
|
||||
extern PGAsyncReadResult walprop_async_read(WalProposerConn *conn, char **buf, int *amount);
|
||||
typedef PGAsyncReadResult(*walprop_async_read_fn) (WalProposerConn * conn,
|
||||
char **buf,
|
||||
int *amount);
|
||||
|
||||
/*
|
||||
* Ergonomic wrapper around PQputCopyData + PQflush
|
||||
@@ -495,14 +497,69 @@ extern PGAsyncReadResult walprop_async_read(WalProposerConn *conn, char **buf, i
|
||||
*
|
||||
* For information on the meaning of return codes, refer to PGAsyncWriteResult.
|
||||
*/
|
||||
extern PGAsyncWriteResult walprop_async_write(WalProposerConn *conn, void const *buf, size_t size);
|
||||
typedef PGAsyncWriteResult(*walprop_async_write_fn) (WalProposerConn * conn,
|
||||
void const *buf,
|
||||
size_t size);
|
||||
|
||||
/*
|
||||
* Blocking equivalent to walprop_async_write_fn
|
||||
*
|
||||
* Returns 'true' if successful, 'false' on failure.
|
||||
*/
|
||||
extern bool walprop_blocking_write(WalProposerConn *conn, void const *buf, size_t size);
|
||||
typedef bool (*walprop_blocking_write_fn) (WalProposerConn * conn, void const *buf, size_t size);
|
||||
|
||||
/* All libpqwalproposer exported functions collected together. */
|
||||
typedef struct WalProposerFunctionsType
|
||||
{
|
||||
walprop_error_message_fn walprop_error_message;
|
||||
walprop_status_fn walprop_status;
|
||||
walprop_connect_start_fn walprop_connect_start;
|
||||
walprop_connect_poll_fn walprop_connect_poll;
|
||||
walprop_send_query_fn walprop_send_query;
|
||||
walprop_get_query_result_fn walprop_get_query_result;
|
||||
walprop_socket_fn walprop_socket;
|
||||
walprop_flush_fn walprop_flush;
|
||||
walprop_finish_fn walprop_finish;
|
||||
walprop_async_read_fn walprop_async_read;
|
||||
walprop_async_write_fn walprop_async_write;
|
||||
walprop_blocking_write_fn walprop_blocking_write;
|
||||
} WalProposerFunctionsType;
|
||||
|
||||
/* Allow the above functions to be "called" with normal syntax */
|
||||
#define walprop_error_message(conn) \
|
||||
WalProposerFunctions->walprop_error_message(conn)
|
||||
#define walprop_status(conn) \
|
||||
WalProposerFunctions->walprop_status(conn)
|
||||
#define walprop_connect_start(conninfo) \
|
||||
WalProposerFunctions->walprop_connect_start(conninfo)
|
||||
#define walprop_connect_poll(conn) \
|
||||
WalProposerFunctions->walprop_connect_poll(conn)
|
||||
#define walprop_send_query(conn, query) \
|
||||
WalProposerFunctions->walprop_send_query(conn, query)
|
||||
#define walprop_get_query_result(conn) \
|
||||
WalProposerFunctions->walprop_get_query_result(conn)
|
||||
#define walprop_set_nonblocking(conn, arg) \
|
||||
WalProposerFunctions->walprop_set_nonblocking(conn, arg)
|
||||
#define walprop_socket(conn) \
|
||||
WalProposerFunctions->walprop_socket(conn)
|
||||
#define walprop_flush(conn) \
|
||||
WalProposerFunctions->walprop_flush(conn)
|
||||
#define walprop_finish(conn) \
|
||||
WalProposerFunctions->walprop_finish(conn)
|
||||
#define walprop_async_read(conn, buf, amount) \
|
||||
WalProposerFunctions->walprop_async_read(conn, buf, amount)
|
||||
#define walprop_async_write(conn, buf, size) \
|
||||
WalProposerFunctions->walprop_async_write(conn, buf, size)
|
||||
#define walprop_blocking_write(conn, buf, size) \
|
||||
WalProposerFunctions->walprop_blocking_write(conn, buf, size)
|
||||
|
||||
/*
|
||||
* The runtime location of the libpqwalproposer functions.
|
||||
*
|
||||
* This pointer is set by the initializer in libpqwalproposer, so that we
|
||||
* can use it later.
|
||||
*/
|
||||
extern PGDLLIMPORT WalProposerFunctionsType * WalProposerFunctions;
|
||||
|
||||
extern uint64 BackpressureThrottlingTime(void);
|
||||
|
||||
|
||||
64
poetry.lock
generated
64
poetry.lock
generated
@@ -11,7 +11,7 @@ async-timeout = ">=3.0,<5.0"
|
||||
psycopg2-binary = ">=2.8.4"
|
||||
|
||||
[package.extras]
|
||||
sa = ["sqlalchemy[postgresql-psycopg2binary] (>=1.3,<1.5)"]
|
||||
sa = ["sqlalchemy[postgresql_psycopg2binary] (>=1.3,<1.5)"]
|
||||
|
||||
[[package]]
|
||||
name = "allure-pytest"
|
||||
@@ -80,7 +80,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"]
|
||||
docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"]
|
||||
tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"]
|
||||
tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"]
|
||||
tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sam-translator"
|
||||
@@ -514,6 +514,14 @@ python-versions = ">=3.7"
|
||||
[package.dependencies]
|
||||
typing-extensions = ">=4.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "cached-property"
|
||||
version = "1.5.2"
|
||||
description = "A decorator for caching properties in classes."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2022.6.15"
|
||||
@@ -560,7 +568,7 @@ optional = false
|
||||
python-versions = ">=3.6.0"
|
||||
|
||||
[package.extras]
|
||||
unicode-backport = ["unicodedata2"]
|
||||
unicode_backport = ["unicodedata2"]
|
||||
|
||||
[[package]]
|
||||
name = "click"
|
||||
@@ -593,7 +601,7 @@ python-versions = ">=3.6"
|
||||
cffi = ">=1.12"
|
||||
|
||||
[package.extras]
|
||||
docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx_rtd_theme"]
|
||||
docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"]
|
||||
docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"]
|
||||
pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"]
|
||||
sdist = ["setuptools_rust (>=0.11.4)"]
|
||||
@@ -738,9 +746,9 @@ python-versions = ">=3.6.1,<4.0"
|
||||
|
||||
[package.extras]
|
||||
colors = ["colorama (>=0.4.3,<0.5.0)"]
|
||||
pipfile-deprecated-finder = ["pipreqs", "requirementslib"]
|
||||
pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
|
||||
plugins = ["setuptools"]
|
||||
requirements-deprecated-finder = ["pip-api", "pipreqs"]
|
||||
requirements_deprecated_finder = ["pip-api", "pipreqs"]
|
||||
|
||||
[[package]]
|
||||
name = "itsdangerous"
|
||||
@@ -815,7 +823,7 @@ python-versions = ">=2.7"
|
||||
[package.extras]
|
||||
docs = ["jaraco.packaging (>=3.2)", "rst.linker (>=1.9)", "sphinx"]
|
||||
testing = ["ecdsa", "enum34", "feedparser", "jsonlib", "numpy", "pandas", "pymongo", "pytest (>=3.5,!=3.7.3)", "pytest-black-multipy", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-flake8 (<1.1.0)", "pytest-flake8 (>=1.1.1)", "scikit-learn", "sqlalchemy"]
|
||||
testing-libs = ["simplejson", "ujson", "yajl"]
|
||||
"testing.libs" = ["simplejson", "ujson", "yajl"]
|
||||
|
||||
[[package]]
|
||||
name = "jsonpointer"
|
||||
@@ -836,12 +844,11 @@ python-versions = "*"
|
||||
[package.dependencies]
|
||||
attrs = ">=17.4.0"
|
||||
pyrsistent = ">=0.14.0"
|
||||
setuptools = "*"
|
||||
six = ">=1.11.0"
|
||||
|
||||
[package.extras]
|
||||
format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"]
|
||||
format-nongpl = ["idna", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "webcolors"]
|
||||
format_nongpl = ["idna", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "webcolors"]
|
||||
|
||||
[[package]]
|
||||
name = "junit-xml"
|
||||
@@ -901,7 +908,6 @@ pytz = "*"
|
||||
PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"server\""}
|
||||
requests = ">=2.5"
|
||||
responses = ">=0.9.0"
|
||||
setuptools = {version = "*", optional = true, markers = "extra == \"server\""}
|
||||
sshpubkeys = {version = ">=3.1.0", optional = true, markers = "extra == \"server\""}
|
||||
werkzeug = ">=0.5,<2.2.0"
|
||||
xmltodict = "*"
|
||||
@@ -1010,7 +1016,6 @@ python-versions = ">=3.7.0,<4.0.0"
|
||||
jsonschema = ">=3.2.0,<5.0.0"
|
||||
openapi-schema-validator = ">=0.2.0,<0.3.0"
|
||||
PyYAML = ">=5.1"
|
||||
setuptools = "*"
|
||||
|
||||
[package.extras]
|
||||
requests = ["requests"]
|
||||
@@ -1343,7 +1348,7 @@ urllib3 = ">=1.21.1,<1.27"
|
||||
|
||||
[package.extras]
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "responses"
|
||||
@@ -1397,19 +1402,6 @@ python-versions = ">= 2.7"
|
||||
attrs = "*"
|
||||
pbr = "*"
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "65.5.0"
|
||||
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
|
||||
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
|
||||
testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
|
||||
|
||||
[[package]]
|
||||
name = "six"
|
||||
version = "1.16.0"
|
||||
@@ -1476,14 +1468,6 @@ category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7,<4.0"
|
||||
|
||||
[[package]]
|
||||
name = "types-toml"
|
||||
version = "0.10.8"
|
||||
description = "Typing stubs for toml"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "types-urllib3"
|
||||
version = "1.26.17"
|
||||
@@ -1568,7 +1552,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
|
||||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "17cdbfe90f1b06dffaf24c3e076384ec08dd4a2dce5a05e50565f7364932eb2d"
|
||||
content-hash = "ead1495454ee6d880bb240447025db93a25ebe263c2709de5f144cc2d85dc975"
|
||||
|
||||
[metadata.files]
|
||||
aiopg = [
|
||||
@@ -1663,6 +1647,10 @@ botocore-stubs = [
|
||||
{file = "botocore-stubs-1.27.38.tar.gz", hash = "sha256:408e8b86b5d171b58f81c74ca9d3b5317a5a8e2d3bc2073aa841ac13b8939e56"},
|
||||
{file = "botocore_stubs-1.27.38-py3-none-any.whl", hash = "sha256:7add7641e9a479a9c8366893bb522fd9ca3d58714201e43662a200a148a1bc38"},
|
||||
]
|
||||
cached-property = [
|
||||
{file = "cached-property-1.5.2.tar.gz", hash = "sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130"},
|
||||
{file = "cached_property-1.5.2-py2.py3-none-any.whl", hash = "sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"},
|
||||
]
|
||||
certifi = [
|
||||
{file = "certifi-2022.6.15-py3-none-any.whl", hash = "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412"},
|
||||
{file = "certifi-2022.6.15.tar.gz", hash = "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"},
|
||||
@@ -2206,10 +2194,6 @@ sarif-om = [
|
||||
{file = "sarif_om-1.0.4-py3-none-any.whl", hash = "sha256:539ef47a662329b1c8502388ad92457425e95dc0aaaf995fe46f4984c4771911"},
|
||||
{file = "sarif_om-1.0.4.tar.gz", hash = "sha256:cd5f416b3083e00d402a92e449a7ff67af46f11241073eea0461802a3b5aef98"},
|
||||
]
|
||||
setuptools = [
|
||||
{file = "setuptools-65.5.0-py3-none-any.whl", hash = "sha256:f62ea9da9ed6289bfe868cd6845968a2c854d1427f8548d52cae02a42b4f0356"},
|
||||
{file = "setuptools-65.5.0.tar.gz", hash = "sha256:512e5536220e38146176efb833d4a62aa726b7bbff82cfbc8ba9eaa3996e0b17"},
|
||||
]
|
||||
six = [
|
||||
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
||||
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
|
||||
@@ -2238,10 +2222,6 @@ types-s3transfer = [
|
||||
{file = "types-s3transfer-0.6.0.post3.tar.gz", hash = "sha256:92c3704e5d041202bfb5ddb79d083fd1a02de2c5dfec6a91576823e6b5c93993"},
|
||||
{file = "types_s3transfer-0.6.0.post3-py3-none-any.whl", hash = "sha256:eedc5117275565b3c83662c0ccc81662a34da5dda8bd502b89d296b6d5cb091d"},
|
||||
]
|
||||
types-toml = [
|
||||
{file = "types-toml-0.10.8.tar.gz", hash = "sha256:b7e7ea572308b1030dc86c3ba825c5210814c2825612ec679eb7814f8dd9295a"},
|
||||
{file = "types_toml-0.10.8-py3-none-any.whl", hash = "sha256:8300fd093e5829eb9c1fba69cee38130347d4b74ddf32d0a7df650ae55c2b599"},
|
||||
]
|
||||
types-urllib3 = [
|
||||
{file = "types-urllib3-1.26.17.tar.gz", hash = "sha256:73fd274524c3fc7cd8cd9ceb0cb67ed99b45f9cb2831013e46d50c1451044800"},
|
||||
{file = "types_urllib3-1.26.17-py3-none-any.whl", hash = "sha256:0d027fcd27dbb3cb532453b4d977e05bc1e13aefd70519866af211b3003d895d"},
|
||||
|
||||
@@ -7,9 +7,9 @@ edition = "2021"
|
||||
anyhow = "1.0"
|
||||
atty = "0.2.14"
|
||||
base64 = "0.13.0"
|
||||
bstr = "1.0"
|
||||
bstr = "0.2.17"
|
||||
bytes = { version = "1.0.1", features = ['serde'] }
|
||||
clap = "4.0"
|
||||
clap = "3.0"
|
||||
futures = "0.3.13"
|
||||
git-version = "0.3.5"
|
||||
hashbrown = "0.12"
|
||||
@@ -22,11 +22,7 @@ once_cell = "1.13.0"
|
||||
parking_lot = "0.12"
|
||||
pin-project-lite = "0.2.7"
|
||||
rand = "0.8.3"
|
||||
reqwest = { version = "0.11", default-features = false, features = [
|
||||
"blocking",
|
||||
"json",
|
||||
"rustls-tls",
|
||||
] }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
routerify = "3"
|
||||
rustls = "0.20.0"
|
||||
rustls-pemfile = "1"
|
||||
@@ -37,13 +33,13 @@ sha2 = "0.10.2"
|
||||
socket2 = "0.4.4"
|
||||
thiserror = "1.0.30"
|
||||
tokio = { version = "1.17", features = ["macros"] }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
tokio-rustls = "0.23.0"
|
||||
tracing = "0.1.36"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
url = "2.2.2"
|
||||
uuid = { version = "1.2", features = ["v4", "serde"] }
|
||||
x509-parser = "0.14"
|
||||
uuid = { version = "0.8.2", features = ["v4", "serde"]}
|
||||
x509-parser = "0.13.2"
|
||||
|
||||
utils = { path = "../libs/utils" }
|
||||
metrics = { path = "../libs/metrics" }
|
||||
@@ -51,6 +47,6 @@ workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
async-trait = "0.1"
|
||||
rcgen = "0.10"
|
||||
rstest = "0.15"
|
||||
rcgen = "0.8.14"
|
||||
rstest = "0.12"
|
||||
tokio-postgres-rustls = "0.9.0"
|
||||
|
||||
@@ -45,43 +45,98 @@ async fn main() -> anyhow::Result<()> {
|
||||
.with_target(false)
|
||||
.init();
|
||||
|
||||
let arg_matches = cli().get_matches();
|
||||
let arg_matches = clap::App::new("Neon proxy/router")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("proxy")
|
||||
.short('p')
|
||||
.long("proxy")
|
||||
.takes_value(true)
|
||||
.help("listen for incoming client connections on ip:port")
|
||||
.default_value("127.0.0.1:4432"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-backend")
|
||||
.long("auth-backend")
|
||||
.takes_value(true)
|
||||
.possible_values(["console", "postgres", "link"])
|
||||
.default_value("link"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("mgmt")
|
||||
.short('m')
|
||||
.long("mgmt")
|
||||
.takes_value(true)
|
||||
.help("listen for management callback connection on ip:port")
|
||||
.default_value("127.0.0.1:7000"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("http")
|
||||
.short('h')
|
||||
.long("http")
|
||||
.takes_value(true)
|
||||
.help("listen for incoming http connections (metrics, etc) on ip:port")
|
||||
.default_value("127.0.0.1:7001"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("uri")
|
||||
.short('u')
|
||||
.long("uri")
|
||||
.takes_value(true)
|
||||
.help("redirect unauthenticated users to the given uri in case of link auth")
|
||||
.default_value("http://localhost:3000/psql_session/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-endpoint")
|
||||
.short('a')
|
||||
.long("auth-endpoint")
|
||||
.takes_value(true)
|
||||
.help("cloud API endpoint for authenticating users")
|
||||
.default_value("http://localhost:3000/authenticate_proxy_request/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("tls-key")
|
||||
.short('k')
|
||||
.long("tls-key")
|
||||
.alias("ssl-key") // backwards compatibility
|
||||
.takes_value(true)
|
||||
.help("path to TLS key for client postgres connections"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("tls-cert")
|
||||
.short('c')
|
||||
.long("tls-cert")
|
||||
.alias("ssl-cert") // backwards compatibility
|
||||
.takes_value(true)
|
||||
.help("path to TLS cert for client postgres connections"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let tls_config = match (
|
||||
arg_matches.get_one::<String>("tls-key"),
|
||||
arg_matches.get_one::<String>("tls-cert"),
|
||||
arg_matches.value_of("tls-key"),
|
||||
arg_matches.value_of("tls-cert"),
|
||||
) {
|
||||
(Some(key_path), Some(cert_path)) => Some(config::configure_tls(key_path, cert_path)?),
|
||||
(None, None) => None,
|
||||
_ => bail!("either both or neither tls-key and tls-cert must be specified"),
|
||||
};
|
||||
|
||||
let proxy_address: SocketAddr = arg_matches.get_one::<String>("proxy").unwrap().parse()?;
|
||||
let mgmt_address: SocketAddr = arg_matches.get_one::<String>("mgmt").unwrap().parse()?;
|
||||
let http_address: SocketAddr = arg_matches.get_one::<String>("http").unwrap().parse()?;
|
||||
let proxy_address: SocketAddr = arg_matches.value_of("proxy").unwrap().parse()?;
|
||||
let mgmt_address: SocketAddr = arg_matches.value_of("mgmt").unwrap().parse()?;
|
||||
let http_address: SocketAddr = arg_matches.value_of("http").unwrap().parse()?;
|
||||
|
||||
let auth_backend = match arg_matches
|
||||
.get_one::<String>("auth-backend")
|
||||
.unwrap()
|
||||
.as_str()
|
||||
{
|
||||
let auth_backend = match arg_matches.value_of("auth-backend").unwrap() {
|
||||
"console" => {
|
||||
let url = arg_matches
|
||||
.get_one::<String>("auth-endpoint")
|
||||
.unwrap()
|
||||
.parse()?;
|
||||
let url = arg_matches.value_of("auth-endpoint").unwrap().parse()?;
|
||||
let endpoint = http::Endpoint::new(url, reqwest::Client::new());
|
||||
auth::BackendType::Console(Cow::Owned(endpoint), ())
|
||||
}
|
||||
"postgres" => {
|
||||
let url = arg_matches
|
||||
.get_one::<String>("auth-endpoint")
|
||||
.unwrap()
|
||||
.parse()?;
|
||||
let url = arg_matches.value_of("auth-endpoint").unwrap().parse()?;
|
||||
auth::BackendType::Postgres(Cow::Owned(url), ())
|
||||
}
|
||||
"link" => {
|
||||
let url = arg_matches.get_one::<String>("uri").unwrap().parse()?;
|
||||
let url = arg_matches.value_of("uri").unwrap().parse()?;
|
||||
auth::BackendType::Link(Cow::Owned(url))
|
||||
}
|
||||
other => bail!("unsupported auth backend: {other}"),
|
||||
@@ -119,68 +174,3 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cli() -> clap::Command {
|
||||
clap::Command::new("Neon proxy/router")
|
||||
.disable_help_flag(true)
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("proxy")
|
||||
.short('p')
|
||||
.long("proxy")
|
||||
.help("listen for incoming client connections on ip:port")
|
||||
.default_value("127.0.0.1:4432"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-backend")
|
||||
.long("auth-backend")
|
||||
.value_parser(["console", "postgres", "link"])
|
||||
.default_value("link"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("mgmt")
|
||||
.short('m')
|
||||
.long("mgmt")
|
||||
.help("listen for management callback connection on ip:port")
|
||||
.default_value("127.0.0.1:7000"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("http")
|
||||
.long("http")
|
||||
.help("listen for incoming http connections (metrics, etc) on ip:port")
|
||||
.default_value("127.0.0.1:7001"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("uri")
|
||||
.short('u')
|
||||
.long("uri")
|
||||
.help("redirect unauthenticated users to the given uri in case of link auth")
|
||||
.default_value("http://localhost:3000/psql_session/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-endpoint")
|
||||
.short('a')
|
||||
.long("auth-endpoint")
|
||||
.help("cloud API endpoint for authenticating users")
|
||||
.default_value("http://localhost:3000/authenticate_proxy_request/"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("tls-key")
|
||||
.short('k')
|
||||
.long("tls-key")
|
||||
.alias("ssl-key") // backwards compatibility
|
||||
.help("path to TLS key for client postgres connections"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("tls-cert")
|
||||
.short('c')
|
||||
.long("tls-cert")
|
||||
.alias("ssl-cert") // backwards compatibility
|
||||
.help("path to TLS cert for client postgres connections"),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ requests = "^2.26.0"
|
||||
pytest-xdist = "^2.3.0"
|
||||
asyncpg = "^0.24.0"
|
||||
aiopg = "^1.3.1"
|
||||
cached-property = "^1.5.2"
|
||||
Jinja2 = "^3.0.2"
|
||||
types-requests = "^2.28.5"
|
||||
types-psycopg2 = "^2.9.18"
|
||||
@@ -28,14 +29,12 @@ Werkzeug = "2.1.2"
|
||||
pytest-order = "^1.0.1"
|
||||
allure-pytest = "^2.10.0"
|
||||
pytest-asyncio = "^0.19.0"
|
||||
toml = "^0.10.2"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
flake8 = "^5.0.4"
|
||||
mypy = "==0.971"
|
||||
black = "^22.6.0"
|
||||
isort = "^5.10.1"
|
||||
types-toml = "^0.10.8"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
@@ -75,6 +74,7 @@ strict = true
|
||||
[[tool.mypy.overrides]]
|
||||
module = [
|
||||
"asyncpg.*",
|
||||
"cached_property.*",
|
||||
"pg8000.*",
|
||||
]
|
||||
ignore_missing_imports = true
|
||||
|
||||
@@ -11,7 +11,7 @@ hyper = "0.14"
|
||||
fs2 = "0.4.3"
|
||||
serde_json = "1"
|
||||
tracing = "0.1.27"
|
||||
clap = "4.0"
|
||||
clap = "3.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.17", features = ["macros", "fs"] }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// Main entry point for the safekeeper executable
|
||||
//
|
||||
use anyhow::{bail, Context, Result};
|
||||
use clap::{value_parser, Arg, ArgAction, Command};
|
||||
use clap::{App, Arg};
|
||||
use const_format::formatcp;
|
||||
use daemonize::Daemonize;
|
||||
use fs2::FileExt;
|
||||
@@ -21,7 +21,7 @@ use metrics::set_build_info_metric;
|
||||
use safekeeper::broker;
|
||||
use safekeeper::control_file;
|
||||
use safekeeper::defaults::{
|
||||
DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES,
|
||||
DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG,
|
||||
DEFAULT_PG_LISTEN_ADDR, DEFAULT_WAL_BACKUP_RUNTIME_THREADS,
|
||||
};
|
||||
use safekeeper::http;
|
||||
@@ -32,12 +32,8 @@ use safekeeper::GlobalTimelines;
|
||||
use safekeeper::SafeKeeperConf;
|
||||
use utils::auth::JwtAuth;
|
||||
use utils::{
|
||||
http::endpoint,
|
||||
id::NodeId,
|
||||
logging::{self, LogFormat},
|
||||
project_git_version,
|
||||
shutdown::exit_now,
|
||||
signals, tcp_listener,
|
||||
http::endpoint, id::NodeId, logging, project_git_version, shutdown::exit_now, signals,
|
||||
tcp_listener,
|
||||
};
|
||||
|
||||
const LOCK_FILE_NAME: &str = "safekeeper.lock";
|
||||
@@ -45,40 +41,147 @@ const ID_FILE_NAME: &str = "safekeeper.id";
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let arg_matches = cli().get_matches();
|
||||
let arg_matches = App::new("Neon safekeeper")
|
||||
.about("Store WAL stream to local file system and push it to WAL receivers")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.short('D')
|
||||
.long("dir")
|
||||
.takes_value(true)
|
||||
.help("Path to the safekeeper data directory"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("init")
|
||||
.long("init")
|
||||
.takes_value(false)
|
||||
.help("Initialize safekeeper with ID"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("listen-pg")
|
||||
.short('l')
|
||||
.long("listen-pg")
|
||||
.alias("listen") // for compatibility
|
||||
.takes_value(true)
|
||||
.help(formatcp!("listen for incoming WAL data connections on ip:port (default: {DEFAULT_PG_LISTEN_ADDR})")),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("listen-http")
|
||||
.long("listen-http")
|
||||
.takes_value(true)
|
||||
.help(formatcp!("http endpoint address for metrics on ip:port (default: {DEFAULT_HTTP_LISTEN_ADDR})")),
|
||||
)
|
||||
// FIXME this argument is no longer needed since pageserver address is forwarded from compute.
|
||||
// However because this argument is in use by console's e2e tests let's keep it for now and remove separately.
|
||||
// So currently it is a noop.
|
||||
.arg(
|
||||
Arg::new("pageserver")
|
||||
.short('p')
|
||||
.long("pageserver")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("daemonize")
|
||||
.short('d')
|
||||
.long("daemonize")
|
||||
.takes_value(false)
|
||||
.help("Run in the background"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("no-sync")
|
||||
.short('n')
|
||||
.long("no-sync")
|
||||
.takes_value(false)
|
||||
.help("Do not wait for changes to be written safely to disk"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("dump-control-file")
|
||||
.long("dump-control-file")
|
||||
.takes_value(true)
|
||||
.help("Dump control file at path specified by this argument and exit"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("id").long("id").takes_value(true).help("safekeeper node id: integer")
|
||||
).arg(
|
||||
Arg::new("broker-endpoints")
|
||||
.long("broker-endpoints")
|
||||
.takes_value(true)
|
||||
.help("a comma separated broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("broker-etcd-prefix")
|
||||
.long("broker-etcd-prefix")
|
||||
.takes_value(true)
|
||||
.help("a prefix to always use when polling/pusing data in etcd from this safekeeper"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("heartbeat-timeout")
|
||||
.long("heartbeat-timeout")
|
||||
.takes_value(true)
|
||||
.help(formatcp!("Peer is considered dead after not receiving heartbeats from it during this period (default {}s), passed as a human readable duration.", DEFAULT_HEARTBEAT_TIMEOUT.as_secs()))
|
||||
)
|
||||
.arg(
|
||||
Arg::new("wal-backup-threads").long("backup-threads").takes_value(true).help(formatcp!("number of threads for wal backup (default {DEFAULT_WAL_BACKUP_RUNTIME_THREADS}")),
|
||||
).arg(
|
||||
Arg::new("remote-storage")
|
||||
.long("remote-storage")
|
||||
.takes_value(true)
|
||||
.help("Remote storage configuration for WAL backup (offloading to s3) as TOML inline table, e.g. {\"max_concurrent_syncs\" = 17, \"max_sync_errors\": 13, \"bucket_name\": \"<BUCKETNAME>\", \"bucket_region\":\"<REGION>\", \"concurrency_limit\": 119}.\nSafekeeper offloads WAL to [prefix_in_bucket/]<tenant_id>/<timeline_id>/<segment_file>, mirroring structure on the file system.")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("max-offloader-lag")
|
||||
.long("max-offloader-lag")
|
||||
.takes_value(true)
|
||||
.help(formatcp!("Safekeeper won't be elected for WAL offloading if it is lagging for more than this value (default {}MB) in bytes", DEFAULT_MAX_OFFLOADER_LAG / (1 << 20)))
|
||||
)
|
||||
.arg(
|
||||
Arg::new("enable-wal-backup")
|
||||
.long("enable-wal-backup")
|
||||
.takes_value(true)
|
||||
.default_value("true")
|
||||
.default_missing_value("true")
|
||||
.help("Enable/disable WAL backup to s3. When disabled, safekeeper removes WAL ignoring WAL backup horizon."),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-validation-public-key-path")
|
||||
.long("auth-validation-public-key-path")
|
||||
.takes_value(true)
|
||||
.help("Path to an RSA .pem public key which is used to check JWT tokens")
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
if let Some(addr) = arg_matches.get_one::<String>("dump-control-file") {
|
||||
if let Some(addr) = arg_matches.value_of("dump-control-file") {
|
||||
let state = control_file::FileStorage::load_control_file(Path::new(addr))?;
|
||||
let json = serde_json::to_string(&state)?;
|
||||
print!("{json}");
|
||||
print!("{}", json);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut conf = SafeKeeperConf::default();
|
||||
|
||||
if let Some(dir) = arg_matches.get_one::<PathBuf>("datadir") {
|
||||
if let Some(dir) = arg_matches.value_of("datadir") {
|
||||
// change into the data directory.
|
||||
std::env::set_current_dir(dir)?;
|
||||
std::env::set_current_dir(PathBuf::from(dir))?;
|
||||
}
|
||||
|
||||
if arg_matches.get_flag("no-sync") {
|
||||
if arg_matches.is_present("no-sync") {
|
||||
conf.no_sync = true;
|
||||
}
|
||||
|
||||
if arg_matches.get_flag("daemonize") {
|
||||
if arg_matches.is_present("daemonize") {
|
||||
conf.daemonize = true;
|
||||
}
|
||||
|
||||
if let Some(addr) = arg_matches.get_one::<String>("listen-pg") {
|
||||
conf.listen_pg_addr = addr.to_string();
|
||||
if let Some(addr) = arg_matches.value_of("listen-pg") {
|
||||
conf.listen_pg_addr = addr.to_owned();
|
||||
}
|
||||
|
||||
if let Some(addr) = arg_matches.get_one::<String>("listen-http") {
|
||||
conf.listen_http_addr = addr.to_string();
|
||||
if let Some(addr) = arg_matches.value_of("listen-http") {
|
||||
conf.listen_http_addr = addr.to_owned();
|
||||
}
|
||||
|
||||
let mut given_id = None;
|
||||
if let Some(given_id_str) = arg_matches.get_one::<String>("id") {
|
||||
if let Some(given_id_str) = arg_matches.value_of("id") {
|
||||
given_id = Some(NodeId(
|
||||
given_id_str
|
||||
.parse()
|
||||
@@ -86,15 +189,15 @@ fn main() -> anyhow::Result<()> {
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(addr) = arg_matches.get_one::<String>("broker-endpoints") {
|
||||
if let Some(addr) = arg_matches.value_of("broker-endpoints") {
|
||||
let collected_ep: Result<Vec<Url>, ParseError> = addr.split(',').map(Url::parse).collect();
|
||||
conf.broker_endpoints = collected_ep.context("Failed to parse broker endpoint urls")?;
|
||||
}
|
||||
if let Some(prefix) = arg_matches.get_one::<String>("broker-etcd-prefix") {
|
||||
if let Some(prefix) = arg_matches.value_of("broker-etcd-prefix") {
|
||||
conf.broker_etcd_prefix = prefix.to_string();
|
||||
}
|
||||
|
||||
if let Some(heartbeat_timeout_str) = arg_matches.get_one::<String>("heartbeat-timeout") {
|
||||
if let Some(heartbeat_timeout_str) = arg_matches.value_of("heartbeat-timeout") {
|
||||
conf.heartbeat_timeout =
|
||||
humantime::parse_duration(heartbeat_timeout_str).with_context(|| {
|
||||
format!(
|
||||
@@ -104,20 +207,20 @@ fn main() -> anyhow::Result<()> {
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(backup_threads) = arg_matches.get_one::<String>("wal-backup-threads") {
|
||||
if let Some(backup_threads) = arg_matches.value_of("wal-backup-threads") {
|
||||
conf.backup_runtime_threads = backup_threads
|
||||
.parse()
|
||||
.with_context(|| format!("Failed to parse backup threads {}", backup_threads))?;
|
||||
}
|
||||
if let Some(storage_conf) = arg_matches.get_one::<String>("remote-storage") {
|
||||
if let Some(storage_conf) = arg_matches.value_of("remote-storage") {
|
||||
// funny toml doesn't consider plain inline table as valid document, so wrap in a key to parse
|
||||
let storage_conf_toml = format!("remote_storage = {}", storage_conf);
|
||||
let parsed_toml = storage_conf_toml.parse::<Document>()?; // parse
|
||||
let (_, storage_conf_parsed_toml) = parsed_toml.iter().next().unwrap(); // and strip key off again
|
||||
conf.remote_storage = Some(RemoteStorageConfig::from_toml(storage_conf_parsed_toml)?);
|
||||
}
|
||||
if let Some(max_offloader_lag_str) = arg_matches.get_one::<String>("max-offloader-lag") {
|
||||
conf.max_offloader_lag_bytes = max_offloader_lag_str.parse().with_context(|| {
|
||||
if let Some(max_offloader_lag_str) = arg_matches.value_of("max-offloader-lag") {
|
||||
conf.max_offloader_lag = max_offloader_lag_str.parse().with_context(|| {
|
||||
format!(
|
||||
"failed to parse max offloader lag {}",
|
||||
max_offloader_lag_str
|
||||
@@ -126,24 +229,20 @@ fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
// Seems like there is no better way to accept bool values explicitly in clap.
|
||||
conf.wal_backup_enabled = arg_matches
|
||||
.get_one::<String>("enable-wal-backup")
|
||||
.value_of("enable-wal-backup")
|
||||
.unwrap()
|
||||
.parse()
|
||||
.context("failed to parse bool enable-s3-offload bool")?;
|
||||
|
||||
conf.auth_validation_public_key_path = arg_matches
|
||||
.get_one::<String>("auth-validation-public-key-path")
|
||||
.value_of("auth-validation-public-key-path")
|
||||
.map(PathBuf::from);
|
||||
|
||||
if let Some(log_format) = arg_matches.get_one::<String>("log-format") {
|
||||
conf.log_format = LogFormat::from_config(log_format)?;
|
||||
}
|
||||
|
||||
start_safekeeper(conf, given_id, arg_matches.get_flag("init"))
|
||||
start_safekeeper(conf, given_id, arg_matches.is_present("init"))
|
||||
}
|
||||
|
||||
fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bool) -> Result<()> {
|
||||
let log_file = logging::init("safekeeper.log", conf.daemonize, conf.log_format)?;
|
||||
let log_file = logging::init("safekeeper.log", conf.daemonize)?;
|
||||
|
||||
info!("version: {GIT_VERSION}");
|
||||
|
||||
@@ -346,112 +445,3 @@ fn set_id(conf: &mut SafeKeeperConf, given_id: Option<NodeId>) -> Result<()> {
|
||||
conf.my_id = my_id;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
Command::new("Neon safekeeper")
|
||||
.about("Store WAL stream to local file system and push it to WAL receivers")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.short('D')
|
||||
.long("dir")
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.help("Path to the safekeeper data directory"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("init")
|
||||
.long("init")
|
||||
.action(ArgAction::SetTrue)
|
||||
.help("Initialize safekeeper with ID"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("listen-pg")
|
||||
.short('l')
|
||||
.long("listen-pg")
|
||||
.alias("listen") // for compatibility
|
||||
.help(formatcp!("listen for incoming WAL data connections on ip:port (default: {DEFAULT_PG_LISTEN_ADDR})")),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("listen-http")
|
||||
.long("listen-http")
|
||||
.help(formatcp!("http endpoint address for metrics on ip:port (default: {DEFAULT_HTTP_LISTEN_ADDR})")),
|
||||
)
|
||||
// FIXME this argument is no longer needed since pageserver address is forwarded from compute.
|
||||
// However because this argument is in use by console's e2e tests let's keep it for now and remove separately.
|
||||
// So currently it is a noop.
|
||||
.arg(
|
||||
Arg::new("pageserver")
|
||||
.short('p')
|
||||
.long("pageserver"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("daemonize")
|
||||
.short('d')
|
||||
.long("daemonize")
|
||||
.action(ArgAction::SetTrue)
|
||||
.help("Run in the background"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("no-sync")
|
||||
.short('n')
|
||||
.long("no-sync")
|
||||
.action(ArgAction::SetTrue)
|
||||
.help("Do not wait for changes to be written safely to disk"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("dump-control-file")
|
||||
.long("dump-control-file")
|
||||
.help("Dump control file at path specified by this argument and exit"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("id").long("id").help("safekeeper node id: integer")
|
||||
).arg(
|
||||
Arg::new("broker-endpoints")
|
||||
.long("broker-endpoints")
|
||||
.help("a comma separated broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("broker-etcd-prefix")
|
||||
.long("broker-etcd-prefix")
|
||||
.help("a prefix to always use when polling/pusing data in etcd from this safekeeper"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("heartbeat-timeout")
|
||||
.long("heartbeat-timeout")
|
||||
.help(formatcp!("Peer is considered dead after not receiving heartbeats from it during this period (default {}s), passed as a human readable duration.", DEFAULT_HEARTBEAT_TIMEOUT.as_secs()))
|
||||
)
|
||||
.arg(
|
||||
Arg::new("wal-backup-threads").long("backup-threads").help(formatcp!("number of threads for wal backup (default {DEFAULT_WAL_BACKUP_RUNTIME_THREADS}")),
|
||||
).arg(
|
||||
Arg::new("remote-storage")
|
||||
.long("remote-storage")
|
||||
.help("Remote storage configuration for WAL backup (offloading to s3) as TOML inline table, e.g. {\"max_concurrent_syncs\" = 17, \"max_sync_errors\": 13, \"bucket_name\": \"<BUCKETNAME>\", \"bucket_region\":\"<REGION>\", \"concurrency_limit\": 119}.\nSafekeeper offloads WAL to [prefix_in_bucket/]<tenant_id>/<timeline_id>/<segment_file>, mirroring structure on the file system.")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("max-offloader-lag")
|
||||
.long("max-offloader-lag")
|
||||
.help(formatcp!("Safekeeper won't be elected for WAL offloading if it is lagging for more than this value (default {}MB) in bytes", DEFAULT_MAX_OFFLOADER_LAG_BYTES / (1 << 20)))
|
||||
)
|
||||
.arg(
|
||||
Arg::new("enable-wal-backup")
|
||||
.long("enable-wal-backup")
|
||||
.default_value("true")
|
||||
.default_missing_value("true")
|
||||
.help("Enable/disable WAL backup to s3. When disabled, safekeeper removes WAL ignoring WAL backup horizon."),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("auth-validation-public-key-path")
|
||||
.long("auth-validation-public-key-path")
|
||||
.help("Path to an RSA .pem public key which is used to check JWT tokens")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("log-format")
|
||||
.long("log-format")
|
||||
.help("Format for logging, either 'plain' or 'json'")
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_cli() {
|
||||
cli().debug_assert();
|
||||
}
|
||||
|
||||
@@ -126,7 +126,7 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
|
||||
let handles = active_tlis
|
||||
.iter()
|
||||
.map(|tli| {
|
||||
let sk_info = tli.get_safekeeper_info(&conf);
|
||||
let sk_info = tli.get_safekeer_info(&conf);
|
||||
let key =
|
||||
timeline_safekeeper_path(conf.broker_etcd_prefix.clone(), tli.ttid, conf.my_id);
|
||||
let lease = leases.remove(&tli.ttid).unwrap();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use defaults::{
|
||||
DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_WAL_BACKUP_RUNTIME_THREADS,
|
||||
DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_MAX_OFFLOADER_LAG, DEFAULT_WAL_BACKUP_RUNTIME_THREADS,
|
||||
};
|
||||
//
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
@@ -7,10 +7,7 @@ use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
use url::Url;
|
||||
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TenantTimelineId},
|
||||
logging::LogFormat,
|
||||
};
|
||||
use utils::id::{NodeId, TenantId, TenantTimelineId};
|
||||
|
||||
pub mod broker;
|
||||
pub mod control_file;
|
||||
@@ -39,9 +36,10 @@ pub mod defaults {
|
||||
DEFAULT_PG_LISTEN_PORT,
|
||||
};
|
||||
|
||||
pub const DEFAULT_RECALL_PERIOD: Duration = Duration::from_secs(10);
|
||||
pub const DEFAULT_WAL_BACKUP_RUNTIME_THREADS: usize = 8;
|
||||
pub const DEFAULT_HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
pub const DEFAULT_MAX_OFFLOADER_LAG_BYTES: u64 = 128 * (1 << 20);
|
||||
pub const DEFAULT_MAX_OFFLOADER_LAG: u64 = 128 * (1 << 20);
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -58,6 +56,7 @@ pub struct SafeKeeperConf {
|
||||
pub no_sync: bool,
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
pub recall_period: Duration,
|
||||
pub remote_storage: Option<RemoteStorageConfig>,
|
||||
pub backup_runtime_threads: usize,
|
||||
pub wal_backup_enabled: bool,
|
||||
@@ -66,8 +65,7 @@ pub struct SafeKeeperConf {
|
||||
pub broker_etcd_prefix: String,
|
||||
pub auth_validation_public_key_path: Option<PathBuf>,
|
||||
pub heartbeat_timeout: Duration,
|
||||
pub max_offloader_lag_bytes: u64,
|
||||
pub log_format: LogFormat,
|
||||
pub max_offloader_lag: u64,
|
||||
}
|
||||
|
||||
impl SafeKeeperConf {
|
||||
@@ -93,6 +91,7 @@ impl Default for SafeKeeperConf {
|
||||
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
|
||||
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
|
||||
remote_storage: None,
|
||||
recall_period: defaults::DEFAULT_RECALL_PERIOD,
|
||||
my_id: NodeId(0),
|
||||
broker_endpoints: Vec::new(),
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
@@ -100,8 +99,7 @@ impl Default for SafeKeeperConf {
|
||||
wal_backup_enabled: true,
|
||||
auth_validation_public_key_path: None,
|
||||
heartbeat_timeout: DEFAULT_HEARTBEAT_TIMEOUT,
|
||||
max_offloader_lag_bytes: DEFAULT_MAX_OFFLOADER_LAG_BYTES,
|
||||
log_format: LogFormat::Plain,
|
||||
max_offloader_lag: DEFAULT_MAX_OFFLOADER_LAG,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user