Compare commits

..

1 Commits

Author SHA1 Message Date
Konstantin Knizhnik
121c19fcd6 Replace R-Tree with B-Tree in layer map 2022-10-07 13:11:24 +03:00
176 changed files with 4055 additions and 14184 deletions

View File

@@ -10,7 +10,7 @@
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
### Checklist after release
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/120/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)

View File

@@ -47,7 +47,7 @@ runs:
else
key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-")
fi
echo "KEY=${key}" >> $GITHUB_OUTPUT
echo "::set-output name=KEY::${key}"
- uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }}
@@ -186,7 +186,7 @@ runs:
aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
echo "::set-output name=report-url::${REPORT_URL}"
- name: Release Allure lock
if: ${{ inputs.action == 'generate' && always() }}

View File

@@ -34,7 +34,7 @@ runs:
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX%$GITHUB_RUN_ATTEMPT} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
echo 'SKIPPED=true' >> $GITHUB_OUTPUT
echo '::set-output name=SKIPPED::true'
exit 0
else
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
@@ -42,7 +42,7 @@ runs:
fi
fi
echo 'SKIPPED=false' >> $GITHUB_OUTPUT
echo '::set-output name=SKIPPED::false'
mkdir -p $(dirname $ARCHIVE)
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}

View File

@@ -41,8 +41,8 @@ runs:
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
echo "::set-output name=api_host::${API_HOST}"
echo "::set-output name=region_id::${REGION_ID}"
env:
ENVIRONMENT: ${{ inputs.environment }}
REGION_ID: ${{ inputs.region_id }}
@@ -72,10 +72,10 @@ runs:
dsn=$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .dsn')/main
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
echo "::set-output name=dsn::${dsn}"
project_id=$(echo $project | jq --raw-output '.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
echo "::set-output name=project_id::${project_id}"
env:
API_KEY: ${{ inputs.api_key }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}

View File

@@ -32,7 +32,7 @@ runs:
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "::set-output name=api_host::${API_HOST}"
env:
ENVIRONMENT: ${{ inputs.environment }}

View File

@@ -73,13 +73,6 @@ runs:
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Download compatibility snapshot for Postgres 14
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
@@ -87,8 +80,6 @@ runs:
BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BREAKING_CHANGES: contains(github.event.pull_request.labels.*.name, 'breaking changes')
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
@@ -163,15 +154,6 @@ runs:
scripts/generate_and_push_perf_report.sh
fi
- name: Upload compatibility snapshot for Postgres 14
if: github.ref_name == 'release'
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes a test name (test_prepare_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
path: /tmp/test_output/test_prepare_snapshot/compatibility_snapshot_pg14/
prefix: latest
- name: Create Allure report
if: always()
uses: ./.github/actions/allure-report

View File

@@ -2,6 +2,3 @@ zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version
collections/*
!collections/.keep

View File

@@ -3,7 +3,6 @@
localhost_warning = False
host_key_checking = False
timeout = 30
collections_paths = ./collections
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg

View File

View File

@@ -1,7 +1,7 @@
- name: Upload Neon binaries
hosts: storage
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin
tasks:
@@ -14,8 +14,7 @@
- safekeeper
- name: inform about versions
debug:
msg: "Version to deploy - {{ current_version }}"
debug: msg="Version to deploy - {{ current_version }}"
tags:
- pageserver
- safekeeper
@@ -36,7 +35,7 @@
- name: Deploy pageserver
hosts: pageservers
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin
tasks:
@@ -64,29 +63,15 @@
tags:
- pageserver
- name: read the existing remote pageserver config
ansible.builtin.slurp:
src: /storage/pageserver/data/pageserver.toml
register: _remote_ps_config
tags:
- pageserver
- name: parse the existing pageserver configuration
ansible.builtin.set_fact:
_existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
tags:
- pageserver
- name: construct the final pageserver configuration dict
ansible.builtin.set_fact:
pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
tags:
- pageserver
- name: template the pageserver config
template:
src: templates/pageserver.toml.j2
dest: /storage/pageserver/data/pageserver.toml
- name: update remote storage (s3) config
lineinfile:
path: /storage/pageserver/data/pageserver.toml
line: "{{ item }}"
loop:
- "[remote_storage]"
- "bucket_name = '{{ bucket_name }}'"
- "bucket_region = '{{ bucket_region }}'"
- "prefix_in_bucket = '{{ inventory_hostname }}'"
become: true
tags:
- pageserver
@@ -124,7 +109,7 @@
- name: Deploy safekeeper
hosts: safekeepers
gather_facts: False
remote_user: "{{ remote_user }}"
remote_user: admin
tasks:

View File

@@ -23,7 +23,6 @@ docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/

20
.github/ansible/neon-stress.hosts vendored Normal file
View File

@@ -0,0 +1,20 @@
[pageservers]
neon-stress-ps-1 console_region_id=1
neon-stress-ps-2 console_region_id=1
[safekeepers]
neon-stress-sk-1 console_region_id=1
neon-stress-sk-2 console_region_id=1
neon-stress-sk-3 console_region_id=1
[storage:children]
pageservers
safekeepers
[storage:vars]
env_name = neon-stress
console_mgmt_base_url = http://neon-stress-console.local
bucket_name = neon-storage-ireland
bucket_region = eu-west-1
etcd_endpoints = etcd-stress.local:2379
safekeeper_enable_s3_offload = false

View File

@@ -1,31 +0,0 @@
storage:
vars:
bucket_name: neon-storage-ireland
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-stress-console.local
etcd_endpoints: neon-stress-etcd.local:2379
safekeeper_enable_s3_offload: 'false'
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: neon-stress/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
neon-stress-ps-1:
console_region_id: aws-eu-west-1
neon-stress-ps-2:
console_region_id: aws-eu-west-1
safekeepers:
hosts:
neon-stress-sk-1:
console_region_id: aws-eu-west-1
neon-stress-sk-2:
console_region_id: aws-eu-west-1
neon-stress-sk-3:
console_region_id: aws-eu-west-1

20
.github/ansible/production.hosts vendored Normal file
View File

@@ -0,0 +1,20 @@
[pageservers]
#zenith-1-ps-1 console_region_id=1
zenith-1-ps-2 console_region_id=1
zenith-1-ps-3 console_region_id=1
[safekeepers]
zenith-1-sk-1 console_region_id=1
zenith-1-sk-2 console_region_id=1
zenith-1-sk-3 console_region_id=1
[storage:children]
pageservers
safekeepers
[storage:vars]
env_name = prod-1
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2
etcd_endpoints = zenith-1-etcd.local:2379

View File

@@ -1,33 +0,0 @@
---
storage:
vars:
console_mgmt_base_url: http://console-release.local
bucket_name: zenith-storage-oregon
bucket_region: us-west-2
etcd_endpoints: zenith-1-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: prod-1/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
zenith-1-ps-2:
console_region_id: aws-us-west-2
zenith-1-ps-3:
console_region_id: aws-us-west-2
safekeepers:
hosts:
zenith-1-sk-1:
console_region_id: aws-us-west-2
zenith-1-sk-2:
console_region_id: aws-us-west-2
zenith-1-sk-3:
console_region_id: aws-us-west-2

View File

@@ -12,19 +12,18 @@ cat <<EOF | tee /tmp/payload
"version": 1,
"host": "${HOST}",
"port": 6400,
"region_id": "{{ console_region_id }}",
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false
"http_port": 9898
}
EOF
# check if pageserver already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/pageservers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/pageservers -d@/tmp/payload | jq -r '.ID')
# init pageserver
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data

View File

@@ -14,18 +14,18 @@ cat <<EOF | tee /tmp/payload
"host": "${HOST}",
"port": 6500,
"http_port": 7676,
"region_id": "{{ console_region_id }}",
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}",
"active": false
"availability_zone_id": "${AZ_ID}"
}
EOF
# check if safekeeper already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/safekeepers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/safekeepers -d@/tmp/payload | jq -r '.ID')
# init safekeeper
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
fi

View File

@@ -1,3 +0,0 @@
ansible_connection: aws_ssm
ansible_aws_ssm_bucket_name: neon-dev-bucket
ansible_python_interpreter: /usr/bin/python3

25
.github/ansible/staging.hosts vendored Normal file
View File

@@ -0,0 +1,25 @@
[pageservers]
#zenith-us-stage-ps-1 console_region_id=27
zenith-us-stage-ps-2 console_region_id=27
zenith-us-stage-ps-3 console_region_id=27
zenith-us-stage-ps-4 console_region_id=27
zenith-us-stage-test-ps-1 console_region_id=28
[safekeepers]
zenith-us-stage-sk-4 console_region_id=27
zenith-us-stage-sk-5 console_region_id=27
zenith-us-stage-sk-6 console_region_id=27
zenith-us-stage-test-sk-1 console_region_id=28
zenith-us-stage-test-sk-2 console_region_id=28
zenith-us-stage-test-sk-3 console_region_id=28
[storage:children]
pageservers
safekeepers
[storage:vars]
env_name = us-stage
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1
etcd_endpoints = zenith-us-stage-etcd.local:2379

View File

@@ -1,34 +0,0 @@
storage:
vars:
bucket_name: zenith-staging-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: zenith-us-stage-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "{{ inventory_hostname }}"
safekeeper_s3_prefix: us-stage/wal
hostname_suffix: ".local"
remote_user: admin
children:
pageservers:
hosts:
zenith-us-stage-ps-2:
console_region_id: aws-us-east-1
zenith-us-stage-ps-3:
console_region_id: aws-us-east-1
zenith-us-stage-ps-4:
console_region_id: aws-us-east-1
safekeepers:
hosts:
zenith-us-stage-sk-4:
console_region_id: aws-us-east-1
zenith-us-stage-sk-5:
console_region_id: aws-us-east-1
zenith-us-stage-sk-6:
console_region_id: aws-us-east-1

View File

@@ -1,32 +0,0 @@
storage:
vars:
bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
console_region_id: aws-us-east-2
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.build:
ansible_host: i-027662bd552bf5db0
safekeeper-1.us-east-2.aws.neon.build:
ansible_host: i-0171efc3604a7b907
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce

View File

@@ -1,5 +1,5 @@
[Unit]
Description=Neon pageserver
Description=Zenith pageserver
After=network.target auditd.service
[Service]

View File

@@ -1,12 +1,12 @@
[Unit]
Description=Neon safekeeper
Description=Zenith safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

View File

@@ -1 +0,0 @@
{{ pageserver_config | sivel.toiletwater.to_toml }}

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.us-east-2.aws.neon.build"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: dev
zenith_region: us-east-2
zenith_region_slug: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -46,7 +46,7 @@ jobs:
runs-on: [self-hosted, zenith-benchmarker]
env:
POSTGRES_DISTRIB_DIR: /usr/pgsql
POSTGRES_DISTRIB_DIR: /tmp/pg_install
DEFAULT_PG_VERSION: 14
steps:
@@ -138,31 +138,22 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-compare:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: "10gb"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
strategy:
fail-fast: false
matrix:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
db_size: [ 10gb ]
include:
- platform: neon-captest-new
db_size: 50gb
- platform: neon-captest-prefetch
db_size: 50gb
- platform: rds-aurora
db_size: 50gb
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-aurora ]
runs-on: dev
container:
@@ -187,7 +178,7 @@ jobs:
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-prefetch"]'), matrix.platform)
if: matrix.platform != 'neon-captest-reuse'
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
@@ -213,9 +204,11 @@ jobs:
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
echo "::set-output name=connstr::${CONNSTR}"
psql ${CONNSTR} -c "SELECT version();"
env:
PLATFORM: ${{ matrix.platform }}
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
@@ -234,6 +227,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
env:
PLATFORM: ${{ matrix.platform }}
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -247,6 +241,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
env:
PLATFORM: ${{ matrix.platform }}
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -260,6 +255,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
env:
PLATFORM: ${{ matrix.platform }}
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -272,7 +268,7 @@ jobs:
build_type: ${{ env.BUILD_TYPE }}
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
if: ${{ matrix.platform != 'neon-captest-reuse' && always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: dev

View File

@@ -35,12 +35,12 @@ jobs:
echo ref:$GITHUB_REF_NAME
echo rev:$(git rev-list --count HEAD)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
echo "::set-output name=tag::$GITHUB_RUN_ID"
fi
shell: bash
id: build-tag
@@ -78,12 +78,12 @@ jobs:
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v14)
shell: bash -euxo pipefail {0}
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v15)
shell: bash -euxo pipefail {0}
# Set some environment variables used by all the steps.
@@ -481,7 +481,6 @@ jobs:
neon-image:
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
steps:
@@ -495,11 +494,10 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build neon
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
compute-tools-image:
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
steps:
@@ -510,12 +508,11 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute tools
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
compute-node-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -530,12 +527,11 @@ jobs:
# cloud repo depends on this image name, thus duplicating it
# remove compute-node when cloud repo is updated
- name: Kaniko build compute node with extensions v14 (compatibility)
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
compute-node-image-v14:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -547,13 +543,12 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute node with extensions v14
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:$GITHUB_RUN_ID
compute-node-image-v15:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -565,11 +560,11 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute node with extensions v15
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:$GITHUB_RUN_ID
promote-images:
runs-on: dev
needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
needs: [ neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
strategy:
@@ -582,9 +577,8 @@ jobs:
steps:
- name: Promote image to latest
run: |
export MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=${{needs.tag.outputs.build-tag}} --query 'images[].imageManifest' --output text)
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
run:
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
push-docker-hub:
runs-on: dev
@@ -603,19 +597,19 @@ jobs:
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
- name: Pull neon image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
- name: Pull compute tools image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
- name: Pull compute node image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} compute-node
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
- name: Pull compute node v14 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest compute-node-v14
- name: Pull compute node v15 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest compute-node-v15
- name: Pull rust image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust
@@ -625,11 +619,9 @@ jobs:
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v14:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v15:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest
- name: Configure Docker Hub login
run: |
@@ -677,12 +669,12 @@ jobs:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$PRODUCTION]"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
@@ -718,7 +710,7 @@ jobs:
- name: Setup ansible
run: |
export PATH="/root/.local/bin:$PATH"
pip install --progress-bar off --user ansible boto3 toml
pip install --progress-bar off --user ansible boto3
- name: Redeploy
run: |
@@ -740,48 +732,8 @@ jobs:
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
rm -f neon_install.tar.gz .neon_current_version
deploy-new:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
RELEASE=true ./get_binaries.sh
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.us-east-2.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
@@ -825,52 +777,3 @@ jobs:
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-proxy-new:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region us-east-2 eks update-kubeconfig --name dev-us-east-2-beta --role-arn arn:aws:iam::369495373322:role/github-runner
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
promote-compatibility-test-snapshot:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ deploy, deploy-proxy ]
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
steps:
- name: Promote compatibility snapshot for the release
shell: bash -euxo pipefail {0}
env:
BUCKET: neon-github-public-dev
PREFIX: artifacts/latest
run: |
for build_type in debug release; do
OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst
time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
done

View File

@@ -36,7 +36,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
@@ -56,12 +56,12 @@ jobs:
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v14)
shell: bash -euxo pipefail {0}
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v15)
shell: bash -euxo pipefail {0}
- name: Cache postgres v14 build

895
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,3 @@
# 'named-profiles' feature was stabilized in cargo 1.57. This line makes the
# build work with older cargo versions.
#
# We have this because as of this writing, the latest cargo Debian package
# that's available is 1.56. (Confusingly, the Debian package version number
# is 0.57, whereas 'cargo --version' says 1.56.)
#
# See https://tracker.debian.org/pkg/cargo for the current status of the
# package. When that gets updated, we can remove this.
cargo-features = ["named-profiles"]
[workspace]
members = [
"compute_tools",

View File

@@ -44,7 +44,7 @@ COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin proxy --locked --release \
&& mold -run cargo build --bin pageserver --bin safekeeper --bin proxy --locked --release \
&& cachepot -s
# Build final image
@@ -63,11 +63,9 @@ RUN set -e \
&& useradd -d /data neon \
&& chown -R neon:neon /data
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
@@ -87,3 +85,4 @@ VOLUME ["/data"]
USER neon
EXPOSE 6400
EXPOSE 9898
CMD ["/bin/bash"]

View File

@@ -1,26 +1,24 @@
#
# This file is identical to the Dockerfile.compute-node-v15 file
# except for the version of Postgres that is built.
#
ARG TAG=pinned
# apparently, ARGs don't get replaced in RUN commands in kaniko
# ARG POSTGIS_VERSION=3.3.0
# ARG PLV8_VERSION=3.1.4
# ARG PG_VERSION=v14
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
apt update
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY vendor/postgres-v14 postgres
RUN cd postgres && \
@@ -31,20 +29,22 @@ RUN cd postgres && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
# PostGIS compiles against neon postgres sources without changes. Perhaps we
# could even use the upstream binaries, compiled against vanilla Postgres, but
# it would require some investigation to check that it works, and also keeps
# working in the future. So for now, we compile our own binaries.
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
tar xvzf postgis-3.3.1.tar.gz && \
cd postgis-3.3.1 && \
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
tar xvzf postgis-3.3.0.tar.gz && \
cd postgis-3.3.0 && \
./autogen.sh && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
./configure && \
@@ -57,55 +57,39 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5
# https://github.com/plv8/plv8/issues/475:
# v8 uses gold for linking and sets `--thread-count=4` which breaks
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
# Install newer gold version manually as debian-testing binutils version updates
# libc version, which in turn breaks other extension built against non-testing libc.
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
tar xvzf binutils-2.38.tar.gz && \
cd binutils-2.38 && \
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
cd ../bfd && ./configure && make bfdver.h && \
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
cp /usr/local/bin/ld.gold /usr/bin/gold
# https://github.com/plv8/plv8/issues/475
# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
RUN apt update && \
apt install -y --no-install-recommends -t testing binutils
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
tar xvzf v3.1.4.tar.gz && \
cd plv8-3.1.4 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN apt update && \
apt install -y --no-install-recommends -t testing cmake
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
tar xvzf h3.tgz && \
@@ -124,18 +108,16 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
# plv8 still sometimes crashes during the creation
# COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY pgxn/ pgxn/
@@ -145,22 +127,16 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
-C pgxn/neon \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
@@ -178,12 +154,10 @@ RUN rm -r /usr/local/pgsql/lib/pgxs/src
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
@@ -200,6 +174,8 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
# GLIBC 2.34 for plv8.
# Debian bullseye provides GLIBC 2.31, so we install the library from testing
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
@@ -212,6 +188,12 @@ RUN apt update && \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
echo "Installing GLIBC 2.34" && \
echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
apt update && \
apt install -y --no-install-recommends -t testing libc6 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres

View File

@@ -4,23 +4,26 @@
#
ARG TAG=pinned
# apparently, ARGs don't get replaced in RUN commands in kaniko
# ARG POSTGIS_VERSION=3.3.1
# ARG PLV8_VERSION=3.1.4
# ARG PG_VERSION=v15
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
apt update
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY vendor/postgres-v15 postgres
RUN cd postgres && \
@@ -31,12 +34,14 @@ RUN cd postgres && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
# PostGIS compiles against neon postgres sources without changes. Perhaps we
# could even use the upstream binaries, compiled against vanilla Postgres, but
# it would require some investigation to check that it works, and also keeps
# working in the future. So for now, we compile our own binaries.
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
@@ -57,55 +62,39 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils
apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5
# https://github.com/plv8/plv8/issues/475:
# v8 uses gold for linking and sets `--thread-count=4` which breaks
# gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
# Install newer gold version manually as debian-testing binutils version updates
# libc version, which in turn breaks other extension built against non-testing libc.
RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
tar xvzf binutils-2.38.tar.gz && \
cd binutils-2.38 && \
cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
cd ../bfd && ./configure && make bfdver.h && \
cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
cp /usr/local/bin/ld.gold /usr/bin/gold
# https://github.com/plv8/plv8/issues/475
# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
RUN apt update && \
apt install -y --no-install-recommends -t testing binutils
# Sed is used to patch for https://github.com/plv8/plv8/issues/503
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
tar xvzf v3.1.4.tar.gz && \
cd plv8-3.1.4 && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN apt update && \
apt install -y --no-install-recommends -t testing cmake
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
tar xvzf h3.tgz && \
@@ -124,18 +113,16 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
# plv8 still sometimes crashes during the creation
# COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY pgxn/ pgxn/
@@ -145,22 +132,16 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
-C pgxn/neon \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
@@ -178,12 +159,10 @@ RUN rm -r /usr/local/pgsql/lib/pgxs/src
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
@@ -200,6 +179,8 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
# libreadline8 for psql
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
# GLIBC 2.34 for plv8.
# Debian bullseye provides GLIBC 2.31, so we install the library from testing
#
# Lastly, link compute_ctl into zenith_ctl while we're at it,
# so that we don't need to put this in another layer.
@@ -212,6 +193,12 @@ RUN apt update && \
libproj19 \
libprotobuf-c1 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
echo "Installing GLIBC 2.34" && \
echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
apt update && \
apt install -y --no-install-recommends -t testing libc6 && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
USER postgres

View File

@@ -6,7 +6,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0"
chrono = "0.4"
clap = "4.0"
clap = "3.0"
env_logger = "0.9"
futures = "0.3.13"
hyper = { version = "0.14", features = ["full"] }

View File

@@ -51,19 +51,53 @@ fn main() -> Result<()> {
// TODO: re-use `utils::logging` later
init_logger(DEFAULT_LOG_LEVEL)?;
let matches = cli().get_matches();
// Env variable is set by `cargo`
let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
let matches = clap::App::new("compute_ctl")
.version(version.unwrap_or("unknown"))
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
.get_matches();
let pgdata = matches
.get_one::<String>("pgdata")
.expect("PGDATA path is required");
let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
let connstr = matches
.get_one::<String>("connstr")
.value_of("connstr")
.expect("Postgres connection string is required");
let spec = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let spec = matches.value_of("spec");
let spec_path = matches.value_of("spec-path");
// Try to use just 'postgres' if no path is provided
let pgbin = matches.get_one::<String>("pgbin").unwrap();
let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
let spec: ComputeSpec = match spec {
// First, try to get cluster spec from the cli argument
@@ -139,48 +173,3 @@ fn main() -> Result<()> {
}
}
}
fn cli() -> clap::Command {
// Env variable is set by `cargo`
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
clap::Command::new("compute_ctl")
.version(version)
.arg(
Arg::new("connstr")
.short('C')
.long("connstr")
.value_name("DATABASE_URL")
.required(true),
)
.arg(
Arg::new("pgdata")
.short('D')
.long("pgdata")
.value_name("DATADIR")
.required(true),
)
.arg(
Arg::new("pgbin")
.short('b')
.long("pgbin")
.default_value("postgres")
.value_name("POSTGRES_PATH"),
)
.arg(
Arg::new("spec")
.short('s')
.long("spec")
.value_name("SPEC_JSON"),
)
.arg(
Arg::new("spec-path")
.short('S')
.long("spec-path")
.value_name("SPEC_PATH"),
)
}
#[test]
fn verify_cli() {
cli().debug_assert()
}

View File

@@ -8,10 +8,11 @@ use std::process::Child;
use std::time::{Duration, Instant};
use anyhow::{bail, Result};
use notify::{RecursiveMode, Watcher};
use postgres::{Client, Transaction};
use serde::Deserialize;
use notify::{RecursiveMode, Watcher};
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
/// Rust representation of Postgres role info with only those fields
@@ -168,7 +169,7 @@ impl Database {
/// it may require a proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.pg_quote())
write!(params, " OWNER {}", &self.owner.quote())
.expect("String is documented to not to error during write operations");
params
@@ -179,17 +180,18 @@ impl Database {
/// intended to be used for DB / role names.
pub type PgIdent = String;
/// Generic trait used to provide quoting / encoding for strings used in the
/// Postgres SQL queries and DATABASE_URL.
pub trait Escaping {
fn pg_quote(&self) -> String;
/// Generic trait used to provide quoting for strings used in the
/// Postgres SQL queries. Currently used only to implement quoting
/// of identifiers, but could be used for literals in the future.
pub trait PgQuote {
fn quote(&self) -> String;
}
impl Escaping for PgIdent {
impl PgQuote for PgIdent {
/// This is intended to mimic Postgres quote_ident(), but for simplicity it
/// always quotes provided string with `""` and escapes every `"`.
/// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
fn pg_quote(&self) -> String {
/// always quotes provided string with `""` and escapes every `"`. Not idempotent,
/// i.e. if string is already escaped it will be escaped again.
fn quote(&self) -> String {
let result = format!("\"{}\"", self.replace('"', "\"\""));
result
}

View File

@@ -1,9 +1,7 @@
use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use log::{info, log_enabled, warn, Level};
use postgres::config::Config;
use postgres::{Client, NoTls};
use serde::Deserialize;
@@ -117,8 +115,8 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if existing_roles.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER ROLE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
op.name.quote(),
new_name.quote()
);
warn!("renaming role '{}' to '{}'", op.name, new_name);
@@ -164,7 +162,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
}
if update_role {
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
let mut query: String = format!("ALTER ROLE {} ", name.quote());
info_print!(" -> update");
query.push_str(&role.to_pg_options());
@@ -172,7 +170,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
}
} else {
info!("role name: '{}'", &name);
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
let mut query: String = format!("CREATE ROLE {} ", name.quote());
info!("role create query: '{}'", &query);
info_print!(" -> create");
@@ -181,7 +179,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.pg_quote()
name.quote()
);
xact.execute(grant_query.as_str(), &[])?;
info!("role grant query: '{}'", &grant_query);
@@ -217,7 +215,7 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
// We do not check either role exists or not,
// Postgres will take care of it for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
@@ -232,16 +230,17 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &node.spec.cluster.databases {
if db.owner != *role_name {
let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name);
let mut connstr = node.connstr.clone();
// database name is always the last and the only component of the path
connstr.set_path(&db.name);
let mut client = conf.connect(NoTls)?;
let mut client = Client::connect(connstr.as_str(), NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.pg_quote(),
db.owner.pg_quote()
role_name.quote(),
db.owner.quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
@@ -250,7 +249,7 @@ fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()>
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
client.simple_query(&drop_query)?;
}
}
@@ -280,7 +279,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// We do not check either DB exists or not,
// Postgres will take care of it for us
"delete_db" => {
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());
let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.quote());
warn!("deleting database '{}'", &op.name);
client.execute(query.as_str(), &[])?;
@@ -292,8 +291,8 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if existing_dbs.iter().any(|r| r.name == op.name) {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
new_name.pg_quote()
op.name.quote(),
new_name.quote()
);
warn!("renaming database '{}' to '{}'", op.name, new_name);
@@ -321,7 +320,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// XXX: db owner name is returned as quoted string from Postgres,
// when quoting is needed.
let new_owner = if r.owner.starts_with('"') {
db.owner.pg_quote()
db.owner.quote()
} else {
db.owner.clone()
};
@@ -329,15 +328,15 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if new_owner != r.owner {
let query: String = format!(
"ALTER DATABASE {} OWNER TO {}",
name.pg_quote(),
db.owner.pg_quote()
name.quote(),
db.owner.quote()
);
info_print!(" -> update");
client.execute(query.as_str(), &[])?;
}
} else {
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
let mut query: String = format!("CREATE DATABASE {} ", name.quote());
info_print!(" -> create");
query.push_str(&db.to_pg_options());
@@ -367,7 +366,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
.cluster
.roles
.iter()
.map(|r| r.name.pg_quote())
.map(|r| r.name.quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
@@ -375,7 +374,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.pg_quote(),
dbname.quote(),
roles.join(", ")
);
info!("grant query {}", &query);
@@ -386,11 +385,12 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
// Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically.
let mut db_connstr = node.connstr.clone();
for db in &node.spec.cluster.databases {
let mut conf = Config::from_str(node.connstr.as_str())?;
conf.dbname(&db.name);
// database name is always the last and the only component of the path
db_connstr.set_path(&db.name);
let mut db_client = conf.connect(NoTls)?;
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
// This will only change ownership on the schema itself, not the objects
// inside it. Without it owner of the `public` schema will be `cloud_admin`
@@ -419,36 +419,9 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
END IF;\n\
END\n\
$$;",
db.owner.pg_quote()
db.owner.quote()
);
db_client.simple_query(&alter_query)?;
// Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
// This is needed because since postgres 15 this privilege is removed by default.
let grant_query = "DO $$\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
) AND\n\
current_setting('server_version_num')::int/10000 >= 15\n\
THEN\n\
IF EXISTS(\n\
SELECT rolname\n\
FROM pg_catalog.pg_roles\n\
WHERE rolname = 'web_access'\n\
)\n\
THEN\n\
GRANT CREATE ON SCHEMA public TO web_access;\n\
END IF;\n\
END IF;\n\
END\n\
$$;"
.to_string();
info!("grant query for db {} : {}", &db.name, &grant_query);
db_client.simple_query(&grant_query)?;
}
Ok(())

View File

@@ -33,9 +33,9 @@ mod pg_helpers_tests {
}
#[test]
fn ident_pg_quote() {
fn quote_ident() {
let ident: PgIdent = PgIdent::from("\"name\";\\n select 1;");
assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
assert_eq!(ident.quote(), "\"\"\"name\"\";\\n select 1;\"");
}
}

View File

@@ -4,19 +4,19 @@ version = "0.1.0"
edition = "2021"
[dependencies]
clap = "4.0"
comfy-table = "6.1"
clap = "3.0"
comfy-table = "5.0.1"
git-version = "0.3.5"
tar = "0.4.38"
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
serde = { version = "1.0", features = ["derive"] }
serde_with = "2.0"
serde_with = "1.12.0"
toml = "0.5"
once_cell = "1.13.0"
regex = "1"
anyhow = "1.0"
thiserror = "1"
nix = "0.25"
nix = "0.23"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api

View File

@@ -6,7 +6,7 @@
//! rely on `neon_local` to set up the environment for each test.
//!
use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use clap::{App, AppSettings, Arg, ArgMatches};
use control_plane::compute::ComputeControlPlane;
use control_plane::local_env::{EtcdBroker, LocalEnv};
use control_plane::safekeeper::SafekeeperNode;
@@ -85,7 +85,212 @@ struct TimelineTreeEl {
// * Providing CLI api to the pageserver
// * TODO: export/import to/from usual postgres
fn main() -> Result<()> {
let matches = cli().get_matches();
let branch_name_arg = Arg::new("branch-name")
.long("branch-name")
.takes_value(true)
.help("Name of the branch to be created or used as an alias for other services")
.required(false);
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
let tenant_id_arg = Arg::new("tenant-id")
.long("tenant-id")
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
.takes_value(true)
.required(false);
let timeline_id_arg = Arg::new("timeline-id")
.long("timeline-id")
.help("Timeline id. Represented as a hexadecimal string 32 symbols length")
.takes_value(true)
.required(false);
let pg_version_arg = Arg::new("pg-version")
.long("pg-version")
.help("Postgres version to use for the initial tenant")
.required(false)
.takes_value(true)
.default_value(DEFAULT_PG_VERSION);
let port_arg = Arg::new("port")
.long("port")
.required(false)
.value_name("port");
let stop_mode_arg = Arg::new("stop-mode")
.short('m')
.takes_value(true)
.possible_values(&["fast", "immediate"])
.help("If 'immediate', don't flush repository data at shutdown")
.required(false)
.value_name("stop-mode");
let pageserver_config_args = Arg::new("pageserver-config-override")
.long("pageserver-config-override")
.takes_value(true)
.number_of_values(1)
.multiple_occurrences(true)
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
.required(false);
let lsn_arg = Arg::new("lsn")
.long("lsn")
.help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
.takes_value(true)
.required(false);
let matches = App::new("Neon CLI")
.setting(AppSettings::ArgRequiredElseHelp)
.version(GIT_VERSION)
.subcommand(
App::new("init")
.about("Initialize a new Neon repository")
.arg(pageserver_config_args.clone())
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
.arg(
Arg::new("config")
.long("config")
.required(false)
.value_name("config"),
)
.arg(pg_version_arg.clone())
)
.subcommand(
App::new("timeline")
.about("Manage timelines")
.subcommand(App::new("list")
.about("List all timelines, available to this pageserver")
.arg(tenant_id_arg.clone()))
.subcommand(App::new("branch")
.about("Create a new timeline, using another timeline as a base, copying its data")
.arg(tenant_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name").takes_value(true)
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
.arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn").takes_value(true)
.help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
.subcommand(App::new("create")
.about("Create a new blank timeline")
.arg(tenant_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(pg_version_arg.clone())
)
.subcommand(App::new("import")
.about("Import timeline from basebackup directory")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone())
.arg(Arg::new("node-name").long("node-name").takes_value(true)
.help("Name to assign to the imported timeline"))
.arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
.help("Basebackup tarfile to import"))
.arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
.help("Lsn the basebackup starts at"))
.arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
.help("Wal to add after base"))
.arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
.help("Lsn the basebackup ends at"))
.arg(pg_version_arg.clone())
)
).subcommand(
App::new("tenant")
.setting(AppSettings::ArgRequiredElseHelp)
.about("Manage tenants")
.subcommand(App::new("list"))
.subcommand(App::new("create")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
.arg(pg_version_arg.clone())
)
.subcommand(App::new("config")
.arg(tenant_id_arg.clone())
.arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
)
)
.subcommand(
App::new("pageserver")
.setting(AppSettings::ArgRequiredElseHelp)
.about("Manage pageserver")
.subcommand(App::new("status"))
.subcommand(App::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
.subcommand(App::new("stop").about("Stop local pageserver")
.arg(stop_mode_arg.clone()))
.subcommand(App::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
)
.subcommand(
App::new("safekeeper")
.setting(AppSettings::ArgRequiredElseHelp)
.about("Manage safekeepers")
.subcommand(App::new("start")
.about("Start local safekeeper")
.arg(safekeeper_id_arg.clone())
)
.subcommand(App::new("stop")
.about("Stop local safekeeper")
.arg(safekeeper_id_arg.clone())
.arg(stop_mode_arg.clone())
)
.subcommand(App::new("restart")
.about("Restart local safekeeper")
.arg(safekeeper_id_arg.clone())
.arg(stop_mode_arg.clone())
)
)
.subcommand(
App::new("pg")
.setting(AppSettings::ArgRequiredElseHelp)
.about("Manage postgres instances")
.subcommand(App::new("list").arg(tenant_id_arg.clone()))
.subcommand(App::new("create")
.about("Create a postgres compute node")
.arg(pg_node_arg.clone())
.arg(branch_name_arg.clone())
.arg(tenant_id_arg.clone())
.arg(lsn_arg.clone())
.arg(port_arg.clone())
.arg(
Arg::new("config-only")
.help("Don't do basebackup, create compute node with only config files")
.long("config-only")
.required(false))
.arg(pg_version_arg.clone())
)
.subcommand(App::new("start")
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
.arg(pg_node_arg.clone())
.arg(tenant_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(timeline_id_arg.clone())
.arg(lsn_arg.clone())
.arg(port_arg.clone())
.arg(pg_version_arg.clone())
)
.subcommand(
App::new("stop")
.arg(pg_node_arg.clone())
.arg(tenant_id_arg.clone())
.arg(
Arg::new("destroy")
.help("Also delete data directory (now optional, should be default in future)")
.long("destroy")
.required(false)
)
)
)
.subcommand(
App::new("start")
.about("Start page server and safekeepers")
.arg(pageserver_config_args)
)
.subcommand(
App::new("stop")
.about("Stop page server and safekeepers")
.arg(stop_mode_arg.clone())
)
.get_matches();
let (sub_name, sub_args) = match matches.subcommand() {
Some(subcommand_data) => subcommand_data,
@@ -153,7 +358,9 @@ fn print_timelines_tree(
// Memorize all direct children of each timeline.
for timeline in timelines.iter() {
if let Some(ancestor_timeline_id) = timeline.ancestor_timeline_id {
if let Some(ancestor_timeline_id) =
timeline.local.as_ref().and_then(|l| l.ancestor_timeline_id)
{
timelines_hash
.get_mut(&ancestor_timeline_id)
.context("missing timeline info in the HashMap")?
@@ -164,7 +371,13 @@ fn print_timelines_tree(
for timeline in timelines_hash.values() {
// Start with root local timelines (no ancestors) first.
if timeline.info.ancestor_timeline_id.is_none() {
if timeline
.info
.local
.as_ref()
.and_then(|l| l.ancestor_timeline_id)
.is_none()
{
print_timeline(0, &Vec::from([true]), timeline, &timelines_hash)?;
}
}
@@ -181,8 +394,17 @@ fn print_timeline(
timeline: &TimelineTreeEl,
timelines: &HashMap<TimelineId, TimelineTreeEl>,
) -> Result<()> {
let local_remote = match (timeline.info.local.as_ref(), timeline.info.remote.as_ref()) {
(None, None) => unreachable!("in this case no info for a timeline is found"),
(None, Some(_)) => "(R)",
(Some(_), None) => "(L)",
(Some(_), Some(_)) => "(L+R)",
};
// Draw main padding
print!("{} ", local_remote);
if nesting_level > 0 {
let ancestor_lsn = match timeline.info.ancestor_lsn {
let ancestor_lsn = match timeline.info.local.as_ref().and_then(|i| i.ancestor_lsn) {
Some(lsn) => lsn.to_string(),
None => "Unknown Lsn".to_string(),
};
@@ -270,16 +492,16 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R
fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
sub_match
.get_one::<String>("tenant-id")
.map(|tenant_id| TenantId::from_str(tenant_id))
.value_of("tenant-id")
.map(TenantId::from_str)
.transpose()
.context("Failed to parse tenant id from the argument string")
}
fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId>> {
sub_match
.get_one::<String>("timeline-id")
.map(|timeline_id| TimelineId::from_str(timeline_id))
.value_of("timeline-id")
.map(TimelineId::from_str)
.transpose()
.context("Failed to parse timeline id from the argument string")
}
@@ -288,22 +510,19 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
let initial_timeline_id_arg = parse_timeline_id(init_match)?;
// Create config file
let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
let toml_file: String = if let Some(config_path) = init_match.value_of("config") {
// load and parse the file
std::fs::read_to_string(config_path).with_context(|| {
format!(
"Could not read configuration file '{}'",
config_path.display()
)
})?
std::fs::read_to_string(std::path::Path::new(config_path))
.with_context(|| format!("Could not read configuration file '{config_path}'"))?
} else {
// Built-in default config
default_conf(&EtcdBroker::locate_etcd()?)
};
let pg_version = init_match
.get_one::<u32>("pg-version")
.copied()
.value_of("pg-version")
.unwrap()
.parse::<u32>()
.context("Failed to parse postgres version from the argument string")?;
let mut env =
@@ -339,10 +558,9 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
init_match
.get_many::<String>("pageserver-config-override")
.values_of("pageserver-config-override")
.into_iter()
.flatten()
.map(|s| s.as_str())
.collect()
}
@@ -357,7 +575,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
Some(("create", create_match)) => {
let initial_tenant_id = parse_tenant_id(create_match)?;
let tenant_conf: HashMap<_, _> = create_match
.get_many::<String>("config")
.values_of("config")
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
.unwrap_or_default();
let new_tenant_id = pageserver.tenant_create(initial_tenant_id, tenant_conf)?;
@@ -366,8 +584,9 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
// Create an initial timeline for the new tenant
let new_timeline_id = parse_timeline_id(create_match)?;
let pg_version = create_match
.get_one::<u32>("pg-version")
.copied()
.value_of("pg-version")
.unwrap()
.parse::<u32>()
.context("Failed to parse postgres version from the argument string")?;
let timeline_info = pageserver.timeline_create(
@@ -378,7 +597,10 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
Some(pg_version),
)?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info.last_record_lsn;
let last_record_lsn = timeline_info
.local
.context(format!("Failed to get last record LSN: no local timeline info for timeline {new_timeline_id}"))?
.last_record_lsn;
env.register_branch_mapping(
DEFAULT_BRANCH_NAME.to_string(),
@@ -393,7 +615,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
Some(("config", create_match)) => {
let tenant_id = get_tenant_id(create_match, env)?;
let tenant_conf: HashMap<_, _> = create_match
.get_many::<String>("config")
.values_of("config")
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
.unwrap_or_default();
@@ -420,19 +642,23 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
Some(("create", create_match)) => {
let tenant_id = get_tenant_id(create_match, env)?;
let new_branch_name = create_match
.get_one::<String>("branch-name")
.value_of("branch-name")
.ok_or_else(|| anyhow!("No branch name provided"))?;
let pg_version = create_match
.get_one::<u32>("pg-version")
.copied()
.value_of("pg-version")
.unwrap()
.parse::<u32>()
.context("Failed to parse postgres version from the argument string")?;
let timeline_info =
pageserver.timeline_create(tenant_id, None, None, None, Some(pg_version))?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info.last_record_lsn;
let last_record_lsn = timeline_info
.local
.expect("no local timeline info")
.last_record_lsn;
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
println!(
@@ -444,32 +670,35 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
let tenant_id = get_tenant_id(import_match, env)?;
let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
let name = import_match
.get_one::<String>("node-name")
.value_of("node-name")
.ok_or_else(|| anyhow!("No node name provided"))?;
// Parse base inputs
let base_tarfile = import_match
.get_one::<PathBuf>("base-tarfile")
.ok_or_else(|| anyhow!("No base-tarfile provided"))?
.to_owned();
.value_of("base-tarfile")
.map(|s| PathBuf::from_str(s).unwrap())
.ok_or_else(|| anyhow!("No base-tarfile provided"))?;
let base_lsn = Lsn::from_str(
import_match
.get_one::<String>("base-lsn")
.value_of("base-lsn")
.ok_or_else(|| anyhow!("No base-lsn provided"))?,
)?;
let base = (base_lsn, base_tarfile);
// Parse pg_wal inputs
let wal_tarfile = import_match.get_one::<PathBuf>("wal-tarfile").cloned();
let wal_tarfile = import_match
.value_of("wal-tarfile")
.map(|s| PathBuf::from_str(s).unwrap());
let end_lsn = import_match
.get_one::<String>("end-lsn")
.value_of("end-lsn")
.map(|s| Lsn::from_str(s).unwrap());
// TODO validate both or none are provided
let pg_wal = end_lsn.zip(wal_tarfile);
let pg_version = import_match
.get_one::<u32>("pg-version")
.copied()
.value_of("pg-version")
.unwrap()
.parse::<u32>()
.context("Failed to parse postgres version from the argument string")?;
let mut cplane = ComputeControlPlane::load(env.clone())?;
@@ -484,11 +713,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
Some(("branch", branch_match)) => {
let tenant_id = get_tenant_id(branch_match, env)?;
let new_branch_name = branch_match
.get_one::<String>("branch-name")
.value_of("branch-name")
.ok_or_else(|| anyhow!("No branch name provided"))?;
let ancestor_branch_name = branch_match
.get_one::<String>("ancestor-branch-name")
.map(|s| s.as_str())
.value_of("ancestor-branch-name")
.unwrap_or(DEFAULT_BRANCH_NAME);
let ancestor_timeline_id = env
.get_branch_timeline_id(ancestor_branch_name, tenant_id)
@@ -497,8 +725,8 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
})?;
let start_lsn = branch_match
.get_one::<String>("ancestor-start-lsn")
.map(|lsn_str| Lsn::from_str(lsn_str))
.value_of("ancestor-start-lsn")
.map(Lsn::from_str)
.transpose()
.context("Failed to parse ancestor start Lsn from the request")?;
let timeline_info = pageserver.timeline_create(
@@ -510,7 +738,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
)?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info.last_record_lsn;
let last_record_lsn = timeline_info
.local
.expect("no local timeline info")
.last_record_lsn;
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
@@ -570,7 +801,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
// Use the LSN at the end of the timeline.
timeline_infos
.get(&node.timeline_id)
.map(|bi| bi.last_record_lsn.to_string())
.and_then(|bi| bi.local.as_ref().map(|l| l.last_record_lsn.to_string()))
.unwrap_or_else(|| "?".to_string())
}
Some(lsn) => {
@@ -599,39 +830,45 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
}
"create" => {
let branch_name = sub_args
.get_one::<String>("branch-name")
.map(|s| s.as_str())
.value_of("branch-name")
.unwrap_or(DEFAULT_BRANCH_NAME);
let node_name = sub_args
.get_one::<String>("node")
.map(|node_name| node_name.to_string())
.unwrap_or_else(|| format!("{branch_name}_node"));
.value_of("node")
.map(ToString::to_string)
.unwrap_or_else(|| format!("{}_node", branch_name));
let lsn = sub_args
.get_one::<String>("lsn")
.map(|lsn_str| Lsn::from_str(lsn_str))
.value_of("lsn")
.map(Lsn::from_str)
.transpose()
.context("Failed to parse Lsn from the request")?;
let timeline_id = env
.get_branch_timeline_id(branch_name, tenant_id)
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{}'", branch_name))?;
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
let port: Option<u16> = match sub_args.value_of("port") {
Some(p) => Some(p.parse()?),
None => None,
};
let pg_version = sub_args
.get_one::<u32>("pg-version")
.copied()
.value_of("pg-version")
.unwrap()
.parse::<u32>()
.context("Failed to parse postgres version from the argument string")?;
cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
}
"start" => {
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
let port: Option<u16> = match sub_args.value_of("port") {
Some(p) => Some(p.parse()?),
None => None,
};
let node_name = sub_args
.get_one::<String>("node")
.value_of("node")
.ok_or_else(|| anyhow!("No node name was provided to start"))?;
let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));
let node = cplane.nodes.get(&(tenant_id, node_name.to_owned()));
let auth_token = if matches!(env.pageserver.auth_type, AuthType::NeonJWT) {
let claims = Claims::new(Some(tenant_id), Scope::Tenant);
@@ -642,33 +879,36 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
};
if let Some(node) = node {
println!("Starting existing postgres {node_name}...");
println!("Starting existing postgres {}...", node_name);
node.start(&auth_token)?;
} else {
let branch_name = sub_args
.get_one::<String>("branch-name")
.map(|s| s.as_str())
.value_of("branch-name")
.unwrap_or(DEFAULT_BRANCH_NAME);
let timeline_id = env
.get_branch_timeline_id(branch_name, tenant_id)
.ok_or_else(|| {
anyhow!("Found no timeline id for branch name '{branch_name}'")
anyhow!("Found no timeline id for branch name '{}'", branch_name)
})?;
let lsn = sub_args
.get_one::<String>("lsn")
.map(|lsn_str| Lsn::from_str(lsn_str))
.value_of("lsn")
.map(Lsn::from_str)
.transpose()
.context("Failed to parse Lsn from the request")?;
let pg_version = sub_args
.get_one::<u32>("pg-version")
.copied()
.context("Failed to `pg-version` from the argument string")?;
.value_of("pg-version")
.unwrap()
.parse::<u32>()
.context("Failed to parse postgres version from the argument string")?;
// when used with custom port this results in non obvious behaviour
// port is remembered from first start command, i e
// start --port X
// stop
// start <-- will also use port X even without explicit port argument
println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");
println!(
"Starting new postgres (v{}) {} on timeline {} ...",
pg_version, node_name, timeline_id
);
let node =
cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
@@ -677,18 +917,18 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
}
"stop" => {
let node_name = sub_args
.get_one::<String>("node")
.value_of("node")
.ok_or_else(|| anyhow!("No node name was provided to stop"))?;
let destroy = sub_args.get_flag("destroy");
let destroy = sub_args.is_present("destroy");
let node = cplane
.nodes
.get(&(tenant_id, node_name.to_string()))
.with_context(|| format!("postgres {node_name} is not found"))?;
.get(&(tenant_id, node_name.to_owned()))
.with_context(|| format!("postgres {} is not found", node_name))?;
node.stop(destroy)?;
}
_ => bail!("Unexpected pg subcommand '{sub_name}'"),
_ => bail!("Unexpected pg subcommand '{}'", sub_name),
}
Ok(())
@@ -706,10 +946,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}
Some(("stop", stop_match)) => {
let immediate = stop_match
.get_one::<String>("stop-mode")
.map(|s| s.as_str())
== Some("immediate");
let immediate = stop_match.value_of("stop-mode") == Some("immediate");
if let Err(e) = pageserver.stop(immediate) {
eprintln!("pageserver stop failed: {}", e);
@@ -759,7 +996,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
};
// All the commands take an optional safekeeper name argument
let sk_id = if let Some(id_str) = sub_args.get_one::<String>("id") {
let sk_id = if let Some(id_str) = sub_args.value_of("id") {
NodeId(id_str.parse().context("while parsing safekeeper id")?)
} else {
DEFAULT_SAFEKEEPER_ID
@@ -775,8 +1012,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}
"stop" => {
let immediate =
sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
if let Err(e) = safekeeper.stop(immediate) {
eprintln!("safekeeper stop failed: {}", e);
@@ -785,8 +1021,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}
"restart" => {
let immediate =
sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
let immediate = sub_args.value_of("stop-mode") == Some("immediate");
if let Err(e) = safekeeper.stop(immediate) {
eprintln!("safekeeper stop failed: {}", e);
@@ -830,8 +1065,7 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
}
fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
let immediate =
sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
let immediate = sub_match.value_of("stop-mode") == Some("immediate");
let pageserver = PageServerNode::from_env(env);
@@ -864,219 +1098,3 @@ fn try_stop_etcd_process(env: &local_env::LocalEnv) {
eprintln!("etcd stop failed: {e}");
}
}
fn cli() -> Command {
let branch_name_arg = Arg::new("branch-name")
.long("branch-name")
.help("Name of the branch to be created or used as an alias for other services")
.required(false);
let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
let tenant_id_arg = Arg::new("tenant-id")
.long("tenant-id")
.help("Tenant id. Represented as a hexadecimal string 32 symbols length")
.required(false);
let timeline_id_arg = Arg::new("timeline-id")
.long("timeline-id")
.help("Timeline id. Represented as a hexadecimal string 32 symbols length")
.required(false);
let pg_version_arg = Arg::new("pg-version")
.long("pg-version")
.help("Postgres version to use for the initial tenant")
.required(false)
.value_parser(value_parser!(u32))
.default_value(DEFAULT_PG_VERSION);
let port_arg = Arg::new("port")
.long("port")
.required(false)
.value_parser(value_parser!(u16))
.value_name("port");
let stop_mode_arg = Arg::new("stop-mode")
.short('m')
.value_parser(["fast", "immediate"])
.help("If 'immediate', don't flush repository data at shutdown")
.required(false)
.value_name("stop-mode");
let pageserver_config_args = Arg::new("pageserver-config-override")
.long("pageserver-config-override")
.num_args(1)
.action(ArgAction::Append)
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
.required(false);
let lsn_arg = Arg::new("lsn")
.long("lsn")
.help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
.required(false);
Command::new("Neon CLI")
.arg_required_else_help(true)
.version(GIT_VERSION)
.subcommand(
Command::new("init")
.about("Initialize a new Neon repository")
.arg(pageserver_config_args.clone())
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
.arg(
Arg::new("config")
.long("config")
.required(false)
.value_parser(value_parser!(PathBuf))
.value_name("config"),
)
.arg(pg_version_arg.clone())
)
.subcommand(
Command::new("timeline")
.about("Manage timelines")
.subcommand(Command::new("list")
.about("List all timelines, available to this pageserver")
.arg(tenant_id_arg.clone()))
.subcommand(Command::new("branch")
.about("Create a new timeline, using another timeline as a base, copying its data")
.arg(tenant_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
.arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn")
.help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
.subcommand(Command::new("create")
.about("Create a new blank timeline")
.arg(tenant_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(pg_version_arg.clone())
)
.subcommand(Command::new("import")
.about("Import timeline from basebackup directory")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone())
.arg(Arg::new("node-name").long("node-name")
.help("Name to assign to the imported timeline"))
.arg(Arg::new("base-tarfile")
.long("base-tarfile")
.value_parser(value_parser!(PathBuf))
.help("Basebackup tarfile to import")
)
.arg(Arg::new("base-lsn").long("base-lsn")
.help("Lsn the basebackup starts at"))
.arg(Arg::new("wal-tarfile")
.long("wal-tarfile")
.value_parser(value_parser!(PathBuf))
.help("Wal to add after base")
)
.arg(Arg::new("end-lsn").long("end-lsn")
.help("Lsn the basebackup ends at"))
.arg(pg_version_arg.clone())
)
).subcommand(
Command::new("tenant")
.arg_required_else_help(true)
.about("Manage tenants")
.subcommand(Command::new("list"))
.subcommand(Command::new("create")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
.arg(pg_version_arg.clone())
)
.subcommand(Command::new("config")
.arg(tenant_id_arg.clone())
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
)
)
.subcommand(
Command::new("pageserver")
.arg_required_else_help(true)
.about("Manage pageserver")
.subcommand(Command::new("status"))
.subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
.subcommand(Command::new("stop").about("Stop local pageserver")
.arg(stop_mode_arg.clone()))
.subcommand(Command::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
)
.subcommand(
Command::new("safekeeper")
.arg_required_else_help(true)
.about("Manage safekeepers")
.subcommand(Command::new("start")
.about("Start local safekeeper")
.arg(safekeeper_id_arg.clone())
)
.subcommand(Command::new("stop")
.about("Stop local safekeeper")
.arg(safekeeper_id_arg.clone())
.arg(stop_mode_arg.clone())
)
.subcommand(Command::new("restart")
.about("Restart local safekeeper")
.arg(safekeeper_id_arg)
.arg(stop_mode_arg.clone())
)
)
.subcommand(
Command::new("pg")
.arg_required_else_help(true)
.about("Manage postgres instances")
.subcommand(Command::new("list").arg(tenant_id_arg.clone()))
.subcommand(Command::new("create")
.about("Create a postgres compute node")
.arg(pg_node_arg.clone())
.arg(branch_name_arg.clone())
.arg(tenant_id_arg.clone())
.arg(lsn_arg.clone())
.arg(port_arg.clone())
.arg(
Arg::new("config-only")
.help("Don't do basebackup, create compute node with only config files")
.long("config-only")
.required(false))
.arg(pg_version_arg.clone())
)
.subcommand(Command::new("start")
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
.arg(pg_node_arg.clone())
.arg(tenant_id_arg.clone())
.arg(branch_name_arg)
.arg(timeline_id_arg)
.arg(lsn_arg)
.arg(port_arg)
.arg(pg_version_arg)
)
.subcommand(
Command::new("stop")
.arg(pg_node_arg)
.arg(tenant_id_arg)
.arg(
Arg::new("destroy")
.help("Also delete data directory (now optional, should be default in future)")
.long("destroy")
.action(ArgAction::SetTrue)
.required(false)
)
)
)
.subcommand(
Command::new("start")
.about("Start page server and safekeepers")
.arg(pageserver_config_args)
)
.subcommand(
Command::new("stop")
.about("Stop page server and safekeepers")
.arg(stop_mode_arg)
)
}
#[test]
fn verify_cli() {
cli().debug_assert();
}

View File

@@ -183,18 +183,18 @@ impl PostgresNode {
}
fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
let pg_path = self.env.pg_bin_dir(pg_version).join("postgres");
let mut cmd = Command::new(&pg_path);
cmd.arg("--sync-safekeepers")
.env_clear()
.env(
"LD_LIBRARY_PATH",
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
self.env.pg_lib_dir(pg_version).to_str().unwrap(),
)
.env(
"DYLD_LIBRARY_PATH",
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
self.env.pg_lib_dir(pg_version).to_str().unwrap(),
)
.env("PGDATA", self.pgdata().to_str().unwrap())
.stdout(Stdio::piped())
@@ -282,7 +282,9 @@ impl PostgresNode {
fn setup_pg_conf(&self, auth_type: AuthType) -> Result<()> {
let mut conf = PostgresConf::new();
conf.append("max_wal_senders", "10");
conf.append("wal_log_hints", "off");
// wal_log_hints is mandatory when running against pageserver (see gh issue#192)
// TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE?
conf.append("wal_log_hints", "on");
conf.append("max_replication_slots", "10");
conf.append("hot_standby", "on");
conf.append("shared_buffers", "1MB");
@@ -420,7 +422,7 @@ impl PostgresNode {
}
fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
let pg_ctl_path = self.env.pg_bin_dir(self.pg_version).join("pg_ctl");
let mut cmd = Command::new(pg_ctl_path);
cmd.args(
[
@@ -438,11 +440,11 @@ impl PostgresNode {
.env_clear()
.env(
"LD_LIBRARY_PATH",
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
self.env.pg_lib_dir(self.pg_version).to_str().unwrap(),
)
.env(
"DYLD_LIBRARY_PATH",
self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
self.env.pg_lib_dir(self.pg_version).to_str().unwrap(),
);
if let Some(token) = auth_token {
cmd.env("ZENITH_AUTH_TOKEN", token);

View File

@@ -52,10 +52,6 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
// size smaller. Our test etcd clusters are very small.
// See https://github.com/etcd-io/etcd/issues/7910
"--quota-backend-bytes=100000000".to_string(),
// etcd doesn't compact (vacuum) with default settings,
// enable it to prevent space exhaustion.
"--auto-compaction-mode=revision".to_string(),
"--auto-compaction-retention=1".to_string(),
])
.stdout(Stdio::from(etcd_stdout_file))
.stderr(Stdio::from(etcd_stderr_file))

View File

@@ -201,28 +201,28 @@ impl LocalEnv {
self.pg_distrib_dir.clone()
}
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
pub fn pg_distrib_dir(&self, pg_version: u32) -> PathBuf {
let path = self.pg_distrib_dir.clone();
match pg_version {
14 => Ok(path.join(format!("v{pg_version}"))),
15 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
14 => path.join(format!("v{pg_version}")),
15 => path.join(format!("v{pg_version}")),
_ => panic!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
pub fn pg_bin_dir(&self, pg_version: u32) -> PathBuf {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
_ => bail!("Unsupported postgres version: {}", pg_version),
14 => self.pg_distrib_dir(pg_version).join("bin"),
15 => self.pg_distrib_dir(pg_version).join("bin"),
_ => panic!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
pub fn pg_lib_dir(&self, pg_version: u32) -> PathBuf {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
_ => bail!("Unsupported postgres version: {}", pg_version),
14 => self.pg_distrib_dir(pg_version).join("lib"),
15 => self.pg_distrib_dir(pg_version).join("lib"),
_ => panic!("Unsupported postgres version: {}", pg_version),
}
}
@@ -422,10 +422,10 @@ impl LocalEnv {
"directory '{}' already exists. Perhaps already initialized?",
base_path.display()
);
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
if !self.pg_bin_dir(pg_version).join("postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_bin_dir(pg_version)?.display()
self.pg_bin_dir(pg_version).display()
);
}
for binary in ["pageserver", "safekeeper"] {

View File

@@ -12,8 +12,13 @@ use nix::unistd::Pid;
use postgres::Config;
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use safekeeper_api::models::TimelineCreateRequest;
use thiserror::Error;
use utils::{connstring::connection_address, http::error::HttpErrorBody, id::NodeId};
use utils::{
connstring::connection_address,
http::error::HttpErrorBody,
id::{NodeId, TenantId, TimelineId},
};
use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::storage::PageServerNode;
@@ -123,6 +128,7 @@ impl SafekeeperNode {
.args(&["--id", self.id.to_string().as_ref()])
.args(&["--listen-pg", &listen_pg])
.args(&["--listen-http", &listen_http])
.args(&["--recall", "1 second"])
.arg("--daemonize"),
);
if !self.conf.sync {
@@ -275,4 +281,24 @@ impl SafekeeperNode {
.error_from_body()?;
Ok(())
}
pub fn timeline_create(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
peer_ids: Vec<NodeId>,
) -> Result<()> {
Ok(self
.http_request(
Method::POST,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
)
.json(&TimelineCreateRequest {
timeline_id,
peer_ids,
})
.send()?
.error_from_body()?
.json()?)
}
}

View File

@@ -1,48 +0,0 @@
#!/bin/bash
set -eux
PG_VERSION=${PG_VERSION:-14}
SPEC_FILE_ORG=/var/db/postgres/specs/spec.json
SPEC_FILE=/tmp/spec.json
echo "Waiting pageserver become ready."
while ! nc -z pageserver 6400; do
sleep 1;
done
echo "Page server is ready."
echo "Create a tenant and timeline"
PARAMS=(
-sb
-X POST
-H "Content-Type: application/json"
-d "{}"
http://pageserver:9898/v1/tenant/
)
tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g')
PARAMS=(
-sb
-X POST
-H "Content-Type: application/json"
-d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
echo "Overwrite tenant id and timeline id in spec file"
tenant_id=$(echo ${result} | jq -r .tenant_id)
timeline_id=$(echo ${result} | jq -r .timeline_id)
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
cat ${SPEC_FILE}
echo "Start compute node"
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
-C "postgresql://cloud_admin@localhost:55433/postgres" \
-b /usr/local/bin/postgres \
-S ${SPEC_FILE}

View File

@@ -1,141 +0,0 @@
{
"format_version": 1.0,
"timestamp": "2022-10-12T18:00:00.000Z",
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c",
"cluster": {
"cluster_id": "docker_compose",
"name": "docker_compose_test",
"state": "restarted",
"roles": [
{
"name": "cloud_admin",
"encrypted_password": "b093c0d3b281ba6da1eacc608620abd8",
"options": null
}
],
"databases": [
],
"settings": [
{
"name": "fsync",
"value": "off",
"vartype": "bool"
},
{
"name": "wal_level",
"value": "replica",
"vartype": "enum"
},
{
"name": "hot_standby",
"value": "on",
"vartype": "bool"
},
{
"name": "wal_log_hints",
"value": "on",
"vartype": "bool"
},
{
"name": "log_connections",
"value": "on",
"vartype": "bool"
},
{
"name": "port",
"value": "55433",
"vartype": "integer"
},
{
"name": "shared_buffers",
"value": "1MB",
"vartype": "string"
},
{
"name": "max_connections",
"value": "100",
"vartype": "integer"
},
{
"name": "listen_addresses",
"value": "0.0.0.0",
"vartype": "string"
},
{
"name": "max_wal_senders",
"value": "10",
"vartype": "integer"
},
{
"name": "max_replication_slots",
"value": "10",
"vartype": "integer"
},
{
"name": "wal_sender_timeout",
"value": "5s",
"vartype": "string"
},
{
"name": "wal_keep_size",
"value": "0",
"vartype": "integer"
},
{
"name": "password_encryption",
"value": "md5",
"vartype": "enum"
},
{
"name": "restart_after_crash",
"value": "off",
"vartype": "bool"
},
{
"name": "synchronous_standby_names",
"value": "walproposer",
"vartype": "string"
},
{
"name": "shared_preload_libraries",
"value": "neon",
"vartype": "string"
},
{
"name": "neon.safekeepers",
"value": "safekeeper1:5454,safekeeper2:5454,safekeeper3:5454",
"vartype": "string"
},
{
"name": "neon.timeline_id",
"value": "TIMELINE_ID",
"vartype": "string"
},
{
"name": "neon.tenant_id",
"value": "TENANT_ID",
"vartype": "string"
},
{
"name": "neon.pageserver_connstring",
"value": "host=pageserver port=6400",
"vartype": "string"
},
{
"name": "max_replication_write_lag",
"value": "500MB",
"vartype": "string"
},
{
"name": "max_replication_flush_lag",
"value": "10GB",
"vartype": "string"
}
]
},
"delta_operations": [
]
}

View File

@@ -1,200 +0,0 @@
version: '3'
services:
etcd:
image: quay.io/coreos/etcd:v3.5.4
ports:
- 2379:2379
- 2380:2380
environment:
# This signifficantly speeds up etcd and we anyway don't data persistency there.
ETCD_UNSAFE_NO_FSYNC: "1"
command:
- "etcd"
- "--auto-compaction-mode=revision"
- "--auto-compaction-retention=1"
- "--name=etcd-cluster"
- "--initial-cluster-state=new"
- "--initial-cluster-token=etcd-cluster-1"
- "--initial-cluster=etcd-cluster=http://etcd:2380"
- "--initial-advertise-peer-urls=http://etcd:2380"
- "--advertise-client-urls=http://etcd:2379"
- "--listen-client-urls=http://0.0.0.0:2379"
- "--listen-peer-urls=http://0.0.0.0:2380"
- "--quota-backend-bytes=134217728" # 128 MB
minio:
image: quay.io/minio/minio:RELEASE.2022-10-20T00-55-09Z
ports:
- 9000:9000
- 9001:9001
environment:
- MINIO_ROOT_USER=minio
- MINIO_ROOT_PASSWORD=password
command: server /data --address :9000 --console-address ":9001"
minio_create_buckets:
image: minio/mc
environment:
- MINIO_ROOT_USER=minio
- MINIO_ROOT_PASSWORD=password
entrypoint:
- "/bin/sh"
- "-c"
command:
- "until (/usr/bin/mc alias set minio http://minio:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD) do
echo 'Waiting to start minio...' && sleep 1;
done;
/usr/bin/mc mb minio/neon --region=eu-north-1;
exit 0;"
depends_on:
- minio
pageserver:
image: neondatabase/neon:${TAG:-latest}
environment:
- BROKER_ENDPOINT='http://etcd:2379'
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 6400:6400 # pg protocol handler
- 9898:9898 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "/usr/local/bin/pageserver -D /data/.neon/
-c \"broker_endpoints=[$$BROKER_ENDPOINT]\"
-c \"listen_pg_addr='0.0.0.0:6400'\"
-c \"listen_http_addr='0.0.0.0:9898'\"
-c \"remote_storage={endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/pageserver/'}\""
depends_on:
- etcd
- minio_create_buckets
safekeeper1:
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper1:5454
- SAFEKEEPER_ID=1
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 5454:5454 # pg protocol handler
- 7676:7676 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- etcd
- minio_create_buckets
safekeeper2:
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper2:5454
- SAFEKEEPER_ID=2
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 5454:5454 # pg protocol handler
- 7677:7676 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- etcd
- minio_create_buckets
safekeeper3:
image: neondatabase/neon:${TAG:-latest}
environment:
- SAFEKEEPER_ADVERTISE_URL=safekeeper3:5454
- SAFEKEEPER_ID=3
- BROKER_ENDPOINT=http://etcd:2379
- AWS_ACCESS_KEY_ID=minio
- AWS_SECRET_ACCESS_KEY=password
#- RUST_BACKTRACE=1
ports:
#- 5454:5454 # pg protocol handler
- 7678:7676 # http endpoints
entrypoint:
- "/bin/sh"
- "-c"
command:
- "safekeeper --listen-pg=$$SAFEKEEPER_ADVERTISE_URL
--listen-http='0.0.0.0:7676'
--id=$$SAFEKEEPER_ID
--broker-endpoints=$$BROKER_ENDPOINT
-D /data
--remote-storage=\"{endpoint='http://minio:9000',
bucket_name='neon',
bucket_region='eu-north-1',
prefix_in_bucket='/safekeeper/'}\""
depends_on:
- etcd
- minio_create_buckets
compute:
build:
context: ./image/compute
args:
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}:${TAG:-latest}
- http_proxy=$http_proxy
- https_proxy=$https_proxy
environment:
- PG_VERSION=${PG_VERSION:-14}
#- RUST_BACKTRACE=1
volumes:
- ./compute/var/db/postgres/specs/:/var/db/postgres/specs/
- ./compute/shell/:/shell/
ports:
- 55433:55433 # pg protocol handler
- 3080:3080 # http endpoints
entrypoint:
- "/shell/compute.sh"
depends_on:
- safekeeper1
- safekeeper2
- safekeeper3
- pageserver
compute_is_ready:
image: postgres:latest
entrypoint:
- "/bin/bash"
- "-c"
command:
- "until pg_isready -h compute -p 55433 ; do
echo 'Waiting to start compute...' && sleep 1;
done"
depends_on:
- compute

View File

@@ -1,10 +0,0 @@
ARG COMPUTE_IMAGE=compute-node-v14:latest
FROM neondatabase/${COMPUTE_IMAGE}
USER root
RUN apt-get update && \
apt-get install -y curl \
jq \
netcat
USER postgres

View File

@@ -80,6 +80,4 @@
- [015-storage-messaging](rfcs/015-storage-messaging.md)
- [016-connection-routing](rfcs/016-connection-routing.md)
- [017-timeline-data-management](rfcs/017-timeline-data-management.md)
- [018-storage-messaging-2](rfcs/018-storage-messaging-2.md)
- [019-tenant-timeline-lifecycles](rfcs/019-tenant-timeline-lifecycles.md)
- [cluster-size-limits](rfcs/cluster-size-limits.md)

View File

@@ -18,67 +18,3 @@ We build all images after a successful `release` tests run and push automaticall
1. `neondatabase/compute-tools` and `neondatabase/compute-node`
2. `neondatabase/neon`
## Docker Compose example
You can see a [docker compose](https://docs.docker.com/compose/) example to create a neon cluster in [/docker-compose/docker-compose.yml](/docker-compose/docker-compose.yml). It creates the following conatainers.
- etcd x 1
- pageserver x 1
- safekeeper x 3
- compute x 1
- MinIO x 1 # This is Amazon S3 compatible object storage
### How to use
1. create containers
You can specify version of neon cluster using following environment values.
- PG_VERSION: postgres version for compute (default is 14)
- TAG: the tag version of [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags) (default is latest), which is tagged in [CI test](/.github/workflows/build_and_test.yml)
```
$ cd docker-compose/docker-compose.yml
$ docker-compose down # remove the conainers if exists
$ PG_VERSION=15 TAG=2221 docker-compose up --build -d # You can specify the postgres and image version
Creating network "dockercompose_default" with the default driver
Creating dockercompose_etcd3_1 ...
(...omit...)
```
2. connect compute node
```
$ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
$ psql -h localhost -p 55433 -U cloud_admin
postgres=# CREATE TABLE t(key int primary key, value text);
CREATE TABLE
postgres=# insert into t values(1,1);
INSERT 0 1
postgres=# select * from t;
key | value
-----+-------
1 | 1
(1 row)
```
3. If you want to see the log, you can use `docker-compose logs` command.
```
# check the container name you want to see
$ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
d6968a5ae912 dockercompose_compute "/shell/compute.sh" 5 minutes ago Up 5 minutes 0.0.0.0:3080->3080/tcp, 0.0.0.0:55433->55433/tcp dockercompose_compute_1
(...omit...)
$ docker logs -f dockercompose_compute_1
2022-10-21 06:15:48.757 GMT [56] LOG: connection authorized: user=cloud_admin database=postgres application_name=psql
2022-10-21 06:17:00.307 GMT [56] LOG: [NEON_SMGR] libpagestore: connected to 'host=pageserver port=6400'
(...omit...)
```
4. If you want to see durable data in MinIO which is s3 compatible storage
Access http://localhost:9001 and sign in.
- Username: `minio`
- Password: `password`
You can see durable pages and WAL data in `neon` bucket.

View File

@@ -1,163 +0,0 @@
# Storage messaging
Safekeepers need to communicate to each other to
* Trim WAL on safekeepers;
* Decide on which SK should push WAL to the S3;
* Decide on when to shut down SK<->pageserver connection;
* Understand state of each other to perform peer recovery;
Pageservers need to communicate to safekeepers to decide which SK should provide
WAL to the pageserver.
This is an iteration on [015-storage-messaging](https://github.com/neondatabase/neon/blob/main/docs/rfcs/015-storage-messaging.md) describing current situation,
potential performance issue and ways to address it.
## Background
What we have currently is very close to etcd variant described in
015-storage-messaging. Basically, we have single `SkTimelineInfo` message
periodically sent by all safekeepers to etcd for each timeline.
* Safekeepers subscribe to it to learn status of peers (currently they subscribe to
'everything', but they can and should fetch data only for timelines they hold).
* Pageserver subscribes to it (separate watch per timeline) to learn safekeepers
positions; based on that, it decides from which safekeepers to pull WAL.
Also, safekeepers use etcd elections API to make sure only single safekeeper
offloads WAL.
It works, and callmemaybe is gone. However, this has a performance
hazard. Currently deployed etcd can do about 6k puts per second (using its own
`benchmark` tool); on my 6 core laptop, while running on tmpfs, this gets to
35k. Making benchmark closer to our usage [etcd watch bench](https://github.com/arssher/etcd-client/blob/watch-bench/examples/watch_bench.rs),
I get ~10k received messages per second with various number of publisher-subscribers
(laptop, tmpfs). Diving this by 12 (3 sks generate msg, 1 ps + 3 sk consume them) we
get about 800 active timelines, if message is sent each second. Not extremely
low, but quite reachable.
A lot of idle watches seem to be ok though -- which is good, as pageserver
subscribes to all its timelines regardless of their activity.
Also, running etcd with fsyncs disabled is messy -- data dir must be wiped on
each restart or there is a risk of corruption errors.
The reason is etcd making much more than what we need; it is a fault tolerant
store with strong consistency, but I claim all we need here is just simplest pub
sub with best effort delivery, because
* We already have centralized source of truth for long running data, like which
tlis are on which nodes -- the console.
* Momentary data (safekeeper/pageserver progress) doesn't make sense to persist.
Instead of putting each change to broker, expecting it to reliably deliver it
is better to just have constant flow of data for active timelines: 1) they
serve as natural heartbeats -- if node can't send, we shouldn't pull WAL from
it 2) it is simpler -- no need to track delivery to/from the broker.
Moreover, latency here is important: the faster we obtain fresh data, the
faster we can switch to proper safekeeper after failure.
* As for WAL offloading leader election, it is trivial to achieve through these
heartbeats -- just take suitable node through deterministic rule (min node
id). Once network is stable, this is a converging process (well, except
complicated failure topology, but even then making it converge is not
hard). Such elections bear some risk of several offloaders running
concurrently for a short period of time, but that's harmless.
Generally, if one needs strong consistency, electing leader per se is not
enough; it must be accompanied with number (logical clock ts), checked at
every action to track causality. s3 doesn't provide CAS, so it can't
differentiate old/new leader, this must be solved differently.
We could use etcd CAS (its most powerful/useful primitive actually) to issue
these leader numbers (and e.g. prefix files in s3), but currently I don't see
need for that.
Obviously best effort pub sub is much more simpler and performant; the one proposed is
## gRPC broker
I took tonic and [prototyped](https://github.com/neondatabase/neon/blob/asher/neon-broker/broker/src/broker.rs) the replacement of functionality we currently use
with grpc streams and tokio mpsc channels. The implementation description is at the file header.
It is just 500 lines of code and core functionality is complete. 1-1 pub sub
gives about 120k received messages per second; having multiple subscribers in
different connecitons quickly scales to 1 million received messages per second.
I had concerns about many concurrent streams in singe connection, but 2^20
subscribers still work (though eat memory, with 10 publishers 20GB are consumed;
in this implementation each publisher holds full copy of all subscribers). There
is `bench.rs` nearby which I used for testing.
`SkTimelineInfo` is wired here, but another message can be added (e.g. if
pageservers want to communicate with each other) with templating.
### Fault tolerance
Since such broker is stateless, we can run it under k8s. Or add proxying to
other members, with best-effort this is simple.
### Security implications
Communication happens in a private network that is not exposed to users;
additionaly we can add auth to the broker.
## Alternative: get existing pub-sub
We could take some existing pub sub solution, e.g. RabbitMQ, Redis. But in this
case IMV simplicity of our own outweights external dependency costs (RabbitMQ is
much more complicated and needs VM; Redis Rust client maintenance is not
ideal...). Also note that projects like CockroachDB and TiDB are based on gRPC
as well.
## Alternative: direct communication
Apart from being transport, broker solves one more task: discovery, i.e. letting
safekeepers and pageservers find each other. We can let safekeepers know, for
each timeline, both other safekeepers for this timeline and pageservers serving
it. In this case direct communication is possible:
- each safekeeper pushes to each other safekeeper status of timelines residing
on both of them, letting remove WAL, decide who offloads, decide on peer
recovery;
- each safekeeper pushes to each pageserver status of timelines residing on
both of them, letting pageserver choose from which sk to pull WAL;
It was mostly described in [014-safekeeper-gossip](https://github.com/neondatabase/neon/blob/main/docs/rfcs/014-safekeepers-gossip.md), but I want to recap on that.
The main pro is less one dependency: less moving parts, easier to run Neon
locally/manually, less places to monitor. Fault tolerance for broker disappears,
no kuber or something. To me this is a big thing.
Also (though not a big thing) idle watches for inactive timelines disappear:
naturally safekeepers learn about compute connection first and start pushing
status to pageserver(s), notifying it should pull.
Importantly, I think that eventually knowing and persisting peers and
pageservers on safekeepers is inevitable:
- Knowing peer safekeepers for the timeline is required for correct
automatic membership change -- new member set must be hardened on old
majority before proceeding. It is required to get rid of sync-safekeepers
as well (peer recovery up to flush_lsn).
- Knowing pageservers where the timeline is attached is needed to
1. Understand when to shut down activity on the timeline, i.e. push data to
the broker. We can have a lot of timelines sleeping quietly which
shouldn't occupy resources.
2. Preserve WAL for these (currently we offload to s3 and take it from there,
but serving locally is better, and we get one less condition on which WAL
can be removed from s3).
I suppose this membership data should be passed to safekeepers directly from the
console because
1. Console is the original source of this data, conceptually this is the
simplest way (rather than passing it through compute or something).
2. We already have similar code for deleting timeline on safekeepers
(and attaching/detaching timeline on pageserver), this is a typical
action -- queue operation against storage node and execute it until it
completes (or timeline is dropped).
Cons of direct communication are
- It is more complicated: each safekeeper should maintain set of peers it talks
to, and set of timelines for each such peer -- they ought to be multiplexed
into single connection.
- Totally, we have O(n^2) connections instead of O(n) with broker schema
(still O(n) on each node). However, these are relatively stable, async and
thus not very expensive, I don't think this is a big problem. Up to 10k
storage nodes I doubt connection overhead would be noticeable.
I'd use gRPC for direct communication, and in this sense gRPC based broker is a
step towards it.

View File

@@ -1,91 +0,0 @@
# Managing Tenant and Timeline lifecycles
## Summary
The pageserver has a Tenant object in memory for each tenant it manages, and a
Timeline for each timeline. There are a lot of tasks that operate on the tenants
and timelines with references to those objects. We have some mechanisms to track
which tasks are operating on each Tenant and Timeline, and to request them to
shutdown when a tenant or timeline is deleted, but it does not cover all uses,
and as a result we have many race conditions around tenant/timeline shutdown.
## Motivation
We have a bunch of race conditions that can produce weird errors and can be hard
to track down.
## Non Goals
This RFC only covers the problem of ensuring that a task/thread isn't operating
on a Tenant or Timeline. It does not cover what states, aside from Active and
non-Active, each Tenant and Timeline should have, or when exactly the transitions
should happen.
## Impacted components (e.g. pageserver, safekeeper, console, etc)
Pageserver. Although I wonder if the safekeeper should have a similar mechanism.
## Current situation
Most pageserver tasks of are managed by task_mgr.rs:
- LibpqEndpointListener
- HttpEndPointListener
- WalReceiverManager and -Connection
- GarbageCollector and Compaction
- InitialLogicalSizeCalculation
In addition to those tasks, the walreceiver performs some direct tokio::spawn
calls to spawn tasks that are not registered with 'task_mgr'. And all of these
tasks can spawn extra operations with tokio spawn_blocking.
Whenever a tenant or timeline is removed from the system, by pageserver
shutdown, delete_timeline or tenant-detach operation, we rely on the task
registry in 'task_mgr.rs' to wait until there are no tasks operating on the
tenant or timeline, before its Tenant/Timeline object is removed. That relies on
each task to register itself with the tenant/timeline ID in
'task_mgr.rs'. However, there are many gaps in that. For example,
GarbageCollection and Compaction tasks are registered with the tenant, but when
they proceed to operate on a particular timeline of the tenant, they don't
register with timeline ID. Because of that, the timeline can be deleted while GC
or compaction is running on it, causing failures in the GC or compaction (see
https://github.com/neondatabase/neon/issues/2442).
Another problem is that the task registry only works for tokio Tasks. There is
no way to register a piece of code that runs inside spawn_blocking(), for
example.
## Proposed implementation
This "voluntary" registration of tasks is fragile. Let's use Rust language features
to enforce that a tenant/timeline cannot be removed from the system when there is
still some code operating on it.
Let's introduce new Guard objects for Tenant and Timeline, and do all actions through
the Guard object. Something like:
TenantActiveGuard: Guard object over Arc<Tenant>. When you acquire the guard,
the code checks that the tenant is in Active state. If it's not, you get an
error. You can change the state of the tenant to Stopping while there are
ActiveTenantGuard objects still on it, to prevent new ActiveTenantGuards from
being acquired, but the Tenant cannot be removed until all the guards are gone.
TenantMaintenanceGuard: Like ActiveTenantGuard, but can be held even when the
tenant is not in Active state. Used for operations like attach/detach. Perhaps
allow only one such guard on a Tenant at a time.
Similarly for Timelines. We don't currentl have a "state" on Timeline, but I think
we need at least two states: Active and Stopping. The Stopping state is used at
deletion, to prevent new TimelineActiveGuards from appearing, while you wait for
existing TimelineActiveGuards to die out.
The shutdown-signaling, using shutdown_watcher() and is_shutdown_requested(),
probably also needs changes to deal with the new Guards. The rule is that if you
have a TenantActiveGuard, and the tenant's state changes from Active to
Stopping, the is_shutdown_requested() function should return true, and
shutdown_watcher() future should return.
This signaling doesn't neessarily need to cover all cases. For example, if you
have a block of code in spawn_blocking(), it might be acceptable if
is_shutdown_requested() doesn't return true even though the tenant is in
Stopping state, as long as the code finishes reasonably fast.

View File

@@ -8,7 +8,7 @@
regex = "1.4.5"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "2.0"
serde_with = "1.12.0"
once_cell = "1.13.0"
utils = { path = "../utils" }

View File

@@ -29,9 +29,6 @@ pub struct SkTimelineInfo {
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub local_start_lsn: Option<Lsn>,
/// A connection string to use for WAL receiving.
#[serde(default)]
pub safekeeper_connstr: Option<String>,

View File

@@ -77,16 +77,6 @@ pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
];
pub fn set_build_info_metric(revision: &str) {
let metric = register_int_gauge_vec!(
"libmetrics_build_info",
"Build/version information",
&["revision"]
)
.expect("Failed to register build info metric");
metric.with_label_values(&[revision]).set(1);
}
// Records I/O stats in a "cross-platform" way.
// Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.
// An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.

View File

@@ -5,11 +5,8 @@ edition = "2021"
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_with = "2.0"
serde_with = "1.12.0"
const_format = "0.2.21"
anyhow = { version = "1.0", features = ["backtrace"] }
bytes = "1.0.1"
utils = { path = "../utils" }
postgres_ffi = { path = "../postgres_ffi" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -2,7 +2,6 @@ use const_format::formatcp;
/// Public API types
pub mod models;
pub mod reltag;
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");

View File

@@ -7,10 +7,6 @@ use utils::{
lsn::Lsn,
};
use crate::reltag::RelTag;
use anyhow::bail;
use bytes::{Buf, BufMut, Bytes, BytesMut};
/// A state of a tenant in pageserver's memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TenantState {
@@ -23,22 +19,6 @@ pub enum TenantState {
Broken,
}
/// A state of a timeline in pageserver's memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TimelineState {
/// Timeline is fully operational, its background jobs are running.
Active,
/// A timeline is recognized by pageserver, but not yet ready to operate.
/// The status indicates, that the timeline could eventually go back to Active automatically:
/// for example, if the owning tenant goes back to Active again.
Suspended,
/// A timeline is recognized by pageserver, but not yet ready to operate and not allowed to
/// automatically become Active after certain events: only a management call can change this status.
Paused,
/// A timeline is recognized by the pageserver, but no longer used for any operations, as failed to get activated.
Broken,
}
#[serde_as]
#[derive(Serialize, Deserialize)]
pub struct TimelineCreateRequest {
@@ -143,15 +123,9 @@ pub struct TenantInfo {
pub has_in_progress_downloads: Option<bool>,
}
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TimelineInfo {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId,
pub struct LocalTimelineInfo {
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_timeline_id: Option<TimelineId>,
#[serde_as(as = "Option<DisplayFromStr>")]
@@ -175,35 +149,28 @@ pub struct TimelineInfo {
/// the timestamp (in microseconds) of the last received message
pub last_received_msg_ts: Option<u128>,
pub pg_version: u32,
#[serde_as(as = "Option<DisplayFromStr>")]
pub remote_consistent_lsn: Option<Lsn>,
pub awaits_download: bool,
pub state: TimelineState,
// Some of the above fields are duplicated in 'local' and 'remote', for backwards-
// compatility with older clients.
pub local: LocalTimelineInfo,
pub remote: RemoteTimelineInfo,
}
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LocalTimelineInfo {
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_timeline_id: Option<TimelineId>,
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_lsn: Option<Lsn>,
pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
}
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RemoteTimelineInfo {
#[serde_as(as = "Option<DisplayFromStr>")]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "DisplayFromStr")]
pub remote_consistent_lsn: Lsn,
pub awaits_download: bool,
}
///
/// This represents the output of the "timeline_detail" API call.
///
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TimelineInfo {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId,
pub local: Option<LocalTimelineInfo>,
pub remote: Option<RemoteTimelineInfo>,
}
pub type ConfigureFailpointsRequest = Vec<FailpointConfig>;
@@ -223,160 +190,3 @@ pub struct FailpointConfig {
pub struct TimelineGcRequest {
pub gc_horizon: Option<u64>,
}
// Wrapped in libpq CopyData
pub enum PagestreamFeMessage {
Exists(PagestreamExistsRequest),
Nblocks(PagestreamNblocksRequest),
GetPage(PagestreamGetPageRequest),
DbSize(PagestreamDbSizeRequest),
}
// Wrapped in libpq CopyData
pub enum PagestreamBeMessage {
Exists(PagestreamExistsResponse),
Nblocks(PagestreamNblocksResponse),
GetPage(PagestreamGetPageResponse),
Error(PagestreamErrorResponse),
DbSize(PagestreamDbSizeResponse),
}
#[derive(Debug)]
pub struct PagestreamExistsRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
}
#[derive(Debug)]
pub struct PagestreamNblocksRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
}
#[derive(Debug)]
pub struct PagestreamGetPageRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
pub blkno: u32,
}
#[derive(Debug)]
pub struct PagestreamDbSizeRequest {
pub latest: bool,
pub lsn: Lsn,
pub dbnode: u32,
}
#[derive(Debug)]
pub struct PagestreamExistsResponse {
pub exists: bool,
}
#[derive(Debug)]
pub struct PagestreamNblocksResponse {
pub n_blocks: u32,
}
#[derive(Debug)]
pub struct PagestreamGetPageResponse {
pub page: Bytes,
}
#[derive(Debug)]
pub struct PagestreamErrorResponse {
pub message: String,
}
#[derive(Debug)]
pub struct PagestreamDbSizeResponse {
pub db_size: i64,
}
impl PagestreamFeMessage {
pub fn parse(mut body: Bytes) -> anyhow::Result<PagestreamFeMessage> {
// TODO these gets can fail
// these correspond to the NeonMessageTag enum in pagestore_client.h
//
// TODO: consider using protobuf or serde bincode for less error prone
// serialization.
let msg_tag = body.get_u8();
match msg_tag {
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
blkno: body.get_u32(),
})),
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
dbnode: body.get_u32(),
})),
_ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
}
}
}
impl PagestreamBeMessage {
pub fn serialize(&self) -> Bytes {
let mut bytes = BytesMut::new();
match self {
Self::Exists(resp) => {
bytes.put_u8(100); /* tag from pagestore_client.h */
bytes.put_u8(resp.exists as u8);
}
Self::Nblocks(resp) => {
bytes.put_u8(101); /* tag from pagestore_client.h */
bytes.put_u32(resp.n_blocks);
}
Self::GetPage(resp) => {
bytes.put_u8(102); /* tag from pagestore_client.h */
bytes.put(&resp.page[..]);
}
Self::Error(resp) => {
bytes.put_u8(103); /* tag from pagestore_client.h */
bytes.put(resp.message.as_bytes());
bytes.put_u8(0); // null terminator
}
Self::DbSize(resp) => {
bytes.put_u8(104); /* tag from pagestore_client.h */
bytes.put_i64(resp.db_size);
}
}
bytes.into()
}
}

View File

@@ -13,7 +13,7 @@ crc32c = "0.6.0"
hex = "0.4.3"
once_cell = "1.13.0"
log = "0.4.14"
memoffset = "0.7"
memoffset = "0.6.2"
thiserror = "1.0"
serde = { version = "1.0", features = ["derive"] }
utils = { path = "../utils" }
@@ -26,4 +26,4 @@ wal_craft = { path = "wal_craft" }
[build-dependencies]
anyhow = "1.0"
bindgen = "0.61"
bindgen = "0.60.1"

View File

@@ -7,7 +7,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0"
clap = "4.0"
clap = "3.0"
env_logger = "0.9"
log = "0.4"
once_cell = "1.13.0"

View File

@@ -1,19 +1,68 @@
use anyhow::*;
use clap::{value_parser, Arg, ArgMatches, Command};
use std::{path::PathBuf, str::FromStr};
use clap::{App, Arg, ArgMatches};
use std::str::FromStr;
use wal_craft::*;
fn main() -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
.init();
let arg_matches = cli().get_matches();
let type_arg = &Arg::new("type")
.takes_value(true)
.help("Type of WAL to craft")
.possible_values([
Simple::NAME,
LastWalRecordXlogSwitch::NAME,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
LastWalRecordCrossingSegment::NAME,
])
.required(true);
let arg_matches = App::new("Postgres WAL crafter")
.about("Crafts Postgres databases with specific WAL properties")
.subcommand(
App::new("print-postgres-config")
.about("Print the configuration required for PostgreSQL server before running this script")
)
.subcommand(
App::new("with-initdb")
.about("Craft WAL in a new data directory first initialized with initdb")
.arg(type_arg)
.arg(
Arg::new("datadir")
.takes_value(true)
.help("Data directory for the Postgres server")
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.takes_value(true)
.help("Directory with Postgres distributions (bin and lib directories, e.g. pg_install containing subpath `v14/bin/postgresql`)")
.default_value("/usr/local")
)
.arg(
Arg::new("pg-version")
.long("pg-version")
.help("Postgres version to use for the initial tenant")
.required(true)
.takes_value(true)
)
)
.subcommand(
App::new("in-existing")
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
.arg(type_arg)
.arg(
Arg::new("connection")
.takes_value(true)
.help("Connection string to the Postgres database to populate")
.required(true)
)
)
.get_matches();
let wal_craft = |arg_matches: &ArgMatches, client| {
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches
.get_one::<String>("type")
.map(|s| s.as_str())
.context("'type' is required")?
{
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches.value_of("type").unwrap() {
Simple::NAME => Simple::craft(client)?,
LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
@@ -23,12 +72,12 @@ fn main() -> Result<()> {
WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
}
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
a => panic!("Unknown --type argument: {a}"),
a => panic!("Unknown --type argument: {}", a),
};
for lsn in intermediate_lsns {
println!("intermediate_lsn = {lsn}");
println!("intermediate_lsn = {}", lsn);
}
println!("end_of_wal = {end_of_wal_lsn}");
println!("end_of_wal = {}", end_of_wal_lsn);
Ok(())
};
@@ -36,24 +85,20 @@ fn main() -> Result<()> {
None => panic!("No subcommand provided"),
Some(("print-postgres-config", _)) => {
for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
println!("{cfg}");
println!("{}", cfg);
}
Ok(())
}
Some(("with-initdb", arg_matches)) => {
let cfg = Conf {
pg_version: *arg_matches
.get_one::<u32>("pg-version")
.context("'pg-version' is required")?,
pg_distrib_dir: arg_matches
.get_one::<PathBuf>("pg-distrib-dir")
.context("'pg-distrib-dir' is required")?
.to_owned(),
datadir: arg_matches
.get_one::<PathBuf>("datadir")
.context("'datadir' is required")?
.to_owned(),
pg_version: arg_matches
.value_of("pg-version")
.unwrap()
.parse::<u32>()
.context("Failed to parse postgres version from the argument string")?,
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
datadir: arg_matches.value_of("datadir").unwrap().into(),
};
cfg.initdb()?;
let srv = cfg.start_server()?;
@@ -63,77 +108,9 @@ fn main() -> Result<()> {
}
Some(("in-existing", arg_matches)) => wal_craft(
arg_matches,
&mut postgres::Config::from_str(
arg_matches
.get_one::<String>("connection")
.context("'connection' is required")?,
)
.context(
"'connection' argument value could not be parsed as a postgres connection string",
)?
.connect(postgres::NoTls)?,
&mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
.connect(postgres::NoTls)?,
),
Some(_) => panic!("Unknown subcommand"),
}
}
fn cli() -> Command {
let type_arg = &Arg::new("type")
.help("Type of WAL to craft")
.value_parser([
Simple::NAME,
LastWalRecordXlogSwitch::NAME,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
LastWalRecordCrossingSegment::NAME,
])
.required(true);
Command::new("Postgres WAL crafter")
.about("Crafts Postgres databases with specific WAL properties")
.subcommand(
Command::new("print-postgres-config")
.about("Print the configuration required for PostgreSQL server before running this script")
)
.subcommand(
Command::new("with-initdb")
.about("Craft WAL in a new data directory first initialized with initdb")
.arg(type_arg)
.arg(
Arg::new("datadir")
.help("Data directory for the Postgres server")
.value_parser(value_parser!(PathBuf))
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.value_parser(value_parser!(PathBuf))
.help("Directory with Postgres distributions (bin and lib directories, e.g. pg_install containing subpath `v14/bin/postgresql`)")
.default_value("/usr/local")
)
.arg(
Arg::new("pg-version")
.long("pg-version")
.help("Postgres version to use for the initial tenant")
.value_parser(value_parser!(u32))
.required(true)
)
)
.subcommand(
Command::new("in-existing")
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
.arg(type_arg)
.arg(
Arg::new("connection")
.help("Connection string to the Postgres database to populate")
.required(true)
)
)
}
#[test]
fn verify_cli() {
cli().debug_assert();
}

View File

@@ -37,22 +37,22 @@ pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
});
impl Conf {
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {
pub fn pg_distrib_dir(&self) -> PathBuf {
let path = self.pg_distrib_dir.clone();
match self.pg_version {
14 => Ok(path.join(format!("v{}", self.pg_version))),
15 => Ok(path.join(format!("v{}", self.pg_version))),
_ => bail!("Unsupported postgres version: {}", self.pg_version),
14 => path.join(format!("v{}", self.pg_version)),
15 => path.join(format!("v{}", self.pg_version)),
_ => panic!("Unsupported postgres version: {}", self.pg_version),
}
}
fn pg_bin_dir(&self) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir()?.join("bin"))
fn pg_bin_dir(&self) -> PathBuf {
self.pg_distrib_dir().join("bin")
}
fn pg_lib_dir(&self) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir()?.join("lib"))
fn pg_lib_dir(&self) -> PathBuf {
self.pg_distrib_dir().join("lib")
}
pub fn wal_dir(&self) -> PathBuf {
@@ -60,12 +60,12 @@ impl Conf {
}
fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
let path = self.pg_bin_dir()?.join(command);
let path = self.pg_bin_dir().join(command);
ensure!(path.exists(), "Command {:?} does not exist", path);
let mut cmd = Command::new(path);
cmd.env_clear()
.env("LD_LIBRARY_PATH", self.pg_lib_dir()?)
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?);
.env("LD_LIBRARY_PATH", self.pg_lib_dir())
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir());
Ok(cmd)
}

View File

@@ -15,7 +15,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
tokio-util = { version = "0.7", features = ["io"] }
toml_edit = { version = "0.14", features = ["easy"] }
toml_edit = { version = "0.13", features = ["easy"] }
tracing = "0.1.27"
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -16,7 +16,7 @@ use tokio::{
io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
};
use tracing::*;
use utils::crashsafe::path_with_suffix_extension;
use utils::crashsafe_dir::path_with_suffix_extension;
use crate::{Download, DownloadError, RemoteObjectId};

View File

@@ -5,7 +5,7 @@ edition = "2021"
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_with = "2.0"
serde_with = "1.12.0"
const_format = "0.2.21"
utils = { path = "../utils" }

View File

@@ -1,24 +1,8 @@
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::id::{NodeId, TimelineId};
use utils::{
id::{NodeId, TenantId, TimelineId},
lsn::Lsn,
};
#[serde_as]
#[derive(Serialize, Deserialize)]
pub struct TimelineCreateRequest {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId,
pub peer_ids: Option<Vec<NodeId>>,
pub pg_version: u32,
pub system_id: Option<u64>,
pub wal_seg_size: Option<u32>,
#[serde_as(as = "DisplayFromStr")]
pub commit_lsn: Lsn,
// If not passed, it is assigned to the beginning of commit_lsn segment.
pub local_start_lsn: Option<Lsn>,
pub peer_ids: Vec<NodeId>,
}

View File

@@ -19,8 +19,8 @@ thiserror = "1.0"
tokio = { version = "1.17", features = ["macros"]}
tokio-rustls = "0.23"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
nix = "0.25"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
nix = "0.23.0"
signal-hook = "0.3.10"
rand = "0.8.3"
jsonwebtoken = "8"
@@ -28,10 +28,8 @@ hex = { version = "0.4.3", features = ["serde"] }
rustls = "0.20.2"
rustls-split = "0.3.0"
git-version = "0.3.5"
serde_with = "2.0"
serde_with = "1.12.0"
once_cell = "1.13.0"
strum = "0.24"
strum_macros = "0.24"
metrics = { path = "../metrics" }
@@ -42,7 +40,7 @@ byteorder = "1.4.3"
bytes = "1.0.1"
hex-literal = "0.3"
tempfile = "3.2"
criterion = "0.4"
criterion = "0.3"
rustls-pemfile = "1"
[[bench]]

View File

@@ -12,8 +12,16 @@ pub fn create_dir(path: impl AsRef<Path>) -> io::Result<()> {
let path = path.as_ref();
fs::create_dir(path)?;
fsync_file_and_parent(path)?;
Ok(())
File::open(path)?.sync_all()?;
if let Some(parent) = path.parent() {
File::open(parent)?.sync_all()
} else {
Err(io::Error::new(
io::ErrorKind::InvalidInput,
"can't find parent",
))
}
}
/// Similar to [`std::fs::create_dir_all`], except we fsync all
@@ -57,12 +65,12 @@ pub fn create_dir_all(path: impl AsRef<Path>) -> io::Result<()> {
// Fsync the created directories from child to parent.
for &path in dirs_to_create.iter() {
fsync(path)?;
File::open(path)?.sync_all()?;
}
// If we created any new directories, fsync the parent.
if !dirs_to_create.is_empty() {
fsync(path)?;
File::open(path)?.sync_all()?;
}
Ok(())
@@ -84,33 +92,6 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
.with_extension(new_extension.as_ref())
}
pub fn fsync_file_and_parent(file_path: &Path) -> io::Result<()> {
let parent = file_path.parent().ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
format!("File {file_path:?} has no parent"),
)
})?;
fsync(file_path)?;
fsync(parent)?;
Ok(())
}
pub fn fsync(path: &Path) -> io::Result<()> {
File::open(path)
.map_err(|e| io::Error::new(e.kind(), format!("Failed to open the file {path:?}: {e}")))
.and_then(|file| {
file.sync_all().map_err(|e| {
io::Error::new(
e.kind(),
format!("Failed to sync file {path:?} data and metadata: {e}"),
)
})
})
.map_err(|e| io::Error::new(e.kind(), format!("Failed to fsync file {path:?}: {e}")))
}
#[cfg(test)]
mod tests {
use tempfile::tempdir;

View File

@@ -75,12 +75,6 @@ impl From<[u8; 16]> for Id {
}
}
impl From<Id> for u128 {
fn from(id: Id) -> Self {
u128::from_le_bytes(id.0)
}
}
impl fmt::Display for Id {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.hex_encode())
@@ -142,12 +136,6 @@ macro_rules! id_newtype {
}
}
impl From<$t> for u128 {
fn from(id: $t) -> Self {
u128::from(id.0)
}
}
impl fmt::Display for $t {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)

View File

@@ -22,8 +22,8 @@ pub mod pq_proto;
// dealing with connstring parsing and handy access to it's parts
pub mod connstring;
// helper functions for creating and fsyncing
pub mod crashsafe;
// helper functions for creating and fsyncing directories/trees
pub mod crashsafe_dir;
// common authentication routines
pub mod auth;

View File

@@ -1,35 +1,11 @@
use std::{
fs::{File, OpenOptions},
path::Path,
str::FromStr,
};
use anyhow::{Context, Result};
use strum_macros::{EnumString, EnumVariantNames};
#[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
#[strum(serialize_all = "snake_case")]
pub enum LogFormat {
Plain,
Json,
}
impl LogFormat {
pub fn from_config(s: &str) -> anyhow::Result<LogFormat> {
use strum::VariantNames;
LogFormat::from_str(s).with_context(|| {
format!(
"Unrecognized log format. Please specify one of: {:?}",
LogFormat::VARIANTS
)
})
}
}
pub fn init(
log_filename: impl AsRef<Path>,
daemonize: bool,
log_format: LogFormat,
) -> Result<File> {
pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
// Don't open the same file for output multiple times;
// the different fds could overwrite each other's output.
let log_file = OpenOptions::new()
@@ -45,50 +21,22 @@ pub fn init(
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str));
let x: File = log_file.try_clone().unwrap();
let base_logger = tracing_subscriber::fmt()
.with_env_filter(env_filter)
.with_target(false)
.with_ansi(false)
.with_writer(move || -> Box<dyn std::io::Write> {
// we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
// for example to be in line with docker log command which expects logs comimg from stdout
if daemonize {
Box::new(x.try_clone().unwrap())
} else {
Box::new(std::io::stdout())
}
});
.with_target(false) // don't include event targets
.with_ansi(false); // don't use colors in log file;
match log_format {
LogFormat::Json => base_logger.json().init(),
LogFormat::Plain => base_logger.init(),
// we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
// for example to be in line with docker log command which expects logs comimg from stdout
if daemonize {
let x = log_file.try_clone().unwrap();
base_logger
.with_writer(move || x.try_clone().unwrap())
.init();
} else {
base_logger.init();
}
Ok(log_file)
}
// #[cfg(test)]
// Due to global logger, can't run tests in same process.
// So until there's a non-global one, the tests are in ../tests/ as separate files.
#[macro_export(local_inner_macros)]
macro_rules! test_init_file_logger {
($log_level:expr, $log_format:expr) => {{
use std::str::FromStr;
std::env::set_var("RUST_LOG", $log_level);
let tmp_dir = tempfile::TempDir::new().unwrap();
let log_file_path = tmp_dir.path().join("logfile");
let log_format = $crate::logging::LogFormat::from_str($log_format).unwrap();
let _log_file = $crate::logging::init(&log_file_path, true, log_format).unwrap();
let log_file = std::fs::OpenOptions::new()
.read(true)
.open(&log_file_path)
.unwrap();
log_file
}};
}

View File

@@ -66,11 +66,6 @@ impl Lsn {
(self.0 % seg_sz as u64) as usize
}
/// Compute LSN of the segment start.
pub fn segment_lsn(self, seg_sz: usize) -> Lsn {
Lsn(self.0 - (self.0 % seg_sz as u64))
}
/// Compute the segment number
pub fn segment_number(self, seg_sz: usize) -> u64 {
self.0 / seg_sz as u64

View File

@@ -15,7 +15,7 @@ use std::sync::Arc;
use std::task::Poll;
use tracing::{debug, error, trace};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_rustls::TlsAcceptor;
#[async_trait::async_trait]
@@ -66,8 +66,8 @@ pub enum ProcessMsgResult {
/// Always-writeable sock_split stream.
/// May not be readable. See [`PostgresBackend::take_stream_in`]
pub enum Stream {
Unencrypted(BufReader<tokio::net::TcpStream>),
Tls(Box<tokio_rustls::server::TlsStream<BufReader<tokio::net::TcpStream>>>),
Unencrypted(tokio::net::TcpStream),
Tls(Box<tokio_rustls::server::TlsStream<tokio::net::TcpStream>>),
Broken,
}
@@ -157,7 +157,7 @@ impl PostgresBackend {
let peer_addr = socket.peer_addr()?;
Ok(Self {
stream: Stream::Unencrypted(BufReader::new(socket)),
stream: Stream::Unencrypted(socket),
buf_out: BytesMut::with_capacity(10 * 1024),
state: ProtoState::Initialization,
md5_salt: [0u8; 4],

View File

@@ -10,7 +10,6 @@ use serde::{Deserialize, Serialize};
use std::{
borrow::Cow,
collections::HashMap,
fmt,
future::Future,
io::{self, Cursor},
str,
@@ -125,19 +124,6 @@ pub struct CancelKeyData {
pub cancel_key: i32,
}
impl fmt::Display for CancelKeyData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let hi = (self.backend_pid as u64) << 32;
let lo = self.cancel_key as u64;
let id = hi | lo;
// This format is more compact and might work better for logs.
f.debug_tuple("CancelKeyData")
.field(&format_args!("{:x}", id))
.finish()
}
}
use rand::distributions::{Distribution, Standard};
impl Distribution<CancelKeyData> for Standard {
fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> CancelKeyData {

View File

@@ -1,36 +0,0 @@
// This could be in ../src/logging.rs but since the logger is global, these
// can't be run in threads of the same process
use std::fs::File;
use std::io::{BufRead, BufReader, Lines};
use tracing::*;
use utils::test_init_file_logger;
fn read_lines(file: File) -> Lines<BufReader<File>> {
BufReader::new(file).lines()
}
#[test]
fn test_json_format_has_message_and_custom_field() {
std::env::set_var("RUST_LOG", "info");
let log_file = test_init_file_logger!("info", "json");
let custom_field: &str = "hi";
trace!(custom = %custom_field, "test log message");
debug!(custom = %custom_field, "test log message");
info!(custom = %custom_field, "test log message");
warn!(custom = %custom_field, "test log message");
error!(custom = %custom_field, "test log message");
let lines = read_lines(log_file);
for line in lines {
let content = line.unwrap();
let json_object = serde_json::from_str::<serde_json::Value>(&content).unwrap();
assert_eq!(json_object["fields"]["custom"], "hi");
assert_eq!(json_object["fields"]["message"], "test log message");
assert_ne!(json_object["level"], "TRACE");
assert_ne!(json_object["level"], "DEBUG");
}
}

View File

@@ -1,36 +0,0 @@
// This could be in ../src/logging.rs but since the logger is global, these
// can't be run in threads of the same process
use std::fs::File;
use std::io::{BufRead, BufReader, Lines};
use tracing::*;
use utils::test_init_file_logger;
fn read_lines(file: File) -> Lines<BufReader<File>> {
BufReader::new(file).lines()
}
#[test]
fn test_plain_format_has_message_and_custom_field() {
std::env::set_var("RUST_LOG", "warn");
let log_file = test_init_file_logger!("warn", "plain");
let custom_field: &str = "hi";
trace!(custom = %custom_field, "test log message");
debug!(custom = %custom_field, "test log message");
info!(custom = %custom_field, "test log message");
warn!(custom = %custom_field, "test log message");
error!(custom = %custom_field, "test log message");
let lines = read_lines(log_file);
for line in lines {
let content = line.unwrap();
serde_json::from_str::<serde_json::Value>(&content).unwrap_err();
assert!(content.contains("custom=hi"));
assert!(content.contains("test log message"));
assert!(!content.contains("TRACE"));
assert!(!content.contains("DEBUG"));
assert!(!content.contains("INFO"));
}
}

View File

@@ -23,7 +23,7 @@ futures = "0.3.13"
hex = "0.4.3"
hyper = "0.14"
itertools = "0.10.3"
clap = { version = "4.0", features = ["string"] }
clap = "3.0"
daemonize = "0.4.1"
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
tokio-util = { version = "0.7.3", features = ["io", "io-util"] }
@@ -38,25 +38,25 @@ tar = "0.4.33"
humantime = "2.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "2.0"
serde_with = "1.12.0"
humantime-serde = "1.1.1"
pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }
toml_edit = { version = "0.14", features = ["easy"] }
toml_edit = { version = "0.13", features = ["easy"] }
scopeguard = "1.1.0"
const_format = "0.2.21"
tracing = "0.1.36"
signal-hook = "0.3.10"
url = "2"
nix = "0.25"
nix = "0.23"
once_cell = "1.13.0"
crossbeam-utils = "0.8.5"
fail = "0.5.0"
git-version = "0.3.5"
rstar = "0.9.3"
num-traits = "0.2.15"
amplify_num = { git = "https://github.com/hlinnaka/rust-amplify.git", branch = "unsigned-int-perf" }
amplify_num = "0.4.1"
pageserver_api = { path = "../libs/pageserver_api" }
postgres_ffi = { path = "../libs/postgres_ffi" }
@@ -67,13 +67,7 @@ remote_storage = { path = "../libs/remote_storage" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
close_fds = "0.3.2"
walkdir = "2.3.2"
svg_fmt = "0.4.1"
[dev-dependencies]
criterion = "0.4"
hex-literal = "0.3"
tempfile = "3.2"
[[bench]]
name = "bench_layer_map"
harness = false

File diff suppressed because it is too large Load Diff

View File

@@ -22,8 +22,8 @@ use std::time::SystemTime;
use tar::{Builder, EntryType, Header};
use tracing::*;
use crate::reltag::{RelTag, SlruKind};
use crate::tenant::Timeline;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};

View File

@@ -1,150 +0,0 @@
//! A tool for visualizing the arrangement of layerfiles within a timeline.
//!
//! It reads filenames from stdin and prints a svg on stdout. The image is a plot in
//! page-lsn space, where every delta layer is a rectangle and every image layer is a
//! thick line. Legend:
//! - The x axis (left to right) represents page index.
//! - The y axis represents LSN, growing upwards.
//!
//! Coordinates in both axis are compressed for better readability.
//! (see https://medium.com/algorithms-digest/coordinate-compression-2fff95326fb)
//!
//! Example use:
//! ```
//! $ cd test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE
//! $ ls | grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
//! $ firefox out.svg
//! ```
//!
//! This API was chosen so that we can easily work with filenames extracted from ssh,
//! or from pageserver log files.
//!
//! TODO Consider shipping this as a grafana panel plugin:
//! https://grafana.com/tutorials/build-a-panel-plugin/
use anyhow::Result;
use pageserver::repository::Key;
use std::cmp::Ordering;
use std::io::{self, BufRead};
use std::{
collections::{BTreeMap, BTreeSet},
ops::Range,
};
use svg_fmt::{rectangle, rgb, BeginSvg, EndSvg, Fill, Stroke};
use utils::{lsn::Lsn, project_git_version};
project_git_version!(GIT_VERSION);
// Map values to their compressed coordinate - the index the value
// would have in a sorted and deduplicated list of all values.
fn build_coordinate_compression_map<T: Ord + Copy>(coords: Vec<T>) -> BTreeMap<T, usize> {
let set: BTreeSet<T> = coords.into_iter().collect();
let mut map: BTreeMap<T, usize> = BTreeMap::new();
for (i, e) in set.iter().enumerate() {
map.insert(*e, i);
}
map
}
fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
let split: Vec<&str> = name.split("__").collect();
let keys: Vec<&str> = split[0].split('-').collect();
let mut lsns: Vec<&str> = split[1].split('-').collect();
if lsns.len() == 1 {
lsns.push(lsns[0]);
}
let keys = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
let lsns = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
(keys, lsns)
}
fn main() -> Result<()> {
// Parse layer filenames from stdin
let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
let stdin = io::stdin();
for line in stdin.lock().lines() {
let range = parse_filename(&line.unwrap());
ranges.push(range);
}
// Collect all coordinates
let mut keys: Vec<Key> = vec![];
let mut lsns: Vec<Lsn> = vec![];
for (keyr, lsnr) in &ranges {
keys.push(keyr.start);
keys.push(keyr.end);
lsns.push(lsnr.start);
lsns.push(lsnr.end);
}
// Analyze
let key_map = build_coordinate_compression_map(keys);
let lsn_map = build_coordinate_compression_map(lsns);
// Initialize stats
let mut num_deltas = 0;
let mut num_images = 0;
// Draw
let stretch = 3.0; // Stretch out vertically for better visibility
println!(
"{}",
BeginSvg {
w: key_map.len() as f32,
h: stretch * lsn_map.len() as f32
}
);
for (keyr, lsnr) in &ranges {
let key_start = *key_map.get(&keyr.start).unwrap();
let key_end = *key_map.get(&keyr.end).unwrap();
let key_diff = key_end - key_start;
let lsn_max = lsn_map.len();
if key_start >= key_end {
panic!("Invalid key range {}-{}", key_start, key_end);
}
let lsn_start = *lsn_map.get(&lsnr.start).unwrap();
let lsn_end = *lsn_map.get(&lsnr.end).unwrap();
let mut lsn_diff = (lsn_end - lsn_start) as f32;
let mut fill = Fill::None;
let mut margin = 0.05 * lsn_diff; // Height-dependent margin to disambiguate overlapping deltas
let mut lsn_offset = 0.0;
// Fill in and thicken rectangle if it's an
// image layer so that we can see it.
match lsn_start.cmp(&lsn_end) {
Ordering::Less => num_deltas += 1,
Ordering::Equal => {
num_images += 1;
lsn_diff = 0.3;
lsn_offset = -lsn_diff / 2.0;
margin = 0.05;
fill = Fill::Color(rgb(0, 0, 0));
}
Ordering::Greater => panic!("Invalid lsn range {}-{}", lsn_start, lsn_end),
}
println!(
" {}",
rectangle(
key_start as f32 + stretch * margin,
stretch * (lsn_max as f32 - (lsn_end as f32 - margin - lsn_offset)),
key_diff as f32 - stretch * 2.0 * margin,
stretch * (lsn_diff - 2.0 * margin)
)
.fill(fill)
.stroke(Stroke::Color(rgb(0, 0, 0), 0.1))
.border_radius(0.4)
);
}
println!("{}", EndSvg);
eprintln!("num_images: {}", num_images);
eprintln!("num_deltas: {}", num_deltas);
Ok(())
}

View File

@@ -0,0 +1,35 @@
//! Main entry point for the dump_layerfile executable
//!
//! A handy tool for debugging, that's all.
use anyhow::Result;
use clap::{App, Arg};
use pageserver::page_cache;
use pageserver::tenant::dump_layerfile_from_path;
use pageserver::virtual_file;
use std::path::PathBuf;
use utils::project_git_version;
project_git_version!(GIT_VERSION);
fn main() -> Result<()> {
let arg_matches = App::new("Neon dump_layerfile utility")
.about("Dump contents of one layer file, for debugging")
.version(GIT_VERSION)
.arg(
Arg::new("path")
.help("Path to file to dump")
.required(true)
.index(1),
)
.get_matches();
let path = PathBuf::from(arg_matches.value_of("path").unwrap());
// Basic initialization of things that don't change after startup
virtual_file::init(10);
page_cache::init(100);
dump_layerfile_from_path(&path, true)?;
Ok(())
}

View File

@@ -6,15 +6,13 @@ use tracing::*;
use anyhow::{anyhow, bail, Context, Result};
use clap::{Arg, ArgAction, Command};
use clap::{App, Arg};
use daemonize::Daemonize;
use fail::FailScenario;
use metrics::set_build_info_metric;
use pageserver::{
config::{defaults::*, PageServerConf},
http, page_cache, page_image_cache, page_service, profiling, task_mgr,
http, page_cache, page_service, profiling, task_mgr,
task_mgr::TaskKind,
task_mgr::{
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
@@ -33,35 +31,72 @@ use utils::{
project_git_version!(GIT_VERSION);
const FEATURES: &[&str] = &[
#[cfg(feature = "testing")]
"testing",
#[cfg(feature = "fail/failpoints")]
"fail/failpoints",
#[cfg(feature = "profiling")]
"profiling",
];
fn version() -> String {
format!(
"{GIT_VERSION} failpoints: {}, features: {:?}",
fail::has_failpoints(),
FEATURES,
"{GIT_VERSION} profiling:{} failpoints:{}",
cfg!(feature = "profiling"),
fail::has_failpoints()
)
}
fn main() -> anyhow::Result<()> {
let arg_matches = cli().get_matches();
let arg_matches = App::new("Neon page server")
.about("Materializes WAL stream to pages and serves them to the postgres")
.version(&*version())
.arg(
if arg_matches.get_flag("enabled-features") {
println!("{{\"features\": {FEATURES:?} }}");
Arg::new("daemonize")
.short('d')
.long("daemonize")
.takes_value(false)
.help("Run in the background"),
)
.arg(
Arg::new("init")
.long("init")
.takes_value(false)
.help("Initialize pageserver with all given config overrides"),
)
.arg(
Arg::new("workdir")
.short('D')
.long("workdir")
.takes_value(true)
.help("Working directory for the pageserver"),
)
// See `settings.md` for more details on the extra configuration patameters pageserver can process
.arg(
Arg::new("config-override")
.short('c')
.takes_value(true)
.number_of_values(1)
.multiple_occurrences(true)
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
)
.arg(Arg::new("update-config").long("update-config").takes_value(false).help(
"Update the config file when started",
))
.arg(
Arg::new("enabled-features")
.long("enabled-features")
.takes_value(false)
.help("Show enabled compile time features"),
)
.get_matches();
if arg_matches.is_present("enabled-features") {
let features: &[&str] = &[
#[cfg(feature = "testing")]
"testing",
#[cfg(feature = "profiling")]
"profiling",
];
println!("{{\"features\": {features:?} }}");
return Ok(());
}
let workdir = arg_matches
.get_one::<String>("workdir")
.map(Path::new)
.unwrap_or_else(|| Path::new(".neon"));
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
let workdir = workdir
.canonicalize()
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
@@ -75,7 +110,7 @@ fn main() -> anyhow::Result<()> {
)
})?;
let daemonize = arg_matches.get_flag("daemonize");
let daemonize = arg_matches.is_present("daemonize");
let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
ControlFlow::Continue(conf) => conf,
@@ -87,7 +122,7 @@ fn main() -> anyhow::Result<()> {
let tenants_path = conf.tenants_path();
if !tenants_path.exists() {
utils::crashsafe::create_dir_all(conf.tenants_path()).with_context(|| {
utils::crashsafe_dir::create_dir_all(conf.tenants_path()).with_context(|| {
format!(
"Failed to create tenants root dir at '{}'",
tenants_path.display()
@@ -101,7 +136,6 @@ fn main() -> anyhow::Result<()> {
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf.page_cache_size);
page_image_cache::init(64 * conf.page_cache_size); // temporary hack for benchmarking
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
@@ -114,8 +148,8 @@ fn initialize_config(
arg_matches: clap::ArgMatches,
workdir: &Path,
) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
let init = arg_matches.get_flag("init");
let update_config = init || arg_matches.get_flag("update-config");
let init = arg_matches.is_present("init");
let update_config = init || arg_matches.is_present("update-config");
let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
if init {
@@ -157,10 +191,13 @@ fn initialize_config(
)
};
if let Some(values) = arg_matches.get_many::<String>("config-override") {
if let Some(values) = arg_matches.values_of("config-override") {
for option_line in values {
let doc = toml_edit::Document::from_str(option_line).with_context(|| {
format!("Option '{option_line}' could not be parsed as a toml document")
format!(
"Option '{}' could not be parsed as a toml document",
option_line
)
})?;
for (key, item) in doc.iter() {
@@ -200,9 +237,9 @@ fn initialize_config(
fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
// Initialize logger
let log_file = logging::init(LOG_FILE_NAME, daemonize, conf.log_format)?;
let log_file = logging::init(LOG_FILE_NAME, daemonize)?;
info!("version: {}", version());
info!("version: {GIT_VERSION}");
// TODO: Check that it looks like a valid repository before going further
@@ -319,8 +356,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
},
);
set_build_info_metric(GIT_VERSION);
// All started up! Now just sit and wait for shutdown signal.
signals.handle(|signal| match signal {
Signal::Quit => {
@@ -343,55 +378,3 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
}
})
}
fn cli() -> Command {
Command::new("Neon page server")
.about("Materializes WAL stream to pages and serves them to the postgres")
.version(version())
.arg(
Arg::new("daemonize")
.short('d')
.long("daemonize")
.action(ArgAction::SetTrue)
.help("Run in the background"),
)
.arg(
Arg::new("init")
.long("init")
.action(ArgAction::SetTrue)
.help("Initialize pageserver with all given config overrides"),
)
.arg(
Arg::new("workdir")
.short('D')
.long("workdir")
.help("Working directory for the pageserver"),
)
// See `settings.md` for more details on the extra configuration patameters pageserver can process
.arg(
Arg::new("config-override")
.short('c')
.num_args(1)
.action(ArgAction::Append)
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there). \
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
)
.arg(
Arg::new("update-config")
.long("update-config")
.action(ArgAction::SetTrue)
.help("Update the config file when started"),
)
.arg(
Arg::new("enabled-features")
.long("enabled-features")
.action(ArgAction::SetTrue)
.help("Show enabled compile time features"),
)
}
#[test]
fn verify_cli() {
cli().debug_assert();
}

View File

@@ -1,154 +0,0 @@
//! A helper tool to manage pageserver binary files.
//! Accepts a file as an argument, attempts to parse it with all ways possible
//! and prints its interpreted context.
//!
//! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.
use std::{
path::{Path, PathBuf},
str::FromStr,
};
use anyhow::Context;
use clap::{value_parser, Arg, Command};
use pageserver::{
page_cache,
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
virtual_file,
};
use postgres_ffi::ControlFileData;
use utils::{lsn::Lsn, project_git_version};
project_git_version!(GIT_VERSION);
const METADATA_SUBCOMMAND: &str = "metadata";
fn main() -> anyhow::Result<()> {
let arg_matches = cli().get_matches();
match arg_matches.subcommand() {
Some((subcommand_name, subcommand_matches)) => {
let path = subcommand_matches
.get_one::<PathBuf>("metadata_path")
.context("'metadata_path' argument is missing")?
.to_path_buf();
anyhow::ensure!(
subcommand_name == METADATA_SUBCOMMAND,
"Unknown subcommand {subcommand_name}"
);
handle_metadata(&path, subcommand_matches)?;
}
None => {
let path = arg_matches
.get_one::<PathBuf>("path")
.context("'path' argument is missing")?
.to_path_buf();
println!(
"No subcommand specified, attempting to guess the format for file {}",
path.display()
);
if let Err(e) = read_pg_control_file(&path) {
println!(
"Failed to read input file as a pg control one: {e:#}\n\
Attempting to read it as layer file"
);
print_layerfile(&path)?;
}
}
};
Ok(())
}
fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> {
let control_file = ControlFileData::decode(&std::fs::read(&control_file_path)?)?;
println!("{control_file:?}");
let control_file_initdb = Lsn(control_file.checkPoint);
println!(
"pg_initdb_lsn: {}, aligned: {}",
control_file_initdb,
control_file_initdb.align()
);
Ok(())
}
fn print_layerfile(path: &Path) -> anyhow::Result<()> {
// Basic initialization of things that don't change after startup
virtual_file::init(10);
page_cache::init(100);
dump_layerfile_from_path(path, true)
}
fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), anyhow::Error> {
let metadata_bytes = std::fs::read(&path)?;
let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
println!("Current metadata:\n{meta:?}");
let mut update_meta = false;
if let Some(disk_consistent_lsn) = arg_matches.get_one::<String>("disk_consistent_lsn") {
meta = TimelineMetadata::new(
Lsn::from_str(disk_consistent_lsn)?,
meta.prev_record_lsn(),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
meta.pg_version(),
);
update_meta = true;
}
if let Some(prev_record_lsn) = arg_matches.get_one::<String>("prev_record_lsn") {
meta = TimelineMetadata::new(
meta.disk_consistent_lsn(),
Some(Lsn::from_str(prev_record_lsn)?),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
meta.pg_version(),
);
update_meta = true;
}
if update_meta {
let metadata_bytes = meta.to_bytes()?;
std::fs::write(&path, &metadata_bytes)?;
}
Ok(())
}
fn cli() -> Command {
Command::new("Neon Pageserver binutils")
.about("Reads pageserver (and related) binary files management utility")
.version(GIT_VERSION)
.arg(
Arg::new("path")
.help("Input file path")
.value_parser(value_parser!(PathBuf))
.required(false),
)
.subcommand(
Command::new(METADATA_SUBCOMMAND)
.about("Read and update pageserver metadata file")
.arg(
Arg::new("metadata_path")
.help("Input metadata file path")
.value_parser(value_parser!(PathBuf))
.required(false),
)
.arg(
Arg::new("disk_consistent_lsn")
.long("disk_consistent_lsn")
.help("Replace disk consistent Lsn"),
)
.arg(
Arg::new("prev_record_lsn")
.long("prev_record_lsn")
.help("Replace previous record Lsn"),
),
)
}
#[test]
fn verify_cli() {
cli().debug_assert();
}

View File

@@ -0,0 +1,75 @@
//! Main entry point for the edit_metadata executable
//!
//! A handy tool for debugging, that's all.
use anyhow::Result;
use clap::{App, Arg};
use pageserver::tenant::metadata::TimelineMetadata;
use std::path::PathBuf;
use std::str::FromStr;
use utils::{lsn::Lsn, project_git_version};
project_git_version!(GIT_VERSION);
fn main() -> Result<()> {
let arg_matches = App::new("Neon update metadata utility")
.about("Dump or update metadata file")
.version(GIT_VERSION)
.arg(
Arg::new("path")
.help("Path to metadata file")
.required(true),
)
.arg(
Arg::new("disk_lsn")
.short('d')
.long("disk_lsn")
.takes_value(true)
.help("Replace disk constistent lsn"),
)
.arg(
Arg::new("prev_lsn")
.short('p')
.long("prev_lsn")
.takes_value(true)
.help("Previous record LSN"),
)
.get_matches();
let path = PathBuf::from(arg_matches.value_of("path").unwrap());
let metadata_bytes = std::fs::read(&path)?;
let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
println!("Current metadata:\n{:?}", &meta);
let mut update_meta = false;
if let Some(disk_lsn) = arg_matches.value_of("disk_lsn") {
meta = TimelineMetadata::new(
Lsn::from_str(disk_lsn)?,
meta.prev_record_lsn(),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
meta.pg_version(),
);
update_meta = true;
}
if let Some(prev_lsn) = arg_matches.value_of("prev_lsn") {
meta = TimelineMetadata::new(
meta.disk_consistent_lsn(),
Some(Lsn::from_str(prev_lsn)?),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
meta.pg_version(),
);
update_meta = true;
}
if update_meta {
let metadata_bytes = meta.to_bytes()?;
std::fs::write(&path, &metadata_bytes)?;
}
Ok(())
}

View File

@@ -7,7 +7,6 @@
use anyhow::{anyhow, bail, ensure, Context, Result};
use remote_storage::RemoteStorageConfig;
use std::env;
use utils::crashsafe::path_with_suffix_extension;
use std::path::{Path, PathBuf};
use std::str::FromStr;
@@ -17,7 +16,6 @@ use toml_edit::{Document, Item};
use url::Url;
use utils::{
id::{NodeId, TenantId, TimelineId},
logging::LogFormat,
postgres_backend::AuthType,
};
@@ -26,7 +24,6 @@ use crate::tenant_config::{TenantConf, TenantConfOpt};
/// The name of the metadata file pageserver creates per timeline.
pub const METADATA_FILE_NAME: &str = "metadata";
pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
const TENANT_CONFIG_NAME: &str = "config";
pub mod defaults {
@@ -46,8 +43,6 @@ pub mod defaults {
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
pub const DEFAULT_LOG_FORMAT: &str = "plain";
///
/// Default built-in configuration file.
///
@@ -66,7 +61,6 @@ pub mod defaults {
# initial superuser role name to use when creating a new tenant
#initial_superuser_name = '{DEFAULT_SUPERUSER}'
#log_format = '{DEFAULT_LOG_FORMAT}'
# [tenant_config]
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
@@ -130,8 +124,6 @@ pub struct PageServerConf {
/// Etcd broker endpoints to connect to.
pub broker_endpoints: Vec<Url>,
pub log_format: LogFormat,
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -198,8 +190,6 @@ struct PageServerConfigBuilder {
profiling: BuilderValue<ProfilingConfig>,
broker_etcd_prefix: BuilderValue<String>,
broker_endpoints: BuilderValue<Vec<Url>>,
log_format: BuilderValue<LogFormat>,
}
impl Default for PageServerConfigBuilder {
@@ -227,7 +217,6 @@ impl Default for PageServerConfigBuilder {
profiling: Set(ProfilingConfig::Disabled),
broker_etcd_prefix: Set(etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string()),
broker_endpoints: Set(Vec::new()),
log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
}
}
}
@@ -300,10 +289,6 @@ impl PageServerConfigBuilder {
self.profiling = BuilderValue::Set(profiling)
}
pub fn log_format(&mut self, log_format: LogFormat) {
self.log_format = BuilderValue::Set(log_format)
}
pub fn build(self) -> anyhow::Result<PageServerConf> {
let broker_endpoints = self
.broker_endpoints
@@ -348,7 +333,6 @@ impl PageServerConfigBuilder {
broker_etcd_prefix: self
.broker_etcd_prefix
.ok_or(anyhow!("missing broker_etcd_prefix"))?,
log_format: self.log_format.ok_or(anyhow!("missing log_format"))?,
})
}
}
@@ -380,17 +364,6 @@ impl PageServerConf {
self.timelines_path(tenant_id).join(timeline_id.to_string())
}
pub fn timeline_uninit_mark_file_path(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> PathBuf {
path_with_suffix_extension(
self.timeline_path(&timeline_id, &tenant_id),
TIMELINE_UNINIT_MARK_SUFFIX,
)
}
/// Points to a place in pageserver's local directory,
/// where certain timeline's metadata file should be located.
pub fn metadata_path(&self, timeline_id: TimelineId, tenant_id: TenantId) -> PathBuf {
@@ -401,28 +374,28 @@ impl PageServerConf {
//
// Postgres distribution paths
//
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
pub fn pg_distrib_dir(&self, pg_version: u32) -> PathBuf {
let path = self.pg_distrib_dir.clone();
match pg_version {
14 => Ok(path.join(format!("v{pg_version}"))),
15 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
14 => path.join(format!("v{pg_version}")),
15 => path.join(format!("v{pg_version}")),
_ => panic!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
pub fn pg_bin_dir(&self, pg_version: u32) -> PathBuf {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("bin")),
_ => bail!("Unsupported postgres version: {}", pg_version),
14 => self.pg_distrib_dir(pg_version).join("bin"),
15 => self.pg_distrib_dir(pg_version).join("bin"),
_ => panic!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
pub fn pg_lib_dir(&self, pg_version: u32) -> PathBuf {
match pg_version {
14 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
15 => Ok(self.pg_distrib_dir(pg_version)?.join("lib")),
_ => bail!("Unsupported postgres version: {}", pg_version),
14 => self.pg_distrib_dir(pg_version).join("lib"),
15 => self.pg_distrib_dir(pg_version).join("lib"),
_ => panic!("Unsupported postgres version: {}", pg_version),
}
}
@@ -473,9 +446,6 @@ impl PageServerConf {
})
.collect::<anyhow::Result<_>>()?,
),
"log_format" => builder.log_format(
LogFormat::from_config(&parse_toml_string(key, item)?)?
),
_ => bail!("unrecognized pageserver option '{key}'"),
}
}
@@ -588,7 +558,6 @@ impl PageServerConf {
default_tenant_conf: TenantConf::dummy_conf(),
broker_endpoints: Vec::new(),
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
}
}
}
@@ -683,8 +652,6 @@ max_file_descriptors = 333
initial_superuser_name = 'zzzz'
id = 10
log_format = 'json'
"#;
#[test]
@@ -724,7 +691,6 @@ log_format = 'json'
.parse()
.expect("Failed to parse a valid broker endpoint URL")],
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
},
"Correct defaults should be used when no config values are provided"
);
@@ -769,7 +735,6 @@ log_format = 'json'
.parse()
.expect("Failed to parse a valid broker endpoint URL")],
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
log_format: LogFormat::Json,
},
"Should be able to parse all basic config values correctly"
);

View File

@@ -1,11 +1,7 @@
openapi: "3.0.2"
info:
title: Page Server API
description: Neon Pageserver API
version: "1.0"
license:
name: "Apache"
url: https://github.com/neondatabase/neon/blob/main/LICENSE
servers:
- url: ""
paths:
@@ -211,6 +207,7 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp:
parameters:
- name: tenant_id
@@ -615,10 +612,6 @@ components:
required:
- timeline_id
- tenant_id
- last_record_lsn
- disk_consistent_lsn
- awaits_download
- state
properties:
timeline_id:
type: string
@@ -626,15 +619,33 @@ components:
tenant_id:
type: string
format: hex
local:
$ref: "#/components/schemas/LocalTimelineInfo"
remote:
$ref: "#/components/schemas/RemoteTimelineInfo"
RemoteTimelineInfo:
type: object
required:
- awaits_download
- remote_consistent_lsn
properties:
awaits_download:
type: boolean
remote_consistent_lsn:
type: string
format: hex
LocalTimelineInfo:
type: object
required:
- last_record_lsn
- disk_consistent_lsn
properties:
last_record_lsn:
type: string
format: hex
disk_consistent_lsn:
type: string
format: hex
remote_consistent_lsn:
type: string
format: hex
ancestor_timeline_id:
type: string
format: hex
@@ -659,41 +670,7 @@ components:
format: hex
last_received_msg_ts:
type: integer
awaits_download:
type: boolean
state:
type: string
# These 'local' and 'remote' fields just duplicate some of the fields
# above. They are kept for backwards-compatibility. They can be removed,
# when the control plane has been updated to look at the above fields
# directly.
local:
$ref: "#/components/schemas/LocalTimelineInfo"
remote:
$ref: "#/components/schemas/RemoteTimelineInfo"
LocalTimelineInfo:
type: object
properties:
ancestor_timeline_id:
type: string
format: hex
ancestor_lsn:
type: string
format: hex
current_logical_size:
type: integer
current_physical_size:
type: integer
RemoteTimelineInfo:
type: object
required:
- remote_consistent_lsn
properties:
remote_consistent_lsn:
type: string
format: hex
Error:
type: object
required:

View File

@@ -79,13 +79,13 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
get_state(request).conf
}
// Helper function to construct a TimelineInfo struct for a timeline
async fn build_timeline_info(
state: &State,
// Helper functions to construct a LocalTimelineInfo struct for a timeline
fn local_timeline_info_from_timeline(
timeline: &Arc<Timeline>,
include_non_incremental_logical_size: bool,
include_non_incremental_physical_size: bool,
) -> anyhow::Result<TimelineInfo> {
) -> anyhow::Result<LocalTimelineInfo> {
let last_record_lsn = timeline.get_last_record_lsn();
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
let guard = timeline.last_received_wal.lock().unwrap();
@@ -100,48 +100,24 @@ async fn build_timeline_info(
}
};
let (remote_consistent_lsn, awaits_download) = if let Some(remote_entry) = state
.remote_index
.read()
.await
.timeline_entry(&TenantTimelineId {
tenant_id: timeline.tenant_id,
timeline_id: timeline.timeline_id,
}) {
(
Some(remote_entry.metadata.disk_consistent_lsn()),
remote_entry.awaits_download,
)
} else {
(None, false)
};
let ancestor_timeline_id = timeline.get_ancestor_timeline_id();
let ancestor_lsn = match timeline.get_ancestor_lsn() {
Lsn(0) => None,
lsn @ Lsn(_) => Some(lsn),
};
let current_logical_size = match timeline.get_current_logical_size() {
Ok(size) => Some(size),
Err(err) => {
error!("Timeline info creation failed to get current logical size: {err:?}");
None
}
};
let current_physical_size = Some(timeline.get_physical_size());
let state = timeline.current_state();
let info = TimelineInfo {
tenant_id: timeline.tenant_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id,
ancestor_lsn,
let info = LocalTimelineInfo {
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
ancestor_lsn: {
match timeline.get_ancestor_lsn() {
Lsn(0) => None,
lsn @ Lsn(_) => Some(lsn),
}
},
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
last_record_lsn,
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
current_logical_size,
current_physical_size,
current_logical_size: Some(
timeline
.get_current_logical_size()
.context("Timeline info creation failed to get current logical size")?,
),
current_physical_size: Some(timeline.get_physical_size()),
current_logical_size_non_incremental: if include_non_incremental_logical_size {
Some(timeline.get_current_logical_size_non_incremental(last_record_lsn)?)
} else {
@@ -156,26 +132,32 @@ async fn build_timeline_info(
last_received_msg_lsn,
last_received_msg_ts,
pg_version: timeline.pg_version,
remote_consistent_lsn,
awaits_download,
state,
// Duplicate some fields in 'local' and 'remote' fields, for backwards-compatility
// with the control plane.
local: LocalTimelineInfo {
ancestor_timeline_id,
ancestor_lsn,
current_logical_size,
current_physical_size,
},
remote: RemoteTimelineInfo {
remote_consistent_lsn,
},
};
Ok(info)
}
fn list_local_timelines(
tenant_id: TenantId,
include_non_incremental_logical_size: bool,
include_non_incremental_physical_size: bool,
) -> Result<Vec<(TimelineId, LocalTimelineInfo)>> {
let tenant = tenant_mgr::get_tenant(tenant_id, true)?;
let timelines = tenant.list_timelines();
let mut local_timeline_info = Vec::with_capacity(timelines.len());
for (timeline_id, repository_timeline) in timelines {
local_timeline_info.push((
timeline_id,
local_timeline_info_from_timeline(
&repository_timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
)?,
))
}
Ok(local_timeline_info)
}
// healthcheck handler
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let config = get_config(&request);
@@ -187,8 +169,6 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
let request_data: TimelineCreateRequest = json_request(&mut request).await?;
check_permission(&request, Some(tenant_id))?;
let state = get_state(&request);
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
let new_timeline_info = async {
match tenant.create_timeline(
@@ -199,10 +179,14 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
).await {
Ok(Some(new_timeline)) => {
// Created. Construct a TimelineInfo for it.
let timeline_info = build_timeline_info(state, &new_timeline, false, false)
.await
let local_info = local_timeline_info_from_timeline(&new_timeline, false, false)
.map_err(ApiError::InternalServerError)?;
Ok(Some(timeline_info))
Ok(Some(TimelineInfo {
tenant_id,
timeline_id: new_timeline.timeline_id,
local: Some(local_info),
remote: None,
}))
}
Ok(None) => Ok(None), // timeline already exists
Err(err) => Err(ApiError::InternalServerError(err)),
@@ -225,8 +209,6 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
query_param_present(&request, "include-non-incremental-physical-size");
check_permission(&request, Some(tenant_id))?;
let state = get_state(&request);
let timelines = tokio::task::spawn_blocking(move || {
let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
@@ -236,18 +218,36 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
let mut response_data = Vec::with_capacity(timelines.len());
for timeline in timelines {
let timeline_info = build_timeline_info(
state,
for (timeline_id, timeline) in timelines {
let local = match local_timeline_info_from_timeline(
&timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
)
.await
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
.map_err(ApiError::InternalServerError)?;
) {
Ok(local) => Some(local),
Err(e) => {
error!("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}");
None
}
};
response_data.push(timeline_info);
response_data.push(TimelineInfo {
tenant_id,
timeline_id,
local,
remote: get_state(&request)
.remote_index
.read()
.await
.timeline_entry(&TenantTimelineId {
tenant_id,
timeline_id,
})
.map(|remote_entry| RemoteTimelineInfo {
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
awaits_download: remote_entry.awaits_download,
}),
})
}
json_response(StatusCode::OK, response_data)
@@ -292,33 +292,59 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
query_param_present(&request, "include-non-incremental-physical-size");
check_permission(&request, Some(tenant_id))?;
let state = get_state(&request);
let timeline_info = async {
let (local_timeline_info, remote_timeline_info) = async {
let timeline = tokio::task::spawn_blocking(move || {
tenant_mgr::get_tenant(tenant_id, true)?.get_timeline(timeline_id, false)
tenant_mgr::get_tenant(tenant_id, true)?.get_timeline(timeline_id)
})
.await
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
let timeline = timeline.map_err(ApiError::NotFound)?;
let local_timeline_info = match timeline.and_then(|timeline| {
local_timeline_info_from_timeline(
&timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
)
}) {
Ok(local_info) => Some(local_info),
Err(e) => {
error!("Failed to get local timeline info: {e:#}");
None
}
};
let timeline_info = build_timeline_info(
state,
&timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
)
.await
.context("Failed to get local timeline info: {e:#}")
.map_err(ApiError::InternalServerError)?;
Ok::<_, ApiError>(timeline_info)
let remote_timeline_info = {
let remote_index_read = get_state(&request).remote_index.read().await;
remote_index_read
.timeline_entry(&TenantTimelineId {
tenant_id,
timeline_id,
})
.map(|remote_entry| RemoteTimelineInfo {
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
awaits_download: remote_entry.awaits_download,
})
};
Ok::<_, ApiError>((local_timeline_info, remote_timeline_info))
}
.instrument(info_span!("timeline_detail", tenant = %tenant_id, timeline = %timeline_id))
.await?;
json_response(StatusCode::OK, timeline_info)
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
Err(ApiError::NotFound(anyhow!(
"Timeline {tenant_id}/{timeline_id} is not found neither locally nor remotely"
)))
} else {
json_response(
StatusCode::OK,
TimelineInfo {
tenant_id,
timeline_id,
local: local_timeline_info,
remote: remote_timeline_info,
},
)
}
}
async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -333,13 +359,14 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
let timeline = tenant_mgr::get_tenant(tenant_id, true)
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
.and_then(|tenant| tenant.get_timeline(timeline_id))
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
.map_err(ApiError::NotFound)?;
let result = match timeline
.find_lsn_for_timestamp(timestamp_pg)
.map_err(ApiError::InternalServerError)?
{
LsnForTimestamp::Present(lsn) => format!("{lsn}"),
LsnForTimestamp::Present(lsn) => format!("{}", lsn),
LsnForTimestamp::Future(_lsn) => "future".into(),
LsnForTimestamp::Past(_lsn) => "past".into(),
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
@@ -387,7 +414,7 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
}
return json_response(StatusCode::ACCEPTED, ());
}
// no tenant in the index, release the lock to make the potentially lengthy download operation
// no tenant in the index, release the lock to make the potentially lengthy download opetation
drop(index_accessor);
// download index parts for every tenant timeline
@@ -539,27 +566,36 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
false
});
let (tenant_state, current_physical_size) = match tenant {
Ok(tenant) => {
let timelines = tenant.list_timelines();
// Calculate total physical size of all timelines
let mut current_physical_size = 0;
for timeline in timelines {
current_physical_size += timeline.get_physical_size();
}
(tenant.current_state(), Some(current_physical_size))
}
let tenant_state = match tenant {
Ok(tenant) => tenant.current_state(),
Err(e) => {
error!("Failed to get local tenant state: {e:#}");
if has_in_progress_downloads {
(TenantState::Paused, None)
TenantState::Paused
} else {
(TenantState::Broken, None)
TenantState::Broken
}
}
};
let current_physical_size =
match tokio::task::spawn_blocking(move || list_local_timelines(tenant_id, false, false))
.await
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?
{
Err(err) => {
// Getting local timelines can fail when no local tenant directory is on disk (e.g, when tenant data is being downloaded).
// In that case, put a warning message into log and operate normally.
warn!("Failed to get local timelines for tenant {tenant_id}: {err}");
None
}
Ok(local_timeline_infos) => Some(
local_timeline_infos
.into_iter()
.fold(0, |acc, x| acc + x.1.current_physical_size.unwrap()),
),
};
json_response(
StatusCode::OK,
TenantInfo {
@@ -748,7 +784,7 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
json_response(StatusCode::OK, ())
}
#[cfg(feature = "testing")]
#[cfg(any(feature = "testing", feature = "failpoints"))]
async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
if !fail::has_failpoints() {
return Err(ApiError::BadRequest(anyhow!(
@@ -782,6 +818,11 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
}
// Run GC immediately on given timeline.
// FIXME: This is just for tests. See test_runner/regress/test_gc.py.
// This probably should require special authentication or a global flag to
// enable, I don't think we want to or need to allow regular clients to invoke
// GC.
// @hllinnaka in commits ec44f4b29, 3aca717f3
#[cfg(feature = "testing")]
async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
@@ -789,16 +830,16 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
check_permission(&request, Some(tenant_id))?;
// FIXME: currently this will return a 500 error on bad tenant id; it should be 4XX
let tenant = tenant_mgr::get_tenant(tenant_id, false).map_err(ApiError::NotFound)?;
let repo = tenant_mgr::get_tenant(tenant_id, false).map_err(ApiError::NotFound)?;
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
let _span_guard =
info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id).entered();
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| repo.get_gc_horizon());
// Use tenant's pitr setting
let pitr = tenant.get_pitr_interval();
let result = tenant
let pitr = repo.get_pitr_interval();
let result = repo
.gc_iteration(Some(timeline_id), gc_horizon, pitr, true)
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
// better once the types support it.
@@ -807,15 +848,19 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
}
// Run compaction immediately on given timeline.
// FIXME This is just for tests. Don't expect this to be exposed to
// the users or the api.
// @dhammika in commit a0781f229
#[cfg(feature = "testing")]
async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
let timeline = tenant
.get_timeline(timeline_id, true)
let repo = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
let timeline = repo
.get_timeline(timeline_id)
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
.map_err(ApiError::NotFound)?;
timeline.compact().map_err(ApiError::InternalServerError)?;
@@ -829,9 +874,10 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
let timeline = tenant
.get_timeline(timeline_id, true)
let repo = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
let timeline = repo
.get_timeline(timeline_id)
.with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
.map_err(ApiError::NotFound)?;
timeline
.checkpoint(CheckpointConfig::Forced)

View File

@@ -12,10 +12,10 @@ use tracing::*;
use walkdir::WalkDir;
use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::tenant::Timeline;
use crate::walingest::WalIngest;
use crate::walrecord::DecodedWALRecord;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::*;
use postgres_ffi::waldecoder::WalStreamDecoder;
@@ -43,19 +43,19 @@ pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
/// The code that deals with the checkpoint would not work right if the
/// cluster was not shut down cleanly.
pub fn import_timeline_from_postgres_datadir(
path: &Path,
tline: &Timeline,
pgdata_path: &Path,
pgdata_lsn: Lsn,
lsn: Lsn,
) -> Result<()> {
let mut pg_control: Option<ControlFileData> = None;
// TODO this shoud be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
// Then fishing out pg_control would be unnecessary
let mut modification = tline.begin_modification(pgdata_lsn);
let mut modification = tline.begin_modification(lsn);
modification.init_empty()?;
// Import all but pg_wal
let all_but_wal = WalkDir::new(pgdata_path)
let all_but_wal = WalkDir::new(path)
.into_iter()
.filter_entry(|entry| !entry.path().ends_with("pg_wal"));
for entry in all_but_wal {
@@ -63,7 +63,7 @@ pub fn import_timeline_from_postgres_datadir(
let metadata = entry.metadata().expect("error getting dir entry metadata");
if metadata.is_file() {
let absolute_path = entry.path();
let relative_path = absolute_path.strip_prefix(pgdata_path)?;
let relative_path = absolute_path.strip_prefix(path)?;
let file = File::open(absolute_path)?;
let len = metadata.len() as usize;
@@ -84,7 +84,7 @@ pub fn import_timeline_from_postgres_datadir(
"Postgres cluster was not shut down cleanly"
);
ensure!(
pg_control.checkPointCopy.redo == pgdata_lsn.0,
pg_control.checkPointCopy.redo == lsn.0,
"unexpected checkpoint REDO pointer"
);
@@ -92,10 +92,10 @@ pub fn import_timeline_from_postgres_datadir(
// this reads the checkpoint record itself, advancing the tip of the timeline to
// *after* the checkpoint record. And crucially, it initializes the 'prev_lsn'.
import_wal(
&pgdata_path.join("pg_wal"),
&path.join("pg_wal"),
tline,
Lsn(pg_control.checkPointCopy.redo),
pgdata_lsn,
lsn,
)?;
Ok(())

View File

@@ -5,10 +5,10 @@ pub mod import_datadir;
pub mod keyspace;
pub mod metrics;
pub mod page_cache;
pub mod page_image_cache;
pub mod page_service;
pub mod pgdatadir_mapping;
pub mod profiling;
pub mod reltag;
pub mod repository;
pub mod storage_sync;
pub mod task_mgr;
@@ -46,8 +46,6 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
pub const LOG_FILE_NAME: &str = "pageserver.log";
static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
/// Config for the Repository checkpointer
#[derive(Debug, Clone, Copy)]
pub enum CheckpointConfig {
@@ -121,6 +119,32 @@ impl<T> TenantTimelineValues<T> {
fn new() -> Self {
Self(HashMap::new())
}
fn with_capacity(capacity: usize) -> Self {
Self(HashMap::with_capacity(capacity))
}
/// A convenience method to map certain values and omit some of them, if needed.
/// Tenants that won't have any timeline entries due to the filtering, will still be preserved
/// in the structure.
fn filter_map<F, NewT>(self, map: F) -> TenantTimelineValues<NewT>
where
F: Fn(T) -> Option<NewT>,
{
let capacity = self.0.len();
self.0.into_iter().fold(
TenantTimelineValues::<NewT>::with_capacity(capacity),
|mut new_values, (tenant_id, old_values)| {
let new_timeline_values = new_values.0.entry(tenant_id).or_default();
for (timeline_id, old_value) in old_values {
if let Some(new_value) = map(old_value) {
new_timeline_values.insert(timeline_id, new_value);
}
}
new_values
},
)
}
}
/// A suffix to be used during file sync from the remote storage,
@@ -157,3 +181,35 @@ mod backoff_defaults_tests {
);
}
}
#[cfg(test)]
mod tests {
use crate::tenant::harness::TIMELINE_ID;
use super::*;
#[test]
fn tenant_timeline_value_mapping() {
let first_tenant = TenantId::generate();
let second_tenant = TenantId::generate();
assert_ne!(first_tenant, second_tenant);
let mut initial = TenantTimelineValues::new();
initial
.0
.entry(first_tenant)
.or_default()
.insert(TIMELINE_ID, "test_value");
let _ = initial.0.entry(second_tenant).or_default();
assert_eq!(initial.0.len(), 2, "Should have entries for both tenants");
let filtered = initial.filter_map(|_| None::<&str>).0;
assert_eq!(
filtered.len(),
2,
"Should have entries for both tenants even after filtering away all entries"
);
assert!(filtered.contains_key(&first_tenant));
assert!(filtered.contains_key(&second_tenant));
}
}

View File

@@ -107,20 +107,18 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
// or in testing they estimate how much we would upload if we did.
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_created_persistent_files_total",
"Number of files created that are meant to be uploaded to cloud storage",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_written_persistent_bytes_total",
"Total bytes written that are meant to be uploaded to cloud storage",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -277,15 +275,11 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
/// smallest redo processing times. These buckets allow us to measure down
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// Values up to 1s are recorded because metrics show that we have redo
/// durations and lock times larger than 0.250s.
macro_rules! redo_histogram_time_buckets {
() => {
vec![
0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
1.000_000,
0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000,
]
};
}
@@ -300,17 +294,6 @@ macro_rules! redo_histogram_count_buckets {
};
}
macro_rules! redo_bytes_histogram_count_buckets {
() => {
// powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
// rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
vec![
24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
]
};
}
pub static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_wal_redo_seconds",
@@ -338,15 +321,6 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
.expect("failed to define a metric")
});
pub static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_wal_redo_bytes_histogram",
"Histogram of number of records replayed per redo",
redo_bytes_histogram_count_buckets!(),
)
.expect("failed to define a metric")
});
pub static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_replayed_wal_records_total",
@@ -412,12 +386,8 @@ impl TimelineMetrics {
let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED.clone();
let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN.clone();
TimelineMetrics {
tenant_id,
@@ -449,8 +419,6 @@ impl Drop for TimelineMetrics {
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
let _ = CURRENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);
for op in STORAGE_TIME_OPERATIONS {
let _ = STORAGE_TIME.remove_label_values(&[op, tenant_id, timeline_id]);

View File

@@ -108,10 +108,10 @@ enum CacheKey {
}
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct MaterializedPageHashKey {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub key: Key,
struct MaterializedPageHashKey {
tenant_id: TenantId,
timeline_id: TimelineId,
key: Key,
}
#[derive(Clone)]

View File

@@ -1,308 +0,0 @@
//!
//! Global page image cache
//!
//! Unlike page_cache it holds only most recent version of reconstructed page images.
//! And it uses invalidation mechanism to avoid layer ap lookups.
use crate::page_cache::MaterializedPageHashKey;
use crate::pgdatadir_mapping::{rel_block_to_key, BlockNumber};
use crate::repository::Key;
use crate::tenant::Timeline;
use anyhow::{bail, Result};
use bytes::Bytes;
use once_cell::sync::OnceCell;
use pageserver_api::reltag::RelTag;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::{Arc, Condvar, Mutex};
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
static PAGE_CACHE: OnceCell<Mutex<PageImageCache>> = OnceCell::new();
const TEST_PAGE_CACHE_SIZE: usize = 50;
enum PageImageState {
Vacant, // entry is not used
Loaded(Option<Bytes>), // page is loaded or has failed
Loading(Option<Arc<Condvar>>), // page in process of loading, Condvar is created on demand when some thread need to wait load completion
}
struct CacheEntry {
key: MaterializedPageHashKey,
// next+prev are used for LRU L2-list and next is also used for L1 free pages list
next: usize,
prev: usize,
collision: usize, // L1 hash collision chain
state: PageImageState,
}
pub struct PageImageCache {
free_list: usize, // L1 list of free entries
pages: Vec<CacheEntry>,
hash_table: Vec<usize>, // indexes in pages array
}
///
/// Initialize the page cache. This must be called once at page server startup.
///
pub fn init(size: usize) {
if PAGE_CACHE
.set(Mutex::new(PageImageCache::new(size)))
.is_err()
{
panic!("page cache already initialized");
}
}
///
/// Get a handle to the page cache.
///
pub fn get() -> &'static Mutex<PageImageCache> {
//
// In unit tests, page server startup doesn't happen and no one calls
// page_image_cache::init(). Initialize it here with a tiny cache, so that the
// page cache is usable in unit tests.
//
if cfg!(test) {
PAGE_CACHE.get_or_init(|| Mutex::new(PageImageCache::new(TEST_PAGE_CACHE_SIZE)))
} else {
PAGE_CACHE.get().expect("page cache not initialized")
}
}
fn hash<T: Hash>(t: &T) -> usize {
let mut s = DefaultHasher::new();
t.hash(&mut s);
s.finish() as usize
}
impl PageImageCache {
fn new(size: usize) -> Self {
let mut pages: Vec<CacheEntry> = Vec::with_capacity(size + 1);
let hash_table = vec![0usize; size];
// Dummy key
let dummy_key = MaterializedPageHashKey {
key: Key::MIN,
tenant_id: TenantId::from([0u8; 16]),
timeline_id: TimelineId::from([0u8; 16]),
};
// LRU list head
pages.push(CacheEntry {
key: dummy_key.clone(),
next: 0,
prev: 0,
collision: 0,
state: PageImageState::Vacant,
});
// Construct L1 free page list
for i in 0..size {
pages.push(CacheEntry {
key: dummy_key.clone(),
next: i + 2, // build L1-list of free pages
prev: 0,
collision: 0,
state: PageImageState::Vacant,
});
}
pages[size - 1].next = 0; // en of free page list
PageImageCache {
free_list: 1,
pages,
hash_table,
}
}
// Unlink from L2-list
fn unlink(&mut self, index: usize) {
let next = self.pages[index].next;
let prev = self.pages[index].prev;
self.pages[next].prev = prev;
self.pages[prev].next = next;
}
// Link in L2-list after specified element
fn link_after(&mut self, after: usize, index: usize) {
let next = self.pages[after].next;
self.pages[index].prev = after;
self.pages[index].next = next;
self.pages[next].prev = index;
self.pages[after].next = index;
}
fn prune(&mut self, index: usize) {
self.pages[index].prev = index;
self.pages[index].next = index;
}
fn is_empty(&self, index: usize) -> bool {
self.pages[index].next == index
}
}
// Remove entry from cache: o page invalidation or drop relation
pub fn remove(key: Key, tenant_id: TenantId, timeline_id: TimelineId) {
let key = MaterializedPageHashKey {
key,
tenant_id,
timeline_id,
};
let this = get();
let mut cache = this.lock().unwrap();
let h = hash(&key) % cache.hash_table.len();
let mut index = cache.hash_table[h];
let mut prev = 0usize;
while index != 0 {
if cache.pages[index].key == key {
if !cache.is_empty(index) {
cache.pages[index].state = PageImageState::Vacant;
// Remove from LRU list
cache.unlink(index);
// Insert entry in free list
cache.pages[index].next = cache.free_list;
cache.free_list = index;
} else {
// Page is process of loading: we can not remove it righ now,
// so just mark for deletion
cache.pages[index].next = 0; // make is_empty == false
}
// Remove from hash table
if prev == 0 {
cache.hash_table[h] = cache.pages[index].collision;
} else {
cache.pages[prev].collision = cache.pages[index].collision;
}
break;
}
prev = index;
index = cache.pages[index].collision;
}
// It's Ok if image not found
}
// Find or load page image in the cache
pub fn lookup(timeline: &Timeline, rel: RelTag, blkno: BlockNumber, lsn: Lsn) -> Result<Bytes> {
let key = MaterializedPageHashKey {
key: rel_block_to_key(rel, blkno),
tenant_id: timeline.tenant_id,
timeline_id: timeline.timeline_id,
};
let this = get();
let mut cache = this.lock().unwrap();
let h = hash(&key) % cache.hash_table.len();
'lookup: loop {
let mut index = cache.hash_table[h];
while index != 0 {
if cache.pages[index].key == key {
// cache hit
match &cache.pages[index].state {
PageImageState::Loaded(cached_page) => {
// Move to the head of LRU list
let page = cached_page.clone();
cache.unlink(index);
cache.link_after(0, index);
return page.ok_or_else(|| anyhow::anyhow!("page loading failed earlier"));
}
PageImageState::Loading(event) => {
// Create event on which to sleep if not yet assigned
let cv = match event {
None => {
let cv = Arc::new(Condvar::new());
cache.pages[index].state =
PageImageState::Loading(Some(cv.clone()));
cv
}
Some(cv) => cv.clone(),
};
cache = cv.wait(cache).unwrap();
// Retry lookup
continue 'lookup;
}
PageImageState::Vacant => bail!("Vacant entry is not expected here"),
};
}
index = cache.pages[index].collision;
}
// Cache miss
index = cache.free_list;
if index == 0 {
// no free items
let victim = cache.pages[0].prev; // take least recently used element from the tail of LRU list
assert!(victim != 0);
// Remove victim from hash table
let h = hash(&cache.pages[victim].key) % cache.hash_table.len();
index = cache.hash_table[h];
let mut prev = 0usize;
while index != victim {
assert!(index != 0);
prev = index;
index = cache.pages[index].collision;
}
if prev == 0 {
cache.hash_table[h] = cache.pages[victim].collision;
} else {
cache.pages[prev].collision = cache.pages[victim].collision;
}
// and from LRU list
cache.unlink(victim);
index = victim;
} else {
// Use next free item
cache.free_list = cache.pages[index].next;
}
// Make is_empty(index) == true. If entry is removed in process of loaded,
// it will be updated so that !is_empty(index)
cache.prune(index);
// Insert in hash table
cache.pages[index].collision = cache.hash_table[h];
cache.hash_table[h] = index;
cache.pages[index].key = key;
cache.pages[index].state = PageImageState::Loading(None);
drop(cache); //release lock
// Load page
let res = timeline.get_rel_page_at_lsn(rel, blkno, lsn, true);
cache = this.lock().unwrap();
if let PageImageState::Loading(event) = &cache.pages[index].state {
// Are there soMe waiting threads?
if let Some(cv) = event {
// If so, then wakeup them
cv.notify_all();
}
} else {
bail!("Loading state is expected");
}
if cache.is_empty(index) {
// entry was not marked as deleted {
// Page is loaded
// match &res { ... } is same as `res.as_ref().ok().cloned()`
cache.pages[index].state = PageImageState::Loaded(match &res {
Ok(page) => Some(page.clone()),
Err(_) => None,
});
// Link the page to the head of LRU list
cache.link_after(0, index);
} else {
cache.pages[index].state = PageImageState::Vacant;
// Return page to free list
cache.pages[index].next = cache.free_list;
cache.free_list = index;
}
// only the first one gets the full error from `get_rel_page_at_lsn`
return res;
}
}

View File

@@ -10,15 +10,8 @@
//
use anyhow::{bail, ensure, Context, Result};
use bytes::Bytes;
use bytes::{Buf, BufMut, Bytes, BytesMut};
use futures::{Stream, StreamExt};
use pageserver_api::models::{
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
PagestreamNblocksRequest, PagestreamNblocksResponse,
};
use std::io;
use std::net::TcpListener;
use std::str;
@@ -39,10 +32,10 @@ use utils::{
use crate::basebackup;
use crate::config::{PageServerConf, ProfilingConfig};
use crate::import_datadir::import_wal_from_tar;
use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME};
use crate::page_image_cache;
use crate::profiling::profpoint_start;
use crate::reltag::RelTag;
use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::Timeline;
@@ -52,6 +45,163 @@ use crate::CheckpointConfig;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
// Wrapped in libpq CopyData
enum PagestreamFeMessage {
Exists(PagestreamExistsRequest),
Nblocks(PagestreamNblocksRequest),
GetPage(PagestreamGetPageRequest),
DbSize(PagestreamDbSizeRequest),
}
// Wrapped in libpq CopyData
enum PagestreamBeMessage {
Exists(PagestreamExistsResponse),
Nblocks(PagestreamNblocksResponse),
GetPage(PagestreamGetPageResponse),
Error(PagestreamErrorResponse),
DbSize(PagestreamDbSizeResponse),
}
#[derive(Debug)]
struct PagestreamExistsRequest {
latest: bool,
lsn: Lsn,
rel: RelTag,
}
#[derive(Debug)]
struct PagestreamNblocksRequest {
latest: bool,
lsn: Lsn,
rel: RelTag,
}
#[derive(Debug)]
struct PagestreamGetPageRequest {
latest: bool,
lsn: Lsn,
rel: RelTag,
blkno: u32,
}
#[derive(Debug)]
struct PagestreamDbSizeRequest {
latest: bool,
lsn: Lsn,
dbnode: u32,
}
#[derive(Debug)]
struct PagestreamExistsResponse {
exists: bool,
}
#[derive(Debug)]
struct PagestreamNblocksResponse {
n_blocks: u32,
}
#[derive(Debug)]
struct PagestreamGetPageResponse {
page: Bytes,
}
#[derive(Debug)]
struct PagestreamErrorResponse {
message: String,
}
#[derive(Debug)]
struct PagestreamDbSizeResponse {
db_size: i64,
}
impl PagestreamFeMessage {
fn parse(mut body: Bytes) -> anyhow::Result<PagestreamFeMessage> {
// TODO these gets can fail
// these correspond to the NeonMessageTag enum in pagestore_client.h
//
// TODO: consider using protobuf or serde bincode for less error prone
// serialization.
let msg_tag = body.get_u8();
match msg_tag {
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
blkno: body.get_u32(),
})),
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
dbnode: body.get_u32(),
})),
_ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
}
}
}
impl PagestreamBeMessage {
fn serialize(&self) -> Bytes {
let mut bytes = BytesMut::new();
match self {
Self::Exists(resp) => {
bytes.put_u8(100); /* tag from pagestore_client.h */
bytes.put_u8(resp.exists as u8);
}
Self::Nblocks(resp) => {
bytes.put_u8(101); /* tag from pagestore_client.h */
bytes.put_u32(resp.n_blocks);
}
Self::GetPage(resp) => {
bytes.put_u8(102); /* tag from pagestore_client.h */
bytes.put(&resp.page[..]);
}
Self::Error(resp) => {
bytes.put_u8(103); /* tag from pagestore_client.h */
bytes.put(resp.message.as_bytes());
bytes.put_u8(0); // null terminator
}
Self::DbSize(resp) => {
bytes.put_u8(104); /* tag from pagestore_client.h */
bytes.put_i64(resp.db_size);
}
}
bytes.into()
}
}
fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Bytes>> + '_ {
async_stream::try_stream! {
loop {
@@ -350,8 +500,11 @@ impl PageServerHandler {
task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
// Create empty timeline
info!("creating new timeline");
let tenant = tenant_mgr::get_tenant(tenant_id, true)?;
let timeline = tenant.create_empty_timeline(timeline_id, base_lsn, pg_version)?;
let timeline = tenant_mgr::get_tenant(tenant_id, true)?.create_empty_timeline(
timeline_id,
base_lsn,
pg_version,
)?;
// TODO mark timeline as not ready until it reaches end_lsn.
// We might have some wal to import as well, and we should prevent compute
@@ -374,8 +527,7 @@ impl PageServerHandler {
// - use block_in_place()
let mut copyin_stream = Box::pin(copyin_stream(pgb));
let reader = SyncIoBridge::new(StreamReader::new(&mut copyin_stream));
tokio::task::block_in_place(|| timeline.import_basebackup_from_tar(reader, base_lsn))?;
timeline.initialize()?;
tokio::task::block_in_place(|| import_basebackup_from_tar(&timeline, reader, base_lsn))?;
// Drain the rest of the Copy data
let mut bytes_after_tar = 0;
@@ -392,6 +544,12 @@ impl PageServerHandler {
// It wouldn't work if base came from vanilla postgres though,
// since we discard some log files.
// Flush data to disk, then upload to s3
info!("flushing layers");
timeline.checkpoint(CheckpointConfig::Flush)?;
timeline.launch_wal_receiver()?;
info!("done");
Ok(())
}
@@ -582,12 +740,8 @@ impl PageServerHandler {
// current profiling is based on a thread-local variable, so it doesn't work
// across awaits
let _profiling_guard = profpoint_start(self.conf, ProfilingConfig::PageRequests);
let page = timeline.get_rel_page_at_lsn(req.rel, req.blkno, lsn, req.latest)?;
let page = if req.latest {
page_image_cache::lookup(timeline, req.rel, req.blkno, lsn)
} else {
timeline.get_rel_page_at_lsn(req.rel, req.blkno, lsn, false)
}?;
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
page,
}))
@@ -914,8 +1068,7 @@ impl postgres_backend_async::Handler for PageServerHandler {
}
fn get_local_timeline(tenant_id: TenantId, timeline_id: TimelineId) -> Result<Arc<Timeline>> {
tenant_mgr::get_tenant(tenant_id, true)
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
tenant_mgr::get_tenant(tenant_id, true).and_then(|tenant| tenant.get_timeline(timeline_id))
}
///

View File

@@ -7,12 +7,12 @@
//! Clarify that)
//!
use crate::keyspace::{KeySpace, KeySpaceAccum};
use crate::reltag::{RelTag, SlruKind};
use crate::repository::*;
use crate::tenant::Timeline;
use crate::walrecord::NeonWalRecord;
use anyhow::{bail, ensure, Result};
use bytes::{Buf, Bytes};
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::BLCKSZ;
use postgres_ffi::{Oid, TimestampTz, TransactionId};
@@ -1179,7 +1179,7 @@ fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
}
}
pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
@@ -1373,17 +1373,6 @@ fn is_rel_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0
}
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}
pub fn is_rel_vm_block_key(key: Key) -> bool {
key.field1 == 0x00
&& key.field4 != 0
&& key.field5 == VISIBILITYMAP_FORKNUM
&& key.field6 != 0xffffffff
}
pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
@@ -1414,9 +1403,7 @@ pub fn create_test_timeline(
timeline_id: utils::id::TimelineId,
pg_version: u32,
) -> Result<std::sync::Arc<Timeline>> {
let tline = tenant
.create_empty_timeline(timeline_id, Lsn(8), pg_version)?
.initialize()?;
let tline = tenant.create_empty_timeline(timeline_id, Lsn(8), pg_version)?;
let mut m = tline.begin_modification(Lsn(8));
m.init_empty()?;
m.commit()?;

View File

@@ -169,14 +169,9 @@ use self::{
upload::{upload_index_part, upload_timeline_layers, UploadedTimeline},
};
use crate::{
config::PageServerConf,
exponential_backoff,
storage_sync::index::{LayerFileMetadata, RemoteIndex},
task_mgr,
task_mgr::TaskKind,
task_mgr::BACKGROUND_RUNTIME,
tenant::metadata::TimelineMetadata,
tenant_mgr::{attach_local_tenants, TenantAttachData},
config::PageServerConf, exponential_backoff, storage_sync::index::RemoteIndex, task_mgr,
task_mgr::TaskKind, task_mgr::BACKGROUND_RUNTIME, tenant::metadata::TimelineMetadata,
tenant_mgr::attach_local_tenants,
};
use crate::{
metrics::{IMAGE_SYNC_TIME, REMAINING_SYNC_ITEMS, REMOTE_INDEX_UPLOAD},
@@ -193,7 +188,7 @@ static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
/// A timeline status to share with pageserver's sync counterpart,
/// after comparing local and remote timeline state.
#[derive(Clone, PartialEq, Eq)]
#[derive(Clone)]
pub enum LocalTimelineInitStatus {
/// The timeline has every remote layer present locally.
/// There could be some layers requiring uploading,
@@ -316,7 +311,7 @@ impl SyncQueue {
/// A task to run in the async download/upload loop.
/// Limited by the number of retries, after certain threshold the failing task gets evicted and the timeline disabled.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone)]
enum SyncTask {
/// A checkpoint outcome with possible local file updates that need actualization in the remote storage.
/// Not necessary more fresh than the one already uploaded.
@@ -427,7 +422,7 @@ impl SyncTaskBatch {
.extend(new_delete.data.deleted_layers.iter().cloned());
}
if let Some(batch_upload) = &mut self.upload {
let not_deleted = |layer: &PathBuf, _: &mut LayerFileMetadata| {
let not_deleted = |layer: &PathBuf| {
!new_delete.data.layers_to_delete.contains(layer)
&& !new_delete.data.deleted_layers.contains(layer)
};
@@ -455,35 +450,21 @@ impl SyncTaskBatch {
#[derive(Debug, Clone, PartialEq, Eq)]
struct LayersUpload {
/// Layer file path in the pageserver workdir, that were added for the corresponding checkpoint.
layers_to_upload: HashMap<PathBuf, LayerFileMetadata>,
layers_to_upload: HashSet<PathBuf>,
/// Already uploaded layers. Used to store the data about the uploads between task retries
/// and to record the data into the remote index after the task got completed or evicted.
uploaded_layers: HashMap<PathBuf, LayerFileMetadata>,
uploaded_layers: HashSet<PathBuf>,
metadata: Option<TimelineMetadata>,
}
/// A timeline download task.
/// Does not contain the file list to download, to allow other
/// parts of the pageserer code to schedule the task
/// without using the remote index or any other ways to list the remote timeline files.
/// without using the remote index or any other ways to list the remote timleine files.
/// Skips the files that are already downloaded.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LayersDownload {
layers_to_skip: HashSet<PathBuf>,
/// Paths which have been downloaded, and had their metadata verified or generated.
///
/// Metadata generation happens when upgrading from past version of `IndexPart`.
gathered_metadata: HashMap<PathBuf, LayerFileMetadata>,
}
impl LayersDownload {
fn from_skipped_layers(layers_to_skip: HashSet<PathBuf>) -> Self {
LayersDownload {
layers_to_skip,
gathered_metadata: HashMap::default(),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -505,7 +486,7 @@ struct LayersDeletion {
pub fn schedule_layer_upload(
tenant_id: TenantId,
timeline_id: TimelineId,
layers_to_upload: HashMap<PathBuf, LayerFileMetadata>,
layers_to_upload: HashSet<PathBuf>,
metadata: Option<TimelineMetadata>,
) {
let sync_queue = match SYNC_QUEUE.get() {
@@ -522,7 +503,7 @@ pub fn schedule_layer_upload(
},
SyncTask::upload(LayersUpload {
layers_to_upload,
uploaded_layers: HashMap::new(),
uploaded_layers: HashSet::new(),
metadata,
}),
);
@@ -580,44 +561,18 @@ pub fn schedule_layer_download(tenant_id: TenantId, timeline_id: TimelineId) {
tenant_id,
timeline_id,
},
SyncTask::download(LayersDownload::from_skipped_layers(HashSet::new())),
SyncTask::download(LayersDownload {
layers_to_skip: HashSet::new(),
}),
);
debug!("Download task for tenant {tenant_id}, timeline {timeline_id} sent")
}
/// Local existing timeline files
///
/// Values of this type serve different meanings in different contexts. On startup, collected
/// timelines come with the full collected information and when signalling readyness to attach
/// after completed download. After the download the file information is no longer carried, because
/// it is already merged into [`RemoteTimeline`].
#[derive(Debug)]
pub struct TimelineLocalFiles(TimelineMetadata, HashMap<PathBuf, LayerFileMetadata>);
impl TimelineLocalFiles {
pub fn metadata(&self) -> &TimelineMetadata {
&self.0
}
/// Called during startup, for all of the local files with full metadata.
pub(crate) fn collected(
metadata: TimelineMetadata,
timeline_files: HashMap<PathBuf, LayerFileMetadata>,
) -> TimelineLocalFiles {
TimelineLocalFiles(metadata, timeline_files)
}
/// Called near the end of tenant initialization, to signal readyness to attach tenants.
pub(crate) fn ready(metadata: TimelineMetadata) -> Self {
TimelineLocalFiles(metadata, HashMap::new())
}
}
/// Launch a thread to perform remote storage sync tasks.
/// See module docs for loop step description.
pub fn spawn_storage_sync_task(
conf: &'static PageServerConf,
local_timeline_files: HashMap<TenantId, HashMap<TimelineId, TimelineLocalFiles>>,
local_timeline_files: TenantTimelineValues<(TimelineMetadata, HashSet<PathBuf>)>,
storage: GenericRemoteStorage,
max_concurrent_timelines_sync: NonZeroUsize,
max_sync_errors: NonZeroU32,
@@ -640,7 +595,7 @@ pub fn spawn_storage_sync_task(
let mut keys_for_index_part_downloads = HashSet::new();
let mut timelines_to_sync = HashMap::new();
for (tenant_id, timeline_data) in local_timeline_files {
for (tenant_id, timeline_data) in local_timeline_files.0 {
if timeline_data.is_empty() {
info!("got empty tenant {}", tenant_id);
let _ = empty_tenants.0.entry(tenant_id).or_default();
@@ -743,7 +698,7 @@ async fn storage_sync_loop(
"Sync loop step completed, {} new tenant state update(s)",
updated_tenants.len()
);
let mut timelines_to_attach = HashMap::new();
let mut timelines_to_attach = TenantTimelineValues::new();
let index_accessor = index.read().await;
for tenant_id in updated_tenants {
let tenant_entry = match index_accessor.tenant_entry(&tenant_id) {
@@ -769,16 +724,12 @@ async fn storage_sync_loop(
// and register them all at once in a tenant for download
// to be submitted in a single operation to tenant
// so it can apply them at once to internal timeline map.
timelines_to_attach.insert(
timelines_to_attach.0.insert(
tenant_id,
TenantAttachData::Ready(
tenant_entry
.iter()
.map(|(&id, entry)| {
(id, TimelineLocalFiles::ready(entry.metadata.clone()))
})
.collect(),
),
tenant_entry
.iter()
.map(|(&id, entry)| (id, entry.metadata.clone()))
.collect(),
);
}
}
@@ -1020,27 +971,15 @@ async fn download_timeline_data(
}
DownloadedTimeline::Successful(mut download_data) => {
match update_local_metadata(conf, sync_id, current_remote_timeline).await {
Ok(()) => {
let mut g = index.write().await;
match g.set_awaits_download(&sync_id, false) {
Ok(()) => {
let timeline = g
.timeline_entry_mut(&sync_id)
.expect("set_awaits_download verified existence");
timeline.merge_metadata_from_downloaded(
&download_data.data.gathered_metadata,
);
register_sync_status(sync_id, sync_start, TASK_NAME, Some(true));
return DownloadStatus::Downloaded;
}
Err(e) => {
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
}
};
}
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
Ok(()) => {
register_sync_status(sync_id, sync_start, TASK_NAME, Some(true));
return DownloadStatus::Downloaded;
}
Err(e) => {
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
}
},
Err(e) => {
error!("Failed to update local timeline metadata: {e:?}");
download_data.retries += 1;
@@ -1243,18 +1182,11 @@ async fn update_remote_data(
}
if upload_failed {
existing_entry.add_upload_failures(
uploaded_data
.layers_to_upload
.iter()
.map(|(k, v)| (k.to_owned(), v.to_owned())),
uploaded_data.layers_to_upload.iter().cloned(),
);
} else {
existing_entry.add_timeline_layers(
uploaded_data
.uploaded_layers
.iter()
.map(|(k, v)| (k.to_owned(), v.to_owned())),
);
existing_entry
.add_timeline_layers(uploaded_data.uploaded_layers.iter().cloned());
}
}
RemoteDataUpdate::Delete(layers_to_remove) => {
@@ -1274,19 +1206,11 @@ async fn update_remote_data(
};
let mut new_remote_timeline = RemoteTimeline::new(new_metadata.clone());
if upload_failed {
new_remote_timeline.add_upload_failures(
uploaded_data
.layers_to_upload
.iter()
.map(|(k, v)| (k.to_owned(), v.to_owned())),
);
new_remote_timeline
.add_upload_failures(uploaded_data.layers_to_upload.iter().cloned());
} else {
new_remote_timeline.add_timeline_layers(
uploaded_data
.uploaded_layers
.iter()
.map(|(k, v)| (k.to_owned(), v.to_owned())),
);
new_remote_timeline
.add_timeline_layers(uploaded_data.uploaded_layers.iter().cloned());
}
index_accessor.add_timeline_entry(sync_id, new_remote_timeline.clone());
@@ -1334,14 +1258,13 @@ async fn validate_task_retries(
fn schedule_first_sync_tasks(
index: &mut RemoteTimelineIndex,
sync_queue: &SyncQueue,
local_timeline_files: HashMap<TenantTimelineId, TimelineLocalFiles>,
local_timeline_files: HashMap<TenantTimelineId, (TimelineMetadata, HashSet<PathBuf>)>,
) -> TenantTimelineValues<LocalTimelineInitStatus> {
let mut local_timeline_init_statuses = TenantTimelineValues::new();
let mut new_sync_tasks = VecDeque::with_capacity(local_timeline_files.len());
for (sync_id, local_timeline) in local_timeline_files {
let TimelineLocalFiles(local_metadata, local_files) = local_timeline;
for (sync_id, (local_metadata, local_files)) in local_timeline_files {
match index.timeline_entry_mut(&sync_id) {
Some(remote_timeline) => {
let (timeline_status, awaits_download) = compare_local_and_remote_timeline(
@@ -1385,7 +1308,7 @@ fn schedule_first_sync_tasks(
sync_id,
SyncTask::upload(LayersUpload {
layers_to_upload: local_files,
uploaded_layers: HashMap::new(),
uploaded_layers: HashSet::new(),
metadata: Some(local_metadata.clone()),
}),
));
@@ -1412,46 +1335,20 @@ fn compare_local_and_remote_timeline(
new_sync_tasks: &mut VecDeque<(TenantTimelineId, SyncTask)>,
sync_id: TenantTimelineId,
local_metadata: TimelineMetadata,
local_files: HashMap<PathBuf, LayerFileMetadata>,
local_files: HashSet<PathBuf>,
remote_entry: &RemoteTimeline,
) -> (LocalTimelineInitStatus, bool) {
let _entered = info_span!("compare_local_and_remote_timeline", sync_id = %sync_id).entered();
let needed_to_download_files = remote_entry
.stored_files()
.iter()
.filter_map(|(layer_file, remote_metadata)| {
if let Some(local_metadata) = local_files.get(layer_file) {
match (remote_metadata.file_size(), local_metadata.file_size()) {
(Some(x), Some(y)) if x == y => { None },
(None, Some(_)) => {
// upgrading from an earlier IndexPart without metadata
None
},
_ => {
// having to deal with other than (Some(x), Some(y)) where x != y here is a
// bummer, but see #2582 and #2610 for attempts and discussion.
warn!("Redownloading locally existing {layer_file:?} due to size mismatch, size on index: {:?}, on disk: {:?}", remote_metadata.file_size(), local_metadata.file_size());
Some(layer_file)
},
}
} else {
// doesn't exist locally
Some(layer_file)
}
})
.collect::<HashSet<_>>();
let remote_files = remote_entry.stored_files();
let (initial_timeline_status, awaits_download) = if !needed_to_download_files.is_empty() {
let number_of_layers_to_download = remote_files.difference(&local_files).count();
let (initial_timeline_status, awaits_download) = if number_of_layers_to_download > 0 {
new_sync_tasks.push_back((
sync_id,
SyncTask::download(LayersDownload::from_skipped_layers(
local_files
.keys()
.filter(|path| !needed_to_download_files.contains(path))
.cloned()
.collect(),
)),
SyncTask::download(LayersDownload {
layers_to_skip: local_files.clone(),
}),
));
info!("NeedsSync");
(LocalTimelineInitStatus::NeedsSync, true)
@@ -1466,22 +1363,15 @@ fn compare_local_and_remote_timeline(
};
let layers_to_upload = local_files
.iter()
.filter_map(|(local_file, metadata)| {
if !remote_entry.stored_files().contains_key(local_file) {
Some((local_file.to_owned(), metadata.to_owned()))
} else {
None
}
})
.collect::<HashMap<_, _>>();
.difference(remote_files)
.cloned()
.collect::<HashSet<_>>();
if !layers_to_upload.is_empty() {
new_sync_tasks.push_back((
sync_id,
SyncTask::upload(LayersUpload {
layers_to_upload,
uploaded_layers: HashMap::new(),
uploaded_layers: HashSet::new(),
metadata: Some(local_metadata),
}),
));
@@ -1537,12 +1427,11 @@ mod test_utils {
let timeline_path = harness.timeline_path(&timeline_id);
fs::create_dir_all(&timeline_path).await?;
let mut layers_to_upload = HashMap::with_capacity(filenames.len());
let mut layers_to_upload = HashSet::with_capacity(filenames.len());
for &file in filenames {
let file_path = timeline_path.join(file);
fs::write(&file_path, dummy_contents(file).into_bytes()).await?;
let metadata = LayerFileMetadata::new(file_path.metadata()?.len());
layers_to_upload.insert(file_path, metadata);
layers_to_upload.insert(file_path);
}
fs::write(
@@ -1553,7 +1442,7 @@ mod test_utils {
Ok(LayersUpload {
layers_to_upload,
uploaded_layers: HashMap::new(),
uploaded_layers: HashSet::new(),
metadata: Some(metadata),
})
}
@@ -1608,13 +1497,12 @@ mod tests {
assert!(sync_id_2 != sync_id_3);
assert!(sync_id_3 != TEST_SYNC_ID);
let download_task =
SyncTask::download(LayersDownload::from_skipped_layers(HashSet::from([
PathBuf::from("sk"),
])));
let download_task = SyncTask::download(LayersDownload {
layers_to_skip: HashSet::from([PathBuf::from("sk")]),
});
let upload_task = SyncTask::upload(LayersUpload {
layers_to_upload: HashMap::from([(PathBuf::from("up"), LayerFileMetadata::new(123))]),
uploaded_layers: HashMap::from([(PathBuf::from("upl"), LayerFileMetadata::new(123))]),
layers_to_upload: HashSet::from([PathBuf::from("up")]),
uploaded_layers: HashSet::from([PathBuf::from("upl")]),
metadata: Some(dummy_metadata(Lsn(2))),
});
let delete_task = SyncTask::delete(LayersDeletion {
@@ -1658,10 +1546,12 @@ mod tests {
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
assert_eq!(sync_queue.len(), 0);
let download = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk")]));
let download = LayersDownload {
layers_to_skip: HashSet::from([PathBuf::from("sk")]),
};
let upload = LayersUpload {
layers_to_upload: HashMap::from([(PathBuf::from("up"), LayerFileMetadata::new(123))]),
uploaded_layers: HashMap::from([(PathBuf::from("upl"), LayerFileMetadata::new(123))]),
layers_to_upload: HashSet::from([PathBuf::from("up")]),
uploaded_layers: HashSet::from([PathBuf::from("upl")]),
metadata: Some(dummy_metadata(Lsn(2))),
};
let delete = LayersDeletion {
@@ -1709,10 +1599,18 @@ mod tests {
#[tokio::test]
async fn same_task_id_same_tasks_batch() {
let sync_queue = SyncQueue::new(NonZeroUsize::new(1).unwrap());
let download_1 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk1")]));
let download_2 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk2")]));
let download_3 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk3")]));
let download_4 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk4")]));
let download_1 = LayersDownload {
layers_to_skip: HashSet::from([PathBuf::from("sk1")]),
};
let download_2 = LayersDownload {
layers_to_skip: HashSet::from([PathBuf::from("sk2")]),
};
let download_3 = LayersDownload {
layers_to_skip: HashSet::from([PathBuf::from("sk3")]),
};
let download_4 = LayersDownload {
layers_to_skip: HashSet::from([PathBuf::from("sk4")]),
};
let sync_id_2 = TenantTimelineId {
tenant_id: TenantId::from_array(hex!("22223344556677881122334455667788")),
@@ -1736,15 +1634,15 @@ mod tests {
Some(SyncTaskBatch {
download: Some(SyncData {
retries: 0,
data: LayersDownload::from_skipped_layers(
{
data: LayersDownload {
layers_to_skip: {
let mut set = HashSet::new();
set.extend(download_1.layers_to_skip.into_iter());
set.extend(download_2.layers_to_skip.into_iter());
set.extend(download_4.layers_to_skip.into_iter());
set
},
)
}
}),
upload: None,
delete: None,
@@ -1760,148 +1658,4 @@ mod tests {
"Should have one task left out of the batch"
);
}
mod local_and_remote_comparisons {
use super::*;
#[test]
fn ready() {
let mut new_sync_tasks = VecDeque::default();
let sync_id = TenantTimelineId::generate();
let local_metadata = dummy_metadata(0x02.into());
let local_files =
HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
remote_entry
.add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
let (status, sync_needed) = compare_local_and_remote_timeline(
&mut new_sync_tasks,
sync_id,
local_metadata.clone(),
local_files,
&remote_entry,
);
assert_eq!(
status,
LocalTimelineInitStatus::LocallyComplete(local_metadata)
);
assert!(!sync_needed);
assert!(new_sync_tasks.is_empty(), "{:?}", new_sync_tasks);
}
#[test]
fn needs_download() {
let mut new_sync_tasks = VecDeque::default();
let sync_id = TenantTimelineId::generate();
let local_metadata = dummy_metadata(0x02.into());
let local_files = HashMap::default();
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
remote_entry
.add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
let (status, sync_needed) = compare_local_and_remote_timeline(
&mut new_sync_tasks,
sync_id,
local_metadata,
local_files.clone(),
&remote_entry,
);
assert_eq!(status, LocalTimelineInitStatus::NeedsSync);
assert!(sync_needed);
let new_sync_tasks = new_sync_tasks.into_iter().collect::<Vec<_>>();
assert_eq!(
&new_sync_tasks,
&[(
sync_id,
SyncTask::download(LayersDownload::from_skipped_layers(
local_files.keys().cloned().collect()
))
)]
);
}
#[test]
fn redownload_is_not_needed_on_upgrade() {
// originally the implementation missed the `(None, Some(_))` case in the match, and
// proceeded to always redownload if the remote metadata was not available.
let mut new_sync_tasks = VecDeque::default();
let sync_id = TenantTimelineId::generate();
let local_metadata = dummy_metadata(0x02.into());
// type system would in general allow that LayerFileMetadata would be created with
// file_size: None, however `LayerFileMetadata::default` is only allowed from tests,
// and so everywhere within the system valid LayerFileMetadata is being created, it is
// created through `::new`.
let local_files =
HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
// RemoteTimeline is constructed out of an older version IndexPart, which didn't carry
// any metadata.
remote_entry
.add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::default())]);
let (status, sync_needed) = compare_local_and_remote_timeline(
&mut new_sync_tasks,
sync_id,
local_metadata.clone(),
local_files,
&remote_entry,
);
assert_eq!(
status,
LocalTimelineInitStatus::LocallyComplete(local_metadata)
);
assert!(!sync_needed);
}
#[test]
fn needs_upload() {
let mut new_sync_tasks = VecDeque::default();
let sync_id = TenantTimelineId::generate();
let local_metadata = dummy_metadata(0x02.into());
let local_files =
HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
remote_entry.add_timeline_layers([]);
let (status, sync_needed) = compare_local_and_remote_timeline(
&mut new_sync_tasks,
sync_id,
local_metadata.clone(),
local_files.clone(),
&remote_entry,
);
assert_eq!(
status,
LocalTimelineInitStatus::LocallyComplete(local_metadata.clone())
);
assert!(!sync_needed);
let new_sync_tasks = new_sync_tasks.into_iter().collect::<Vec<_>>();
assert_eq!(
&new_sync_tasks,
&[(
sync_id,
SyncTask::upload(LayersUpload {
layers_to_upload: local_files,
uploaded_layers: HashMap::default(),
metadata: Some(local_metadata),
})
)]
);
}
}
}

View File

@@ -171,7 +171,7 @@ mod tests {
let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
let timeline_upload =
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
for (local_path, _metadata) in timeline_upload.layers_to_upload {
for local_path in timeline_upload.layers_to_upload {
let remote_path =
local_storage.resolve_in_storage(&local_storage.remote_object_id(&local_path)?)?;
let remote_parent_dir = remote_path.parent().unwrap();

View File

@@ -16,13 +16,9 @@ use tokio::{
};
use tracing::{debug, error, info, warn};
use crate::{
config::PageServerConf,
storage_sync::{index::LayerFileMetadata, SyncTask},
TEMP_FILE_SUFFIX,
};
use crate::{config::PageServerConf, storage_sync::SyncTask, TEMP_FILE_SUFFIX};
use utils::{
crashsafe::path_with_suffix_extension,
crashsafe_dir::path_with_suffix_extension,
id::{TenantId, TenantTimelineId, TimelineId},
};
@@ -223,14 +219,8 @@ pub(super) async fn download_timeline_layers<'a>(
let layers_to_download = remote_timeline
.stored_files()
.iter()
.filter_map(|(layer_path, metadata)| {
if !download.layers_to_skip.contains(layer_path) {
Some((layer_path.to_owned(), metadata.to_owned()))
} else {
None
}
})
.difference(&download.layers_to_skip)
.cloned()
.collect::<Vec<_>>();
debug!("Layers to download: {layers_to_download:?}");
@@ -243,129 +233,89 @@ pub(super) async fn download_timeline_layers<'a>(
let mut download_tasks = layers_to_download
.into_iter()
.map(|(layer_destination_path, metadata)| async move {
.map(|layer_destination_path| async move {
if layer_destination_path.exists() {
debug!(
"Layer already exists locally, skipping download: {}",
layer_destination_path.display()
);
} else {
// Perform a rename inspired by durable_rename from file_utils.c.
// The sequence:
// write(tmp)
// fsync(tmp)
// rename(tmp, new)
// fsync(new)
// fsync(parent)
// For more context about durable_rename check this email from postgres mailing list:
// https://www.postgresql.org/message-id/56583BDD.9060302@2ndquadrant.com
// If pageserver crashes the temp file will be deleted on startup and re-downloaded.
let temp_file_path =
path_with_suffix_extension(&layer_destination_path, TEMP_FILE_SUFFIX);
match layer_destination_path.metadata() {
Ok(m) if m.is_file() => {
// the file exists from earlier round when we failed after renaming it as
// layer_destination_path
let verified = if let Some(expected) = metadata.file_size() {
m.len() == expected
} else {
// behaviour before recording metadata was to accept any existing
true
};
let mut destination_file =
fs::File::create(&temp_file_path).await.with_context(|| {
format!(
"Failed to create a destination file for layer '{}'",
temp_file_path.display()
)
})?;
if verified {
debug!(
"Layer already exists locally, skipping download: {}",
layer_destination_path.display()
);
return Ok((layer_destination_path, LayerFileMetadata::new(m.len())))
} else {
// no need to remove it, it will be overwritten by fs::rename
// after successful download
warn!("Downloaded layer exists already but layer file metadata mismatches: {}, metadata {:?}", layer_destination_path.display(), metadata);
}
}
Ok(m) => {
return Err(anyhow::anyhow!("Downloaded layer destination exists but is not a file: {m:?}, target needs to be removed/archived manually: {layer_destination_path:?}"));
}
Err(_) => {
// behave as the file didn't exist
}
let mut layer_download = storage.download_storage_object(None, &layer_destination_path)
.await
.with_context(|| {
format!(
"Failed to initiate the download the layer for {sync_id} into file '{}'",
temp_file_path.display()
)
})?;
io::copy(&mut layer_download.download_stream, &mut destination_file)
.await
.with_context(|| {
format!(
"Failed to download the layer for {sync_id} into file '{}'",
temp_file_path.display()
)
})?;
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
// A file will not be closed immediately when it goes out of scope if there are any IO operations
// that have not yet completed. To ensure that a file is closed immediately when it is dropped,
// you should call flush before dropping it.
//
// From the tokio code I see that it waits for pending operations to complete. There shouldn't be any because
// we assume that `destination_file` file is fully written. I.e there is no pending .write(...).await operations.
// But for additional safety let's check/wait for any pending operations.
destination_file.flush().await.with_context(|| {
format!(
"failed to flush source file at {}",
temp_file_path.display()
)
})?;
// not using sync_data because it can lose file size update
destination_file.sync_all().await.with_context(|| {
format!(
"failed to fsync source file at {}",
temp_file_path.display()
)
})?;
drop(destination_file);
fail::fail_point!("remote-storage-download-pre-rename", |_| {
anyhow::bail!("remote-storage-download-pre-rename failpoint triggered")
});
fs::rename(&temp_file_path, &layer_destination_path).await?;
fsync_path(&layer_destination_path).await.with_context(|| {
format!(
"Cannot fsync layer destination path {}",
layer_destination_path.display(),
)
})?;
}
// Perform a rename inspired by durable_rename from file_utils.c.
// The sequence:
// write(tmp)
// fsync(tmp)
// rename(tmp, new)
// fsync(new)
// fsync(parent)
// For more context about durable_rename check this email from postgres mailing list:
// https://www.postgresql.org/message-id/56583BDD.9060302@2ndquadrant.com
// If pageserver crashes the temp file will be deleted on startup and re-downloaded.
let temp_file_path =
path_with_suffix_extension(&layer_destination_path, TEMP_FILE_SUFFIX);
// TODO: this doesn't use the cached fd for some reason?
let mut destination_file =
fs::File::create(&temp_file_path).await.with_context(|| {
format!(
"Failed to create a destination file for layer '{}'",
temp_file_path.display()
)
})?;
let mut layer_download = storage.download_storage_object(None, &layer_destination_path)
.await
.with_context(|| {
format!(
"Failed to initiate the download the layer for {sync_id} into file '{}'",
temp_file_path.display()
)
})?;
let bytes_amount = io::copy(&mut layer_download.download_stream, &mut destination_file)
.await
.with_context(|| {
format!(
"Failed to download the layer for {sync_id} into file '{}'",
temp_file_path.display()
)
})?;
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
// A file will not be closed immediately when it goes out of scope if there are any IO operations
// that have not yet completed. To ensure that a file is closed immediately when it is dropped,
// you should call flush before dropping it.
//
// From the tokio code I see that it waits for pending operations to complete. There shouldn't be any because
// we assume that `destination_file` file is fully written. I.e there is no pending .write(...).await operations.
// But for additional safety let's check/wait for any pending operations.
destination_file.flush().await.with_context(|| {
format!(
"failed to flush source file at {}",
temp_file_path.display()
)
})?;
match metadata.file_size() {
Some(expected) if expected != bytes_amount => {
anyhow::bail!(
"According to layer file metadata should had downloaded {expected} bytes but downloaded {bytes_amount} bytes into file '{}'",
temp_file_path.display()
);
},
Some(_) | None => {
// matches, or upgrading from an earlier IndexPart version
}
}
// not using sync_data because it can lose file size update
destination_file.sync_all().await.with_context(|| {
format!(
"failed to fsync source file at {}",
temp_file_path.display()
)
})?;
drop(destination_file);
fail::fail_point!("remote-storage-download-pre-rename", |_| {
anyhow::bail!("remote-storage-download-pre-rename failpoint triggered")
});
fs::rename(&temp_file_path, &layer_destination_path).await?;
fsync_path(&layer_destination_path).await.with_context(|| {
format!(
"Cannot fsync layer destination path {}",
layer_destination_path.display(),
)
})?;
Ok::<_, anyhow::Error>((layer_destination_path, LayerFileMetadata::new(bytes_amount)))
Ok::<_, anyhow::Error>(layer_destination_path)
})
.collect::<FuturesUnordered<_>>();
@@ -374,12 +324,9 @@ pub(super) async fn download_timeline_layers<'a>(
let mut undo = HashSet::new();
while let Some(download_result) = download_tasks.next().await {
match download_result {
Ok((downloaded_path, metadata)) => {
Ok(downloaded_path) => {
undo.insert(downloaded_path.clone());
download.layers_to_skip.insert(downloaded_path.clone());
// what if the key existed already? ignore, because then we would had
// downloaded a partial file, and had to retry
download.gathered_metadata.insert(downloaded_path, metadata);
download.layers_to_skip.insert(downloaded_path);
}
Err(e) => {
errors_happened = true;
@@ -402,8 +349,6 @@ pub(super) async fn download_timeline_layers<'a>(
);
for item in undo {
download.layers_to_skip.remove(&item);
// intentionally don't clear the gathered_metadata because it exists for fsync_path
// failure on parent directory
}
errors_happened = true;
}
@@ -508,9 +453,9 @@ mod tests {
let timeline_upload =
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
for local_path in timeline_upload.layers_to_upload.keys() {
for local_path in timeline_upload.layers_to_upload {
let remote_path =
local_storage.resolve_in_storage(&storage.remote_object_id(local_path)?)?;
local_storage.resolve_in_storage(&storage.remote_object_id(&local_path)?)?;
let remote_parent_dir = remote_path.parent().unwrap();
if !remote_parent_dir.exists() {
fs::create_dir_all(&remote_parent_dir).await?;
@@ -528,19 +473,11 @@ mod tests {
let mut remote_timeline = RemoteTimeline::new(metadata.clone());
remote_timeline.awaits_download = true;
remote_timeline.add_timeline_layers(layer_files.iter().map(|layer| {
let layer_path = local_timeline_path.join(layer);
// this could had also been LayerFileMetadata::default(), but since in this test we
// don't do the merge operation done by storage_sync::download_timeline_data, it would
// not be merged back to timeline.
let metadata_from_upload = timeline_upload
.layers_to_upload
.get(&layer_path)
.expect("layer must exist in previously uploaded paths")
.to_owned();
(layer_path, metadata_from_upload)
}));
remote_timeline.add_timeline_layers(
layer_files
.iter()
.map(|layer| local_timeline_path.join(layer)),
);
let download_data = match download_timeline_layers(
harness.conf,
@@ -550,9 +487,9 @@ mod tests {
sync_id,
SyncData::new(
current_retries,
LayersDownload::from_skipped_layers(HashSet::from([
local_timeline_path.join("layer_to_skip")
])),
LayersDownload {
layers_to_skip: HashSet::from([local_timeline_path.join("layer_to_skip")]),
},
),
)
.await
@@ -615,7 +552,12 @@ mod tests {
&sync_queue,
None,
sync_id,
SyncData::new(0, LayersDownload::from_skipped_layers(HashSet::new())),
SyncData::new(
0,
LayersDownload {
layers_to_skip: HashSet::new(),
},
),
)
.await;
assert!(
@@ -634,7 +576,12 @@ mod tests {
&sync_queue,
Some(&not_expecting_download_remote_timeline),
sync_id,
SyncData::new(0, LayersDownload::from_skipped_layers(HashSet::new())),
SyncData::new(
0,
LayersDownload {
layers_to_skip: HashSet::new(),
},
),
)
.await;
assert!(

View File

@@ -212,8 +212,8 @@ impl RemoteTimelineIndex {
/// Restored index part data about the timeline, stored in the remote index.
#[derive(Debug, Clone)]
pub struct RemoteTimeline {
timeline_layers: HashMap<PathBuf, LayerFileMetadata>,
missing_layers: HashMap<PathBuf, LayerFileMetadata>,
timeline_layers: HashSet<PathBuf>,
missing_layers: HashSet<PathBuf>,
pub metadata: TimelineMetadata,
pub awaits_download: bool,
@@ -222,161 +222,62 @@ pub struct RemoteTimeline {
impl RemoteTimeline {
pub fn new(metadata: TimelineMetadata) -> Self {
Self {
timeline_layers: HashMap::default(),
missing_layers: HashMap::default(),
timeline_layers: HashSet::new(),
missing_layers: HashSet::new(),
metadata,
awaits_download: false,
}
}
pub fn add_timeline_layers(
&mut self,
new_layers: impl IntoIterator<Item = (PathBuf, LayerFileMetadata)>,
) {
self.timeline_layers.extend(new_layers);
pub fn add_timeline_layers(&mut self, new_layers: impl IntoIterator<Item = PathBuf>) {
self.timeline_layers.extend(new_layers.into_iter());
}
pub fn add_upload_failures(
&mut self,
upload_failures: impl IntoIterator<Item = (PathBuf, LayerFileMetadata)>,
) {
self.missing_layers.extend(upload_failures);
pub fn add_upload_failures(&mut self, upload_failures: impl IntoIterator<Item = PathBuf>) {
self.missing_layers.extend(upload_failures.into_iter());
}
pub fn remove_layers(&mut self, layers_to_remove: &HashSet<PathBuf>) {
self.timeline_layers
.retain(|layer, _| !layers_to_remove.contains(layer));
.retain(|layer| !layers_to_remove.contains(layer));
self.missing_layers
.retain(|layer, _| !layers_to_remove.contains(layer));
.retain(|layer| !layers_to_remove.contains(layer));
}
/// Lists all layer files in the given remote timeline. Omits the metadata file.
pub fn stored_files(&self) -> &HashMap<PathBuf, LayerFileMetadata> {
pub fn stored_files(&self) -> &HashSet<PathBuf> {
&self.timeline_layers
}
/// Combines metadata gathered or verified during downloading needed layer files to metadata on
/// the [`RemoteIndex`], so it can be uploaded later.
pub fn merge_metadata_from_downloaded(
&mut self,
downloaded: &HashMap<PathBuf, LayerFileMetadata>,
) {
downloaded.iter().for_each(|(path, metadata)| {
if let Some(upgraded) = self.timeline_layers.get_mut(path) {
upgraded.merge(metadata);
}
});
}
pub fn from_index_part(timeline_path: &Path, index_part: IndexPart) -> anyhow::Result<Self> {
let metadata = TimelineMetadata::from_bytes(&index_part.metadata_bytes)?;
let default_metadata = &IndexLayerMetadata::default();
let find_metadata = |key: &RelativePath| -> LayerFileMetadata {
index_part
.layer_metadata
.get(key)
.unwrap_or(default_metadata)
.into()
};
Ok(Self {
timeline_layers: index_part
.timeline_layers
.iter()
.map(|layer_path| (layer_path.as_path(timeline_path), find_metadata(layer_path)))
.collect(),
missing_layers: index_part
.missing_layers
.iter()
.map(|layer_path| (layer_path.as_path(timeline_path), find_metadata(layer_path)))
.collect(),
timeline_layers: to_local_paths(timeline_path, index_part.timeline_layers),
missing_layers: to_local_paths(timeline_path, index_part.missing_layers),
metadata,
awaits_download: false,
})
}
}
/// Metadata gathered for each of the layer files.
///
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
/// might have less or more metadata depending if upgrading or rolling back an upgrade.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Default))]
pub struct LayerFileMetadata {
file_size: Option<u64>,
}
impl From<&'_ IndexLayerMetadata> for LayerFileMetadata {
fn from(other: &IndexLayerMetadata) -> Self {
LayerFileMetadata {
file_size: other.file_size,
}
}
}
impl LayerFileMetadata {
pub fn new(file_size: u64) -> Self {
LayerFileMetadata {
file_size: Some(file_size),
}
}
pub fn file_size(&self) -> Option<u64> {
self.file_size
}
/// Metadata has holes due to version upgrades. This method is called to upgrade self with the
/// other value.
///
/// This is called on the possibly outdated version.
pub fn merge(&mut self, other: &Self) {
self.file_size = other.file_size.or(self.file_size);
}
}
/// Part of the remote index, corresponding to a certain timeline.
/// Contains the data about all files in the timeline, present remotely and its metadata.
///
/// This type needs to be backwards and forwards compatible. When changing the fields,
/// remember to add a test case for the changed version.
#[serde_as]
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct IndexPart {
/// Debugging aid describing the version of this type.
#[serde(default)]
version: usize,
/// Each of the layers present on remote storage.
///
/// Additional metadata can might exist in `layer_metadata`.
timeline_layers: HashSet<RelativePath>,
/// Currently is not really used in pageserver,
/// present to manually keep track of the layer files that pageserver might never retrieve.
///
/// Such "holes" might appear if any upload task was evicted on an error threshold:
/// the this layer will only be rescheduled for upload on pageserver restart.
missing_layers: HashSet<RelativePath>,
/// Per layer file metadata, which can be present for a present or missing layer file.
///
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
/// that latest version stores.
#[serde(default)]
layer_metadata: HashMap<RelativePath, IndexLayerMetadata>,
#[serde_as(as = "DisplayFromStr")]
disk_consistent_lsn: Lsn,
metadata_bytes: Vec<u8>,
}
impl IndexPart {
/// When adding or modifying any parts of `IndexPart`, increment the version so that it can be
/// used to understand later versions.
///
/// Version is currently informative only.
const LATEST_VERSION: usize = 1;
pub const FILE_NAME: &'static str = "index_part.json";
#[cfg(test)]
@@ -387,10 +288,8 @@ impl IndexPart {
metadata_bytes: Vec<u8>,
) -> Self {
Self {
version: Self::LATEST_VERSION,
timeline_layers,
missing_layers,
layer_metadata: HashMap::default(),
disk_consistent_lsn,
metadata_bytes,
}
@@ -405,68 +304,35 @@ impl IndexPart {
remote_timeline: RemoteTimeline,
) -> anyhow::Result<Self> {
let metadata_bytes = remote_timeline.metadata.to_bytes()?;
let mut layer_metadata = HashMap::new();
let mut missing_layers = HashSet::new();
separate_paths_and_metadata(
timeline_path,
&remote_timeline.missing_layers,
&mut missing_layers,
&mut layer_metadata,
)
.context("Failed to convert missing layers' paths to relative ones")?;
let mut timeline_layers = HashSet::new();
separate_paths_and_metadata(
timeline_path,
&remote_timeline.timeline_layers,
&mut timeline_layers,
&mut layer_metadata,
)
.context("Failed to convert timeline layers' paths to relative ones")?;
Ok(Self {
version: Self::LATEST_VERSION,
timeline_layers,
missing_layers,
layer_metadata,
timeline_layers: to_relative_paths(timeline_path, remote_timeline.timeline_layers)
.context("Failed to convert timeline layers' paths to relative ones")?,
missing_layers: to_relative_paths(timeline_path, remote_timeline.missing_layers)
.context("Failed to convert missing layers' paths to relative ones")?,
disk_consistent_lsn: remote_timeline.metadata.disk_consistent_lsn(),
metadata_bytes,
})
}
}
/// Serialized form of [`LayerFileMetadata`].
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
pub struct IndexLayerMetadata {
file_size: Option<u64>,
}
impl From<&'_ LayerFileMetadata> for IndexLayerMetadata {
fn from(other: &'_ LayerFileMetadata) -> Self {
IndexLayerMetadata {
file_size: other.file_size,
}
}
}
fn separate_paths_and_metadata(
fn to_local_paths(
timeline_path: &Path,
input: &HashMap<PathBuf, LayerFileMetadata>,
output: &mut HashSet<RelativePath>,
layer_metadata: &mut HashMap<RelativePath, IndexLayerMetadata>,
) -> anyhow::Result<()> {
for (path, metadata) in input {
let rel_path = RelativePath::new(timeline_path, path)?;
let metadata = IndexLayerMetadata::from(metadata);
paths: impl IntoIterator<Item = RelativePath>,
) -> HashSet<PathBuf> {
paths
.into_iter()
.map(|path| path.as_path(timeline_path))
.collect()
}
layer_metadata.insert(rel_path.clone(), metadata);
output.insert(rel_path);
}
Ok(())
fn to_relative_paths(
timeline_path: &Path,
paths: impl IntoIterator<Item = PathBuf>,
) -> anyhow::Result<HashSet<RelativePath>> {
paths
.into_iter()
.map(|path| RelativePath::new(timeline_path, path))
.collect()
}
#[cfg(test)]
@@ -491,13 +357,13 @@ mod tests {
DEFAULT_PG_VERSION,
);
let remote_timeline = RemoteTimeline {
timeline_layers: HashMap::from([
(timeline_path.join("layer_1"), LayerFileMetadata::new(1)),
(timeline_path.join("layer_2"), LayerFileMetadata::new(2)),
timeline_layers: HashSet::from([
timeline_path.join("layer_1"),
timeline_path.join("layer_2"),
]),
missing_layers: HashMap::from([
(timeline_path.join("missing_1"), LayerFileMetadata::new(3)),
(timeline_path.join("missing_2"), LayerFileMetadata::new(4)),
missing_layers: HashSet::from([
timeline_path.join("missing_1"),
timeline_path.join("missing_2"),
]),
metadata: metadata.clone(),
awaits_download: false,
@@ -619,13 +485,13 @@ mod tests {
let conversion_result = IndexPart::from_remote_timeline(
&timeline_path,
RemoteTimeline {
timeline_layers: HashMap::from([
(PathBuf::from("bad_path"), LayerFileMetadata::new(1)),
(timeline_path.join("layer_2"), LayerFileMetadata::new(2)),
timeline_layers: HashSet::from([
PathBuf::from("bad_path"),
timeline_path.join("layer_2"),
]),
missing_layers: HashMap::from([
(timeline_path.join("missing_1"), LayerFileMetadata::new(3)),
(timeline_path.join("missing_2"), LayerFileMetadata::new(4)),
missing_layers: HashSet::from([
timeline_path.join("missing_1"),
timeline_path.join("missing_2"),
]),
metadata: metadata.clone(),
awaits_download: false,
@@ -636,13 +502,13 @@ mod tests {
let conversion_result = IndexPart::from_remote_timeline(
&timeline_path,
RemoteTimeline {
timeline_layers: HashMap::from([
(timeline_path.join("layer_1"), LayerFileMetadata::new(1)),
(timeline_path.join("layer_2"), LayerFileMetadata::new(2)),
timeline_layers: HashSet::from([
timeline_path.join("layer_1"),
timeline_path.join("layer_2"),
]),
missing_layers: HashMap::from([
(PathBuf::from("bad_path"), LayerFileMetadata::new(3)),
(timeline_path.join("missing_2"), LayerFileMetadata::new(4)),
missing_layers: HashSet::from([
PathBuf::from("bad_path"),
timeline_path.join("missing_2"),
]),
metadata,
awaits_download: false,
@@ -650,63 +516,4 @@ mod tests {
);
assert!(conversion_result.is_err(), "Should not be able to convert metadata with missing layer paths that are not in the timeline directory");
}
#[test]
fn v0_indexpart_is_parsed() {
let example = r#"{
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
"missing_layers":["not_a_real_layer_but_adding_coverage"],
"disk_consistent_lsn":"0/16960E8",
"metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
}"#;
let expected = IndexPart {
version: 0,
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
layer_metadata: HashMap::default(),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
};
let part = serde_json::from_str::<IndexPart>(example).unwrap();
assert_eq!(part, expected);
}
#[test]
fn v1_indexpart_is_parsed() {
let example = r#"{
"version":1,
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
"missing_layers":["not_a_real_layer_but_adding_coverage"],
"layer_metadata":{
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
"not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
},
"disk_consistent_lsn":"0/16960E8",
"metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
}"#;
let expected = IndexPart {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 1,
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
layer_metadata: HashMap::from([
(RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned()), IndexLayerMetadata {
file_size: Some(25600000),
}),
(RelativePath("not_a_real_layer_but_adding_coverage".to_owned()), IndexLayerMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: Some(9007199254741001),
})
]),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
};
let part = serde_json::from_str::<IndexPart>(example).unwrap();
assert_eq!(part, expected);
}
}

View File

@@ -69,25 +69,14 @@ pub(super) async fn upload_timeline_layers<'a>(
.map(|meta| meta.disk_consistent_lsn());
let already_uploaded_layers = remote_timeline
.map(|timeline| {
timeline
.stored_files()
.keys()
.cloned()
.collect::<std::collections::HashSet<_>>()
})
.map(|timeline| timeline.stored_files())
.cloned()
.unwrap_or_default();
let layers_to_upload = upload
.layers_to_upload
.iter()
.filter_map(|(k, v)| {
if !already_uploaded_layers.contains(k) {
Some((k.to_owned(), v.to_owned()))
} else {
None
}
})
.difference(&already_uploaded_layers)
.cloned()
.collect::<Vec<_>>();
if layers_to_upload.is_empty() {
@@ -109,7 +98,7 @@ pub(super) async fn upload_timeline_layers<'a>(
let mut upload_tasks = layers_to_upload
.into_iter()
.map(|(source_path, known_metadata)| async move {
.map(|source_path| async move {
let source_file = match fs::File::open(&source_path).await.with_context(|| {
format!(
"Failed to upen a source file for layer '{}'",
@@ -120,7 +109,7 @@ pub(super) async fn upload_timeline_layers<'a>(
Err(e) => return Err(UploadError::MissingLocalFile(source_path, e)),
};
let fs_size = source_file
let source_size = source_file
.metadata()
.await
.with_context(|| {
@@ -130,24 +119,10 @@ pub(super) async fn upload_timeline_layers<'a>(
)
})
.map_err(UploadError::Other)?
.len();
// FIXME: this looks bad
if let Some(metadata_size) = known_metadata.file_size() {
if metadata_size != fs_size {
return Err(UploadError::Other(anyhow::anyhow!(
"File {source_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"
)));
}
} else {
// this is a silly state we would like to avoid
}
let fs_size = usize::try_from(fs_size).with_context(|| format!("File {source_path:?} size {fs_size} could not be converted to usize"))
.map_err(UploadError::Other)?;
.len() as usize;
match storage
.upload_storage_object(Box::new(source_file), fs_size, &source_path)
.upload_storage_object(Box::new(source_file), source_size, &source_path)
.await
.with_context(|| format!("Failed to upload layer file for {sync_id}"))
{
@@ -161,11 +136,8 @@ pub(super) async fn upload_timeline_layers<'a>(
while let Some(upload_result) = upload_tasks.next().await {
match upload_result {
Ok(uploaded_path) => {
let metadata = upload
.layers_to_upload
.remove(&uploaded_path)
.expect("metadata should always exist, assuming no double uploads");
upload.uploaded_layers.insert(uploaded_path, metadata);
upload.layers_to_upload.remove(&uploaded_path);
upload.uploaded_layers.insert(uploaded_path);
}
Err(e) => match e {
UploadError::Other(e) => {
@@ -290,7 +262,7 @@ mod tests {
assert_eq!(
upload
.uploaded_layers
.keys()
.iter()
.cloned()
.collect::<BTreeSet<_>>(),
layer_files
@@ -385,7 +357,7 @@ mod tests {
assert_eq!(
upload
.uploaded_layers
.keys()
.iter()
.cloned()
.collect::<BTreeSet<_>>(),
layer_files

Some files were not shown because too many files have changed in this diff Show More